![]() |
MAGMA
2.7.1
Matrix Algebra for GPU and Multicore Architectures
|
Functions | |
magma_int_t | magma_cgetrf_batched (magma_int_t m, magma_int_t n, magmaFloatComplex **dA_array, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue) |
CGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. More... | |
magma_int_t | magma_cgetrf_recpanel_batched (magma_int_t m, magma_int_t n, magma_int_t min_recpnb, magmaFloatComplex **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **dipiv_array, magma_int_t **dpivinfo_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue) |
This is an internal routine that might have many assumption. More... | |
magma_int_t | magma_cgetrf_recpanel_native (magma_int_t m, magma_int_t n, magmaFloatComplex_ptr dA, magma_int_t ldda, magma_int_t *dipiv, magma_int_t *dipivinfo, magma_int_t *dinfo, magma_int_t gbstep, magma_queue_t queue, magma_queue_t update_queue) |
This is an internal routine. More... | |
magma_int_t | magma_cgetrf_vbatched_max_nocheck_work (magma_int_t *m, magma_int_t *n, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, magmaFloatComplex **dA_array, magma_int_t *ldda, magma_int_t **dipiv_array, magma_int_t *info_array, void *work, magma_int_t *lwork, magma_int_t batchCount, magma_queue_t queue) |
CGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. More... | |
magma_int_t | magma_cgetrf_vbatched (magma_int_t *m, magma_int_t *n, magmaFloatComplex **dA_array, magma_int_t *ldda, magma_int_t **dipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue) |
CGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. More... | |
magma_int_t | magma_dgetrf_batched (magma_int_t m, magma_int_t n, double **dA_array, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue) |
DGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. More... | |
magma_int_t | magma_dgetrf_recpanel_batched (magma_int_t m, magma_int_t n, magma_int_t min_recpnb, double **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **dipiv_array, magma_int_t **dpivinfo_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue) |
This is an internal routine that might have many assumption. More... | |
magma_int_t | magma_dgetrf_recpanel_native (magma_int_t m, magma_int_t n, magmaDouble_ptr dA, magma_int_t ldda, magma_int_t *dipiv, magma_int_t *dipivinfo, magma_int_t *dinfo, magma_int_t gbstep, magma_queue_t queue, magma_queue_t update_queue) |
This is an internal routine. More... | |
magma_int_t | magma_dgetrf_vbatched_max_nocheck_work (magma_int_t *m, magma_int_t *n, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, double **dA_array, magma_int_t *ldda, magma_int_t **dipiv_array, magma_int_t *info_array, void *work, magma_int_t *lwork, magma_int_t batchCount, magma_queue_t queue) |
DGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. More... | |
magma_int_t | magma_dgetrf_vbatched (magma_int_t *m, magma_int_t *n, double **dA_array, magma_int_t *ldda, magma_int_t **dipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue) |
DGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. More... | |
magma_int_t | magma_sgetrf_batched (magma_int_t m, magma_int_t n, float **dA_array, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue) |
SGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. More... | |
magma_int_t | magma_sgetrf_recpanel_batched (magma_int_t m, magma_int_t n, magma_int_t min_recpnb, float **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **dipiv_array, magma_int_t **dpivinfo_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue) |
This is an internal routine that might have many assumption. More... | |
magma_int_t | magma_sgetrf_recpanel_native (magma_int_t m, magma_int_t n, magmaFloat_ptr dA, magma_int_t ldda, magma_int_t *dipiv, magma_int_t *dipivinfo, magma_int_t *dinfo, magma_int_t gbstep, magma_queue_t queue, magma_queue_t update_queue) |
This is an internal routine. More... | |
magma_int_t | magma_sgetrf_vbatched_max_nocheck_work (magma_int_t *m, magma_int_t *n, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, float **dA_array, magma_int_t *ldda, magma_int_t **dipiv_array, magma_int_t *info_array, void *work, magma_int_t *lwork, magma_int_t batchCount, magma_queue_t queue) |
SGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. More... | |
magma_int_t | magma_sgetrf_vbatched (magma_int_t *m, magma_int_t *n, float **dA_array, magma_int_t *ldda, magma_int_t **dipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue) |
SGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. More... | |
magma_int_t | magma_zgetrf_batched (magma_int_t m, magma_int_t n, magmaDoubleComplex **dA_array, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue) |
ZGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. More... | |
magma_int_t | magma_zgetrf_recpanel_batched (magma_int_t m, magma_int_t n, magma_int_t min_recpnb, magmaDoubleComplex **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **dipiv_array, magma_int_t **dpivinfo_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue) |
This is an internal routine that might have many assumption. More... | |
magma_int_t | magma_zgetrf_recpanel_native (magma_int_t m, magma_int_t n, magmaDoubleComplex_ptr dA, magma_int_t ldda, magma_int_t *dipiv, magma_int_t *dipivinfo, magma_int_t *dinfo, magma_int_t gbstep, magma_queue_t queue, magma_queue_t update_queue) |
This is an internal routine. More... | |
magma_int_t | magma_zgetrf_vbatched_max_nocheck_work (magma_int_t *m, magma_int_t *n, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, magmaDoubleComplex **dA_array, magma_int_t *ldda, magma_int_t **dipiv_array, magma_int_t *info_array, void *work, magma_int_t *lwork, magma_int_t batchCount, magma_queue_t queue) |
ZGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. More... | |
magma_int_t | magma_zgetrf_vbatched (magma_int_t *m, magma_int_t *n, magmaDoubleComplex **dA_array, magma_int_t *ldda, magma_int_t **dipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue) |
ZGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. More... | |
magma_int_t | magma_cgbtrf_batched (magma_int_t use_pivoting, magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku, magmaFloatComplex **dA_array, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue) |
cgbtrf_batched computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges. More... | |
magma_int_t | magma_cgetf2_nopiv_internal_batched (magma_int_t m, magma_int_t n, magmaFloatComplex **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue) |
cgetf2_nopiv computes the non-pivoting LU factorization of an M-by-N matrix A. More... | |
magma_int_t | magma_cgetrf_batched_smallsq_noshfl (magma_int_t n, magmaFloatComplex **dA_array, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue) |
cgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges. More... | |
magma_int_t | magma_cgetrf_batched_smallsq_shfl (magma_int_t n, magmaFloatComplex **dA_array, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue) |
cgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges. More... | |
magma_int_t | magma_dgbtrf_batched (magma_int_t use_pivoting, magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku, double **dA_array, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue) |
dgbtrf_batched computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges. More... | |
magma_int_t | magma_dgetf2_nopiv_internal_batched (magma_int_t m, magma_int_t n, double **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue) |
dgetf2_nopiv computes the non-pivoting LU factorization of an M-by-N matrix A. More... | |
magma_int_t | magma_dgetrf_batched_smallsq_noshfl (magma_int_t n, double **dA_array, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue) |
dgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges. More... | |
magma_int_t | magma_dgetrf_batched_smallsq_shfl (magma_int_t n, double **dA_array, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue) |
dgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges. More... | |
magma_int_t | magma_sgbtrf_batched (magma_int_t use_pivoting, magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku, float **dA_array, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue) |
sgbtrf_batched computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges. More... | |
magma_int_t | magma_sgetf2_nopiv_internal_batched (magma_int_t m, magma_int_t n, float **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue) |
sgetf2_nopiv computes the non-pivoting LU factorization of an M-by-N matrix A. More... | |
magma_int_t | magma_sgetrf_batched_smallsq_noshfl (magma_int_t n, float **dA_array, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue) |
sgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges. More... | |
magma_int_t | magma_sgetrf_batched_smallsq_shfl (magma_int_t n, float **dA_array, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue) |
sgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges. More... | |
magma_int_t | magma_zgbtrf_batched (magma_int_t use_pivoting, magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku, magmaDoubleComplex **dA_array, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue) |
zgbtrf_batched computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges. More... | |
magma_int_t | magma_zgetf2_nopiv_internal_batched (magma_int_t m, magma_int_t n, magmaDoubleComplex **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue) |
zgetf2_nopiv computes the non-pivoting LU factorization of an M-by-N matrix A. More... | |
magma_int_t | magma_zgetrf_batched_smallsq_noshfl (magma_int_t n, magmaDoubleComplex **dA_array, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue) |
zgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges. More... | |
magma_int_t | magma_zgetrf_batched_smallsq_shfl (magma_int_t n, magmaDoubleComplex **dA_array, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue) |
zgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges. More... | |
magma_int_t magma_cgetrf_batched | ( | magma_int_t | m, |
magma_int_t | n, | ||
magmaFloatComplex ** | dA_array, | ||
magma_int_t | ldda, | ||
magma_int_t ** | ipiv_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
CGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
[in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
[in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_cgetrf_recpanel_batched | ( | magma_int_t | m, |
magma_int_t | n, | ||
magma_int_t | min_recpnb, | ||
magmaFloatComplex ** | dA_array, | ||
magma_int_t | ai, | ||
magma_int_t | aj, | ||
magma_int_t | ldda, | ||
magma_int_t ** | dipiv_array, | ||
magma_int_t ** | dpivinfo_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | gbstep, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
This is an internal routine that might have many assumption.
Documentation is not fully completed
CGETRF_PANEL computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
[in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
[in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
[in] | min_recpnb | INTEGER. Internal use. The recursive nb |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ai | INTEGER Row offset for A. |
[in] | aj | INTEGER Column offset for A. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | dipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | dpivinfo_array | Array of pointers, dimension (batchCount), for internal use. |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | gbstep | INTEGER internal use. |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_cgetrf_recpanel_native | ( | magma_int_t | m, |
magma_int_t | n, | ||
magmaFloatComplex_ptr | dA, | ||
magma_int_t | ldda, | ||
magma_int_t * | dipiv, | ||
magma_int_t * | dipivinfo, | ||
magma_int_t * | dinfo, | ||
magma_int_t | gbstep, | ||
magma_queue_t | queue, | ||
magma_queue_t | update_queue | ||
) |
This is an internal routine.
CGETRF_PANEL computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a GPU-only routine. The host CPU is not used.
[in] | m | INTEGER The number of rows the matrix A. M >= 0. |
[in] | n | INTEGER The number of columns the matrix A. N >= 0. |
[in,out] | dA | A COMPLEX array on the GPU, dimension (LDDA,N). On entry, an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | INTEGER The leading dimension of A. LDDA >= max(1,M). |
[out] | dipiv | An INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | dipivinfo | An INTEGER array, for internal use. |
[out] | dinfo | INTEGER, stored on the GPU
|
[in] | gbstep | INTEGER internal use. |
[in] | queue | magma_queue_t Queue to execute in. |
[in] | update_queue | magma_queue_t Internal use. |
magma_int_t magma_cgetrf_vbatched_max_nocheck_work | ( | magma_int_t * | m, |
magma_int_t * | n, | ||
magma_int_t | max_m, | ||
magma_int_t | max_n, | ||
magma_int_t | max_minmn, | ||
magma_int_t | max_mxn, | ||
magmaFloatComplex ** | dA_array, | ||
magma_int_t * | ldda, | ||
magma_int_t ** | dipiv_array, | ||
magma_int_t * | info_array, | ||
void * | work, | ||
magma_int_t * | lwork, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
CGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is the variable-size batched version, which factors batchCount matrices of different sizes in parallel. Each matrix is assumed to have its own size and leading dimension.
[in] | M | Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of rows of each matrix A. M[i] >= 0. |
[in] | N | Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of columns of each matrix A. N[i] >= 0. |
[in] | MAX_M | INTEGER The maximum number of rows across the batch |
[in] | MAX_N | INTEGER The maximum number of columns across the batch |
[in] | MAX_MINMN | INTEGER The maximum value of min(Mi, Ni) for i = 1, 2, ..., batchCount |
[in] | MAX_MxN | INTEGER The maximum value of the product (Mi x Ni) for i = 1, 2, ..., batchCount |
[in,out] | dA_array | Array of pointers on the GPU, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA[i],N[i]). On entry, each pointer is an M[i]-by-N[i] matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | Array of INTEGERs on the GPU Each is the leading dimension of each array A. LDDA[i] >= max(1,M[i]). |
[out] | dipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M[i],N[i])) The pivot indices; for 1 <= p <= min(M[i],N[i]), row p of the matrix was interchanged with row IPIV(p). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | WORK | VOID pointer A workspace of size LWORK[0] |
[in,out] | LWORK | INTEGER pointer If lwork[0] < 0, a workspace query is assumed, and lwork[0] is overwritten by the required workspace size in bytes. Otherwise, lwork[0] is the size of work |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_cgetrf_vbatched | ( | magma_int_t * | m, |
magma_int_t * | n, | ||
magmaFloatComplex ** | dA_array, | ||
magma_int_t * | ldda, | ||
magma_int_t ** | dipiv_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
CGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is the variable-size batched version, which factors batchCount matrices of different sizes in parallel. Each matrix is assumed to have its own size and leading dimension.
[in] | M | Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of rows of each matrix A. M[i] >= 0. |
[in] | N | Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of columns of each matrix A. N[i] >= 0. |
[in,out] | dA_array | Array of pointers on the GPU, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA[i],N[i]). On entry, each pointer is an M[i]-by-N[i] matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | Array of INTEGERs on the GPU Each is the leading dimension of each array A. LDDA[i] >= max(1,M[i]). |
[out] | dipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M[i],N[i])) The pivot indices; for 1 <= p <= min(M[i],N[i]), row p of the matrix was interchanged with row IPIV(p). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_dgetrf_batched | ( | magma_int_t | m, |
magma_int_t | n, | ||
double ** | dA_array, | ||
magma_int_t | ldda, | ||
magma_int_t ** | ipiv_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
DGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
[in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
[in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_dgetrf_recpanel_batched | ( | magma_int_t | m, |
magma_int_t | n, | ||
magma_int_t | min_recpnb, | ||
double ** | dA_array, | ||
magma_int_t | ai, | ||
magma_int_t | aj, | ||
magma_int_t | ldda, | ||
magma_int_t ** | dipiv_array, | ||
magma_int_t ** | dpivinfo_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | gbstep, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
This is an internal routine that might have many assumption.
Documentation is not fully completed
DGETRF_PANEL computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
[in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
[in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
[in] | min_recpnb | INTEGER. Internal use. The recursive nb |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ai | INTEGER Row offset for A. |
[in] | aj | INTEGER Column offset for A. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | dipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | dpivinfo_array | Array of pointers, dimension (batchCount), for internal use. |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | gbstep | INTEGER internal use. |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_dgetrf_recpanel_native | ( | magma_int_t | m, |
magma_int_t | n, | ||
magmaDouble_ptr | dA, | ||
magma_int_t | ldda, | ||
magma_int_t * | dipiv, | ||
magma_int_t * | dipivinfo, | ||
magma_int_t * | dinfo, | ||
magma_int_t | gbstep, | ||
magma_queue_t | queue, | ||
magma_queue_t | update_queue | ||
) |
This is an internal routine.
DGETRF_PANEL computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a GPU-only routine. The host CPU is not used.
[in] | m | INTEGER The number of rows the matrix A. M >= 0. |
[in] | n | INTEGER The number of columns the matrix A. N >= 0. |
[in,out] | dA | A DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | INTEGER The leading dimension of A. LDDA >= max(1,M). |
[out] | dipiv | An INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | dipivinfo | An INTEGER array, for internal use. |
[out] | dinfo | INTEGER, stored on the GPU
|
[in] | gbstep | INTEGER internal use. |
[in] | queue | magma_queue_t Queue to execute in. |
[in] | update_queue | magma_queue_t Internal use. |
magma_int_t magma_dgetrf_vbatched_max_nocheck_work | ( | magma_int_t * | m, |
magma_int_t * | n, | ||
magma_int_t | max_m, | ||
magma_int_t | max_n, | ||
magma_int_t | max_minmn, | ||
magma_int_t | max_mxn, | ||
double ** | dA_array, | ||
magma_int_t * | ldda, | ||
magma_int_t ** | dipiv_array, | ||
magma_int_t * | info_array, | ||
void * | work, | ||
magma_int_t * | lwork, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
DGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is the variable-size batched version, which factors batchCount matrices of different sizes in parallel. Each matrix is assumed to have its own size and leading dimension.
[in] | M | Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of rows of each matrix A. M[i] >= 0. |
[in] | N | Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of columns of each matrix A. N[i] >= 0. |
[in] | MAX_M | INTEGER The maximum number of rows across the batch |
[in] | MAX_N | INTEGER The maximum number of columns across the batch |
[in] | MAX_MINMN | INTEGER The maximum value of min(Mi, Ni) for i = 1, 2, ..., batchCount |
[in] | MAX_MxN | INTEGER The maximum value of the product (Mi x Ni) for i = 1, 2, ..., batchCount |
[in,out] | dA_array | Array of pointers on the GPU, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA[i],N[i]). On entry, each pointer is an M[i]-by-N[i] matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | Array of INTEGERs on the GPU Each is the leading dimension of each array A. LDDA[i] >= max(1,M[i]). |
[out] | dipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M[i],N[i])) The pivot indices; for 1 <= p <= min(M[i],N[i]), row p of the matrix was interchanged with row IPIV(p). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | WORK | VOID pointer A workspace of size LWORK[0] |
[in,out] | LWORK | INTEGER pointer If lwork[0] < 0, a workspace query is assumed, and lwork[0] is overwritten by the required workspace size in bytes. Otherwise, lwork[0] is the size of work |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_dgetrf_vbatched | ( | magma_int_t * | m, |
magma_int_t * | n, | ||
double ** | dA_array, | ||
magma_int_t * | ldda, | ||
magma_int_t ** | dipiv_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
DGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is the variable-size batched version, which factors batchCount matrices of different sizes in parallel. Each matrix is assumed to have its own size and leading dimension.
[in] | M | Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of rows of each matrix A. M[i] >= 0. |
[in] | N | Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of columns of each matrix A. N[i] >= 0. |
[in,out] | dA_array | Array of pointers on the GPU, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA[i],N[i]). On entry, each pointer is an M[i]-by-N[i] matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | Array of INTEGERs on the GPU Each is the leading dimension of each array A. LDDA[i] >= max(1,M[i]). |
[out] | dipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M[i],N[i])) The pivot indices; for 1 <= p <= min(M[i],N[i]), row p of the matrix was interchanged with row IPIV(p). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_sgetrf_batched | ( | magma_int_t | m, |
magma_int_t | n, | ||
float ** | dA_array, | ||
magma_int_t | ldda, | ||
magma_int_t ** | ipiv_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
SGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
[in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
[in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_sgetrf_recpanel_batched | ( | magma_int_t | m, |
magma_int_t | n, | ||
magma_int_t | min_recpnb, | ||
float ** | dA_array, | ||
magma_int_t | ai, | ||
magma_int_t | aj, | ||
magma_int_t | ldda, | ||
magma_int_t ** | dipiv_array, | ||
magma_int_t ** | dpivinfo_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | gbstep, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
This is an internal routine that might have many assumption.
Documentation is not fully completed
SGETRF_PANEL computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
[in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
[in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
[in] | min_recpnb | INTEGER. Internal use. The recursive nb |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ai | INTEGER Row offset for A. |
[in] | aj | INTEGER Column offset for A. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | dipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | dpivinfo_array | Array of pointers, dimension (batchCount), for internal use. |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | gbstep | INTEGER internal use. |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_sgetrf_recpanel_native | ( | magma_int_t | m, |
magma_int_t | n, | ||
magmaFloat_ptr | dA, | ||
magma_int_t | ldda, | ||
magma_int_t * | dipiv, | ||
magma_int_t * | dipivinfo, | ||
magma_int_t * | dinfo, | ||
magma_int_t | gbstep, | ||
magma_queue_t | queue, | ||
magma_queue_t | update_queue | ||
) |
This is an internal routine.
SGETRF_PANEL computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a GPU-only routine. The host CPU is not used.
[in] | m | INTEGER The number of rows the matrix A. M >= 0. |
[in] | n | INTEGER The number of columns the matrix A. N >= 0. |
[in,out] | dA | A REAL array on the GPU, dimension (LDDA,N). On entry, an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | INTEGER The leading dimension of A. LDDA >= max(1,M). |
[out] | dipiv | An INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | dipivinfo | An INTEGER array, for internal use. |
[out] | dinfo | INTEGER, stored on the GPU
|
[in] | gbstep | INTEGER internal use. |
[in] | queue | magma_queue_t Queue to execute in. |
[in] | update_queue | magma_queue_t Internal use. |
magma_int_t magma_sgetrf_vbatched_max_nocheck_work | ( | magma_int_t * | m, |
magma_int_t * | n, | ||
magma_int_t | max_m, | ||
magma_int_t | max_n, | ||
magma_int_t | max_minmn, | ||
magma_int_t | max_mxn, | ||
float ** | dA_array, | ||
magma_int_t * | ldda, | ||
magma_int_t ** | dipiv_array, | ||
magma_int_t * | info_array, | ||
void * | work, | ||
magma_int_t * | lwork, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
SGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is the variable-size batched version, which factors batchCount matrices of different sizes in parallel. Each matrix is assumed to have its own size and leading dimension.
[in] | M | Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of rows of each matrix A. M[i] >= 0. |
[in] | N | Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of columns of each matrix A. N[i] >= 0. |
[in] | MAX_M | INTEGER The maximum number of rows across the batch |
[in] | MAX_N | INTEGER The maximum number of columns across the batch |
[in] | MAX_MINMN | INTEGER The maximum value of min(Mi, Ni) for i = 1, 2, ..., batchCount |
[in] | MAX_MxN | INTEGER The maximum value of the product (Mi x Ni) for i = 1, 2, ..., batchCount |
[in,out] | dA_array | Array of pointers on the GPU, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA[i],N[i]). On entry, each pointer is an M[i]-by-N[i] matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | Array of INTEGERs on the GPU Each is the leading dimension of each array A. LDDA[i] >= max(1,M[i]). |
[out] | dipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M[i],N[i])) The pivot indices; for 1 <= p <= min(M[i],N[i]), row p of the matrix was interchanged with row IPIV(p). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | WORK | VOID pointer A workspace of size LWORK[0] |
[in,out] | LWORK | INTEGER pointer If lwork[0] < 0, a workspace query is assumed, and lwork[0] is overwritten by the required workspace size in bytes. Otherwise, lwork[0] is the size of work |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_sgetrf_vbatched | ( | magma_int_t * | m, |
magma_int_t * | n, | ||
float ** | dA_array, | ||
magma_int_t * | ldda, | ||
magma_int_t ** | dipiv_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
SGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is the variable-size batched version, which factors batchCount matrices of different sizes in parallel. Each matrix is assumed to have its own size and leading dimension.
[in] | M | Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of rows of each matrix A. M[i] >= 0. |
[in] | N | Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of columns of each matrix A. N[i] >= 0. |
[in,out] | dA_array | Array of pointers on the GPU, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA[i],N[i]). On entry, each pointer is an M[i]-by-N[i] matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | Array of INTEGERs on the GPU Each is the leading dimension of each array A. LDDA[i] >= max(1,M[i]). |
[out] | dipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M[i],N[i])) The pivot indices; for 1 <= p <= min(M[i],N[i]), row p of the matrix was interchanged with row IPIV(p). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_zgetrf_batched | ( | magma_int_t | m, |
magma_int_t | n, | ||
magmaDoubleComplex ** | dA_array, | ||
magma_int_t | ldda, | ||
magma_int_t ** | ipiv_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
ZGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
[in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
[in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_zgetrf_recpanel_batched | ( | magma_int_t | m, |
magma_int_t | n, | ||
magma_int_t | min_recpnb, | ||
magmaDoubleComplex ** | dA_array, | ||
magma_int_t | ai, | ||
magma_int_t | aj, | ||
magma_int_t | ldda, | ||
magma_int_t ** | dipiv_array, | ||
magma_int_t ** | dpivinfo_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | gbstep, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
This is an internal routine that might have many assumption.
Documentation is not fully completed
ZGETRF_PANEL computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
[in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
[in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
[in] | min_recpnb | INTEGER. Internal use. The recursive nb |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ai | INTEGER Row offset for A. |
[in] | aj | INTEGER Column offset for A. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | dipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | dpivinfo_array | Array of pointers, dimension (batchCount), for internal use. |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | gbstep | INTEGER internal use. |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_zgetrf_recpanel_native | ( | magma_int_t | m, |
magma_int_t | n, | ||
magmaDoubleComplex_ptr | dA, | ||
magma_int_t | ldda, | ||
magma_int_t * | dipiv, | ||
magma_int_t * | dipivinfo, | ||
magma_int_t * | dinfo, | ||
magma_int_t | gbstep, | ||
magma_queue_t | queue, | ||
magma_queue_t | update_queue | ||
) |
This is an internal routine.
ZGETRF_PANEL computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a GPU-only routine. The host CPU is not used.
[in] | m | INTEGER The number of rows the matrix A. M >= 0. |
[in] | n | INTEGER The number of columns the matrix A. N >= 0. |
[in,out] | dA | A COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | INTEGER The leading dimension of A. LDDA >= max(1,M). |
[out] | dipiv | An INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | dipivinfo | An INTEGER array, for internal use. |
[out] | dinfo | INTEGER, stored on the GPU
|
[in] | gbstep | INTEGER internal use. |
[in] | queue | magma_queue_t Queue to execute in. |
[in] | update_queue | magma_queue_t Internal use. |
magma_int_t magma_zgetrf_vbatched_max_nocheck_work | ( | magma_int_t * | m, |
magma_int_t * | n, | ||
magma_int_t | max_m, | ||
magma_int_t | max_n, | ||
magma_int_t | max_minmn, | ||
magma_int_t | max_mxn, | ||
magmaDoubleComplex ** | dA_array, | ||
magma_int_t * | ldda, | ||
magma_int_t ** | dipiv_array, | ||
magma_int_t * | info_array, | ||
void * | work, | ||
magma_int_t * | lwork, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
ZGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is the variable-size batched version, which factors batchCount matrices of different sizes in parallel. Each matrix is assumed to have its own size and leading dimension.
[in] | M | Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of rows of each matrix A. M[i] >= 0. |
[in] | N | Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of columns of each matrix A. N[i] >= 0. |
[in] | MAX_M | INTEGER The maximum number of rows across the batch |
[in] | MAX_N | INTEGER The maximum number of columns across the batch |
[in] | MAX_MINMN | INTEGER The maximum value of min(Mi, Ni) for i = 1, 2, ..., batchCount |
[in] | MAX_MxN | INTEGER The maximum value of the product (Mi x Ni) for i = 1, 2, ..., batchCount |
[in,out] | dA_array | Array of pointers on the GPU, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA[i],N[i]). On entry, each pointer is an M[i]-by-N[i] matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | Array of INTEGERs on the GPU Each is the leading dimension of each array A. LDDA[i] >= max(1,M[i]). |
[out] | dipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M[i],N[i])) The pivot indices; for 1 <= p <= min(M[i],N[i]), row p of the matrix was interchanged with row IPIV(p). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | WORK | VOID pointer A workspace of size LWORK[0] |
[in,out] | LWORK | INTEGER pointer If lwork[0] < 0, a workspace query is assumed, and lwork[0] is overwritten by the required workspace size in bytes. Otherwise, lwork[0] is the size of work |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_zgetrf_vbatched | ( | magma_int_t * | m, |
magma_int_t * | n, | ||
magmaDoubleComplex ** | dA_array, | ||
magma_int_t * | ldda, | ||
magma_int_t ** | dipiv_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
ZGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is the variable-size batched version, which factors batchCount matrices of different sizes in parallel. Each matrix is assumed to have its own size and leading dimension.
[in] | M | Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of rows of each matrix A. M[i] >= 0. |
[in] | N | Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of columns of each matrix A. N[i] >= 0. |
[in,out] | dA_array | Array of pointers on the GPU, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA[i],N[i]). On entry, each pointer is an M[i]-by-N[i] matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | Array of INTEGERs on the GPU Each is the leading dimension of each array A. LDDA[i] >= max(1,M[i]). |
[out] | dipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M[i],N[i])) The pivot indices; for 1 <= p <= min(M[i],N[i]), row p of the matrix was interchanged with row IPIV(p). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_cgbtrf_batched | ( | magma_int_t | use_pivoting, |
magma_int_t | m, | ||
magma_int_t | n, | ||
magma_int_t | kl, | ||
magma_int_t | ku, | ||
magmaFloatComplex ** | dA_array, | ||
magma_int_t | ldda, | ||
magma_int_t ** | ipiv_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
cgbtrf_batched computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges.
This routine can deal only with square matrices of size up to 32
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
[in] | n | INTEGER The size of each matrix A. N >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_cgetf2_nopiv_internal_batched | ( | magma_int_t | m, |
magma_int_t | n, | ||
magmaFloatComplex ** | dA_array, | ||
magma_int_t | ai, | ||
magma_int_t | aj, | ||
magma_int_t | ldda, | ||
magma_int_t * | info_array, | ||
magma_int_t | gbstep, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
cgetf2_nopiv computes the non-pivoting LU factorization of an M-by-N matrix A.
This routine can deal with matrices of limited widths, so it is for internal use.
The factorization has the form A = L * U where L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is a batched version that factors batchCount M-by-N matrices in parallel.
[in] | m | INTEGER The number of rows the matrix A. N >= 0. |
[in] | n | INTEGER The number of columns of the matrix A. N >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = L*U; the unit diagonal elements of L are not stored. |
[in] | ai | INTEGER Row offset for dA_array. |
[in] | aj | INTEGER Column offset for dA_array. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | gbstep | INTEGER Internal use. |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_cgetrf_batched_smallsq_noshfl | ( | magma_int_t | n, |
magmaFloatComplex ** | dA_array, | ||
magma_int_t | ldda, | ||
magma_int_t ** | ipiv_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
cgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges.
This routine can deal only with square matrices of size up to 32
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
[in] | n | INTEGER The size of each matrix A. N >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_cgetrf_batched_smallsq_shfl | ( | magma_int_t | n, |
magmaFloatComplex ** | dA_array, | ||
magma_int_t | ldda, | ||
magma_int_t ** | ipiv_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
cgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges.
This routine can deal only with square matrices of size up to 32
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
[in] | n | INTEGER The size of each matrix A. N >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_dgbtrf_batched | ( | magma_int_t | use_pivoting, |
magma_int_t | m, | ||
magma_int_t | n, | ||
magma_int_t | kl, | ||
magma_int_t | ku, | ||
double ** | dA_array, | ||
magma_int_t | ldda, | ||
magma_int_t ** | ipiv_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
dgbtrf_batched computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges.
This routine can deal only with square matrices of size up to 32
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
[in] | n | INTEGER The size of each matrix A. N >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_dgetf2_nopiv_internal_batched | ( | magma_int_t | m, |
magma_int_t | n, | ||
double ** | dA_array, | ||
magma_int_t | ai, | ||
magma_int_t | aj, | ||
magma_int_t | ldda, | ||
magma_int_t * | info_array, | ||
magma_int_t | gbstep, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
dgetf2_nopiv computes the non-pivoting LU factorization of an M-by-N matrix A.
This routine can deal with matrices of limited widths, so it is for internal use.
The factorization has the form A = L * U where L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is a batched version that factors batchCount M-by-N matrices in parallel.
[in] | m | INTEGER The number of rows the matrix A. N >= 0. |
[in] | n | INTEGER The number of columns of the matrix A. N >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = L*U; the unit diagonal elements of L are not stored. |
[in] | ai | INTEGER Row offset for dA_array. |
[in] | aj | INTEGER Column offset for dA_array. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | gbstep | INTEGER Internal use. |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_dgetrf_batched_smallsq_noshfl | ( | magma_int_t | n, |
double ** | dA_array, | ||
magma_int_t | ldda, | ||
magma_int_t ** | ipiv_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
dgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges.
This routine can deal only with square matrices of size up to 32
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
[in] | n | INTEGER The size of each matrix A. N >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_dgetrf_batched_smallsq_shfl | ( | magma_int_t | n, |
double ** | dA_array, | ||
magma_int_t | ldda, | ||
magma_int_t ** | ipiv_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
dgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges.
This routine can deal only with square matrices of size up to 32
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
[in] | n | INTEGER The size of each matrix A. N >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_sgbtrf_batched | ( | magma_int_t | use_pivoting, |
magma_int_t | m, | ||
magma_int_t | n, | ||
magma_int_t | kl, | ||
magma_int_t | ku, | ||
float ** | dA_array, | ||
magma_int_t | ldda, | ||
magma_int_t ** | ipiv_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
sgbtrf_batched computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges.
This routine can deal only with square matrices of size up to 32
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
[in] | n | INTEGER The size of each matrix A. N >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_sgetf2_nopiv_internal_batched | ( | magma_int_t | m, |
magma_int_t | n, | ||
float ** | dA_array, | ||
magma_int_t | ai, | ||
magma_int_t | aj, | ||
magma_int_t | ldda, | ||
magma_int_t * | info_array, | ||
magma_int_t | gbstep, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
sgetf2_nopiv computes the non-pivoting LU factorization of an M-by-N matrix A.
This routine can deal with matrices of limited widths, so it is for internal use.
The factorization has the form A = L * U where L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is a batched version that factors batchCount M-by-N matrices in parallel.
[in] | m | INTEGER The number of rows the matrix A. N >= 0. |
[in] | n | INTEGER The number of columns of the matrix A. N >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = L*U; the unit diagonal elements of L are not stored. |
[in] | ai | INTEGER Row offset for dA_array. |
[in] | aj | INTEGER Column offset for dA_array. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | gbstep | INTEGER Internal use. |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_sgetrf_batched_smallsq_noshfl | ( | magma_int_t | n, |
float ** | dA_array, | ||
magma_int_t | ldda, | ||
magma_int_t ** | ipiv_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
sgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges.
This routine can deal only with square matrices of size up to 32
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
[in] | n | INTEGER The size of each matrix A. N >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_sgetrf_batched_smallsq_shfl | ( | magma_int_t | n, |
float ** | dA_array, | ||
magma_int_t | ldda, | ||
magma_int_t ** | ipiv_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
sgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges.
This routine can deal only with square matrices of size up to 32
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
[in] | n | INTEGER The size of each matrix A. N >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_zgbtrf_batched | ( | magma_int_t | use_pivoting, |
magma_int_t | m, | ||
magma_int_t | n, | ||
magma_int_t | kl, | ||
magma_int_t | ku, | ||
magmaDoubleComplex ** | dA_array, | ||
magma_int_t | ldda, | ||
magma_int_t ** | ipiv_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
zgbtrf_batched computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges.
This routine can deal only with square matrices of size up to 32
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
[in] | n | INTEGER The size of each matrix A. N >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_zgetf2_nopiv_internal_batched | ( | magma_int_t | m, |
magma_int_t | n, | ||
magmaDoubleComplex ** | dA_array, | ||
magma_int_t | ai, | ||
magma_int_t | aj, | ||
magma_int_t | ldda, | ||
magma_int_t * | info_array, | ||
magma_int_t | gbstep, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
zgetf2_nopiv computes the non-pivoting LU factorization of an M-by-N matrix A.
This routine can deal with matrices of limited widths, so it is for internal use.
The factorization has the form A = L * U where L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is a batched version that factors batchCount M-by-N matrices in parallel.
[in] | m | INTEGER The number of rows the matrix A. N >= 0. |
[in] | n | INTEGER The number of columns of the matrix A. N >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = L*U; the unit diagonal elements of L are not stored. |
[in] | ai | INTEGER Row offset for dA_array. |
[in] | aj | INTEGER Column offset for dA_array. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | gbstep | INTEGER Internal use. |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_zgetrf_batched_smallsq_noshfl | ( | magma_int_t | n, |
magmaDoubleComplex ** | dA_array, | ||
magma_int_t | ldda, | ||
magma_int_t ** | ipiv_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
zgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges.
This routine can deal only with square matrices of size up to 32
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
[in] | n | INTEGER The size of each matrix A. N >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_zgetrf_batched_smallsq_shfl | ( | magma_int_t | n, |
magmaDoubleComplex ** | dA_array, | ||
magma_int_t | ldda, | ||
magma_int_t ** | ipiv_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
zgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges.
This routine can deal only with square matrices of size up to 32
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
[in] | n | INTEGER The size of each matrix A. N >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |