![]() |
MAGMA 2.9.0
Matrix Algebra for GPU and Multicore Architectures
|
Functions | |
static magma_int_t | magma_cgetf2_batched_v1 (magma_int_t m, magma_int_t n, magmaFloatComplex **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue) |
CGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
magma_int_t | magma_cgetf2_native (magma_int_t m, magma_int_t n, magmaFloatComplex_ptr dA, magma_int_t ldda, magma_int_t *dipiv, magma_int_t *dipivinfo, magma_int_t *dinfo, magma_int_t gbstep, magma_event_t events[2], magma_queue_t queue, magma_queue_t update_queue) |
CGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
magma_int_t | magma_cgetf2_vbatched (magma_int_t *m, magma_int_t *n, magma_int_t *minmn, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, magmaFloatComplex **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t *ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue) |
CGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
static magma_int_t | magma_dgetf2_batched_v1 (magma_int_t m, magma_int_t n, double **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue) |
DGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
magma_int_t | magma_dgetf2_native (magma_int_t m, magma_int_t n, magmaDouble_ptr dA, magma_int_t ldda, magma_int_t *dipiv, magma_int_t *dipivinfo, magma_int_t *dinfo, magma_int_t gbstep, magma_event_t events[2], magma_queue_t queue, magma_queue_t update_queue) |
DGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
magma_int_t | magma_dgetf2_vbatched (magma_int_t *m, magma_int_t *n, magma_int_t *minmn, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, double **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t *ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue) |
DGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
static magma_int_t | magma_sgetf2_batched_v1 (magma_int_t m, magma_int_t n, float **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue) |
SGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
magma_int_t | magma_sgetf2_native (magma_int_t m, magma_int_t n, magmaFloat_ptr dA, magma_int_t ldda, magma_int_t *dipiv, magma_int_t *dipivinfo, magma_int_t *dinfo, magma_int_t gbstep, magma_event_t events[2], magma_queue_t queue, magma_queue_t update_queue) |
SGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
magma_int_t | magma_sgetf2_vbatched (magma_int_t *m, magma_int_t *n, magma_int_t *minmn, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, float **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t *ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue) |
SGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
static magma_int_t | magma_zgetf2_batched_v1 (magma_int_t m, magma_int_t n, magmaDoubleComplex **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue) |
ZGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
magma_int_t | magma_zgetf2_native (magma_int_t m, magma_int_t n, magmaDoubleComplex_ptr dA, magma_int_t ldda, magma_int_t *dipiv, magma_int_t *dipivinfo, magma_int_t *dinfo, magma_int_t gbstep, magma_event_t events[2], magma_queue_t queue, magma_queue_t update_queue) |
ZGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
magma_int_t | magma_zgetf2_vbatched (magma_int_t *m, magma_int_t *n, magma_int_t *minmn, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, magmaDoubleComplex **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t *ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue) |
ZGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
void | magma_cgetf2trsm_batched (magma_int_t ib, magma_int_t n, magmaFloatComplex **dA_array, magma_int_t step, magma_int_t ldda, magma_int_t batchCount, magma_queue_t queue) |
cgetf2trsm solves one of the matrix equations on gpu | |
magma_int_t | magma_cgetf2_fused_batched (magma_int_t m, magma_int_t n, magmaFloatComplex **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **dipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue) |
magma_cgetf2_reg_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
void | magma_dgetf2trsm_batched (magma_int_t ib, magma_int_t n, double **dA_array, magma_int_t step, magma_int_t ldda, magma_int_t batchCount, magma_queue_t queue) |
dgetf2trsm solves one of the matrix equations on gpu | |
magma_int_t | magma_dgetf2_fused_batched (magma_int_t m, magma_int_t n, double **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **dipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue) |
magma_dgetf2_reg_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
void | magma_sgetf2trsm_batched (magma_int_t ib, magma_int_t n, float **dA_array, magma_int_t step, magma_int_t ldda, magma_int_t batchCount, magma_queue_t queue) |
sgetf2trsm solves one of the matrix equations on gpu | |
magma_int_t | magma_sgetf2_fused_batched (magma_int_t m, magma_int_t n, float **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **dipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue) |
magma_sgetf2_reg_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
void | magma_zgetf2trsm_batched (magma_int_t ib, magma_int_t n, magmaDoubleComplex **dA_array, magma_int_t step, magma_int_t ldda, magma_int_t batchCount, magma_queue_t queue) |
zgetf2trsm solves one of the matrix equations on gpu | |
magma_int_t | magma_zgetf2_fused_batched (magma_int_t m, magma_int_t n, magmaDoubleComplex **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **dipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue) |
magma_zgetf2_reg_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
|
static |
CGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
[in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
[in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ai | INTEGER Row offset for A. |
[in] | aj | INTEGER Column offset for A. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | gbstep | INTEGER internal use. |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
this is an internal routine that might have many assumption.
magma_int_t magma_cgetf2_native | ( | magma_int_t | m, |
magma_int_t | n, | ||
magmaFloatComplex_ptr | dA, | ||
magma_int_t | ldda, | ||
magma_int_t * | dipiv, | ||
magma_int_t * | dipivinfo, | ||
magma_int_t * | dinfo, | ||
magma_int_t | gbstep, | ||
magma_event_t | events[2], | ||
magma_queue_t | queue, | ||
magma_queue_t | update_queue ) |
CGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a GPU-only routine. The host CPU is not used.
[in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
[in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
[in,out] | dA | A COMPLEX array on the GPU, dimension (LDDA,N). On entry, an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | INTEGER The leading dimension of A. LDDA >= max(1,M). |
[out] | dipiv | An INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | dipivinfo | An INTEGER array, for internal use. |
[out] | dinfo | INTEGER, stored on the GPU
|
[in] | gbstep | INTEGER internal use. |
[in] | queues | magma_queue_t array of size 2. Queues to execute in. |
[in] | events | magma_event_t array of size 2 Internal use. |
This is an internal routine.
magma_int_t magma_cgetf2_vbatched | ( | magma_int_t * | m, |
magma_int_t * | n, | ||
magma_int_t * | minmn, | ||
magma_int_t | max_m, | ||
magma_int_t | max_n, | ||
magma_int_t | max_minmn, | ||
magma_int_t | max_mxn, | ||
magmaFloatComplex ** | dA_array, | ||
magma_int_t | Ai, | ||
magma_int_t | Aj, | ||
magma_int_t * | ldda, | ||
magma_int_t ** | ipiv_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | gbstep, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue ) |
CGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
[in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
[in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ai | INTEGER Row offset for A. |
[in] | aj | INTEGER Column offset for A. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | gbstep | INTEGER internal use. |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
this is an internal routine that might have many assumption.
|
static |
DGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
[in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
[in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ai | INTEGER Row offset for A. |
[in] | aj | INTEGER Column offset for A. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | gbstep | INTEGER internal use. |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
this is an internal routine that might have many assumption.
magma_int_t magma_dgetf2_native | ( | magma_int_t | m, |
magma_int_t | n, | ||
magmaDouble_ptr | dA, | ||
magma_int_t | ldda, | ||
magma_int_t * | dipiv, | ||
magma_int_t * | dipivinfo, | ||
magma_int_t * | dinfo, | ||
magma_int_t | gbstep, | ||
magma_event_t | events[2], | ||
magma_queue_t | queue, | ||
magma_queue_t | update_queue ) |
DGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a GPU-only routine. The host CPU is not used.
[in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
[in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
[in,out] | dA | A DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | INTEGER The leading dimension of A. LDDA >= max(1,M). |
[out] | dipiv | An INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | dipivinfo | An INTEGER array, for internal use. |
[out] | dinfo | INTEGER, stored on the GPU
|
[in] | gbstep | INTEGER internal use. |
[in] | queues | magma_queue_t array of size 2. Queues to execute in. |
[in] | events | magma_event_t array of size 2 Internal use. |
This is an internal routine.
magma_int_t magma_dgetf2_vbatched | ( | magma_int_t * | m, |
magma_int_t * | n, | ||
magma_int_t * | minmn, | ||
magma_int_t | max_m, | ||
magma_int_t | max_n, | ||
magma_int_t | max_minmn, | ||
magma_int_t | max_mxn, | ||
double ** | dA_array, | ||
magma_int_t | Ai, | ||
magma_int_t | Aj, | ||
magma_int_t * | ldda, | ||
magma_int_t ** | ipiv_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | gbstep, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue ) |
DGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
[in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
[in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ai | INTEGER Row offset for A. |
[in] | aj | INTEGER Column offset for A. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | gbstep | INTEGER internal use. |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
this is an internal routine that might have many assumption.
|
static |
SGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
[in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
[in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ai | INTEGER Row offset for A. |
[in] | aj | INTEGER Column offset for A. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | gbstep | INTEGER internal use. |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
this is an internal routine that might have many assumption.
magma_int_t magma_sgetf2_native | ( | magma_int_t | m, |
magma_int_t | n, | ||
magmaFloat_ptr | dA, | ||
magma_int_t | ldda, | ||
magma_int_t * | dipiv, | ||
magma_int_t * | dipivinfo, | ||
magma_int_t * | dinfo, | ||
magma_int_t | gbstep, | ||
magma_event_t | events[2], | ||
magma_queue_t | queue, | ||
magma_queue_t | update_queue ) |
SGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a GPU-only routine. The host CPU is not used.
[in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
[in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
[in,out] | dA | A REAL array on the GPU, dimension (LDDA,N). On entry, an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | INTEGER The leading dimension of A. LDDA >= max(1,M). |
[out] | dipiv | An INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | dipivinfo | An INTEGER array, for internal use. |
[out] | dinfo | INTEGER, stored on the GPU
|
[in] | gbstep | INTEGER internal use. |
[in] | queues | magma_queue_t array of size 2. Queues to execute in. |
[in] | events | magma_event_t array of size 2 Internal use. |
This is an internal routine.
magma_int_t magma_sgetf2_vbatched | ( | magma_int_t * | m, |
magma_int_t * | n, | ||
magma_int_t * | minmn, | ||
magma_int_t | max_m, | ||
magma_int_t | max_n, | ||
magma_int_t | max_minmn, | ||
magma_int_t | max_mxn, | ||
float ** | dA_array, | ||
magma_int_t | Ai, | ||
magma_int_t | Aj, | ||
magma_int_t * | ldda, | ||
magma_int_t ** | ipiv_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | gbstep, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue ) |
SGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
[in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
[in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ai | INTEGER Row offset for A. |
[in] | aj | INTEGER Column offset for A. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | gbstep | INTEGER internal use. |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
this is an internal routine that might have many assumption.
|
static |
ZGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
[in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
[in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ai | INTEGER Row offset for A. |
[in] | aj | INTEGER Column offset for A. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | gbstep | INTEGER internal use. |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
this is an internal routine that might have many assumption.
magma_int_t magma_zgetf2_native | ( | magma_int_t | m, |
magma_int_t | n, | ||
magmaDoubleComplex_ptr | dA, | ||
magma_int_t | ldda, | ||
magma_int_t * | dipiv, | ||
magma_int_t * | dipivinfo, | ||
magma_int_t * | dinfo, | ||
magma_int_t | gbstep, | ||
magma_event_t | events[2], | ||
magma_queue_t | queue, | ||
magma_queue_t | update_queue ) |
ZGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a GPU-only routine. The host CPU is not used.
[in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
[in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
[in,out] | dA | A COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | INTEGER The leading dimension of A. LDDA >= max(1,M). |
[out] | dipiv | An INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | dipivinfo | An INTEGER array, for internal use. |
[out] | dinfo | INTEGER, stored on the GPU
|
[in] | gbstep | INTEGER internal use. |
[in] | queues | magma_queue_t array of size 2. Queues to execute in. |
[in] | events | magma_event_t array of size 2 Internal use. |
This is an internal routine.
magma_int_t magma_zgetf2_vbatched | ( | magma_int_t * | m, |
magma_int_t * | n, | ||
magma_int_t * | minmn, | ||
magma_int_t | max_m, | ||
magma_int_t | max_n, | ||
magma_int_t | max_minmn, | ||
magma_int_t | max_mxn, | ||
magmaDoubleComplex ** | dA_array, | ||
magma_int_t | Ai, | ||
magma_int_t | Aj, | ||
magma_int_t * | ldda, | ||
magma_int_t ** | ipiv_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | gbstep, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue ) |
ZGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
[in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
[in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ai | INTEGER Row offset for A. |
[in] | aj | INTEGER Column offset for A. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | gbstep | INTEGER internal use. |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
this is an internal routine that might have many assumption.
void magma_cgetf2trsm_batched | ( | magma_int_t | ib, |
magma_int_t | n, | ||
magmaFloatComplex ** | dA_array, | ||
magma_int_t | step, | ||
magma_int_t | ldda, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue ) |
cgetf2trsm solves one of the matrix equations on gpu
B = C^-1 * B
where C, B are part of the matrix A in dA_array,
This version load C, B into shared memory and solve it and copy back to GPU device memory. This is an internal routine that might have many assumption.
[in] | ib | INTEGER The number of rows/columns of each matrix C, and rows of B. ib >= 0. |
[in] | n | INTEGER The number of columns of each matrix B. n >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[in] | step | INTEGER The starting address of matrix C in A. LDDA >= max(1,M). |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_cgetf2_fused_batched | ( | magma_int_t | m, |
magma_int_t | n, | ||
magmaFloatComplex ** | dA_array, | ||
magma_int_t | ai, | ||
magma_int_t | aj, | ||
magma_int_t | ldda, | ||
magma_int_t ** | dipiv_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue ) |
magma_cgetf2_reg_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
This routine is used for batch LU panel factorization, and has specific assumption about the value of N
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is a right-looking unblocked version of the algorithm. The routine is a batched version that factors batchCount M-by-N matrices in parallel.
This version load an entire matrix (m*n) into registers and factorize it with pivoting and copy back to GPU device memory.
[in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
[in] | n | INTEGER The number of columns of each matrix A. ib >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ai | INTEGER Row offset for A. |
[in] | aj | INTEGER Column offset for A. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | dipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
void magma_dgetf2trsm_batched | ( | magma_int_t | ib, |
magma_int_t | n, | ||
double ** | dA_array, | ||
magma_int_t | step, | ||
magma_int_t | ldda, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue ) |
dgetf2trsm solves one of the matrix equations on gpu
B = C^-1 * B
where C, B are part of the matrix A in dA_array,
This version load C, B into shared memory and solve it and copy back to GPU device memory. This is an internal routine that might have many assumption.
[in] | ib | INTEGER The number of rows/columns of each matrix C, and rows of B. ib >= 0. |
[in] | n | INTEGER The number of columns of each matrix B. n >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[in] | step | INTEGER The starting address of matrix C in A. LDDA >= max(1,M). |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_dgetf2_fused_batched | ( | magma_int_t | m, |
magma_int_t | n, | ||
double ** | dA_array, | ||
magma_int_t | ai, | ||
magma_int_t | aj, | ||
magma_int_t | ldda, | ||
magma_int_t ** | dipiv_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue ) |
magma_dgetf2_reg_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
This routine is used for batch LU panel factorization, and has specific assumption about the value of N
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is a right-looking unblocked version of the algorithm. The routine is a batched version that factors batchCount M-by-N matrices in parallel.
This version load an entire matrix (m*n) into registers and factorize it with pivoting and copy back to GPU device memory.
[in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
[in] | n | INTEGER The number of columns of each matrix A. ib >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ai | INTEGER Row offset for A. |
[in] | aj | INTEGER Column offset for A. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | dipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
void magma_sgetf2trsm_batched | ( | magma_int_t | ib, |
magma_int_t | n, | ||
float ** | dA_array, | ||
magma_int_t | step, | ||
magma_int_t | ldda, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue ) |
sgetf2trsm solves one of the matrix equations on gpu
B = C^-1 * B
where C, B are part of the matrix A in dA_array,
This version load C, B into shared memory and solve it and copy back to GPU device memory. This is an internal routine that might have many assumption.
[in] | ib | INTEGER The number of rows/columns of each matrix C, and rows of B. ib >= 0. |
[in] | n | INTEGER The number of columns of each matrix B. n >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[in] | step | INTEGER The starting address of matrix C in A. LDDA >= max(1,M). |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_sgetf2_fused_batched | ( | magma_int_t | m, |
magma_int_t | n, | ||
float ** | dA_array, | ||
magma_int_t | ai, | ||
magma_int_t | aj, | ||
magma_int_t | ldda, | ||
magma_int_t ** | dipiv_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue ) |
magma_sgetf2_reg_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
This routine is used for batch LU panel factorization, and has specific assumption about the value of N
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is a right-looking unblocked version of the algorithm. The routine is a batched version that factors batchCount M-by-N matrices in parallel.
This version load an entire matrix (m*n) into registers and factorize it with pivoting and copy back to GPU device memory.
[in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
[in] | n | INTEGER The number of columns of each matrix A. ib >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ai | INTEGER Row offset for A. |
[in] | aj | INTEGER Column offset for A. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | dipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
void magma_zgetf2trsm_batched | ( | magma_int_t | ib, |
magma_int_t | n, | ||
magmaDoubleComplex ** | dA_array, | ||
magma_int_t | step, | ||
magma_int_t | ldda, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue ) |
zgetf2trsm solves one of the matrix equations on gpu
B = C^-1 * B
where C, B are part of the matrix A in dA_array,
This version load C, B into shared memory and solve it and copy back to GPU device memory. This is an internal routine that might have many assumption.
[in] | ib | INTEGER The number of rows/columns of each matrix C, and rows of B. ib >= 0. |
[in] | n | INTEGER The number of columns of each matrix B. n >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[in] | step | INTEGER The starting address of matrix C in A. LDDA >= max(1,M). |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
magma_int_t magma_zgetf2_fused_batched | ( | magma_int_t | m, |
magma_int_t | n, | ||
magmaDoubleComplex ** | dA_array, | ||
magma_int_t | ai, | ||
magma_int_t | aj, | ||
magma_int_t | ldda, | ||
magma_int_t ** | dipiv_array, | ||
magma_int_t * | info_array, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue ) |
magma_zgetf2_reg_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
This routine is used for batch LU panel factorization, and has specific assumption about the value of N
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is a right-looking unblocked version of the algorithm. The routine is a batched version that factors batchCount M-by-N matrices in parallel.
This version load an entire matrix (m*n) into registers and factorize it with pivoting and copy back to GPU device memory.
[in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
[in] | n | INTEGER The number of columns of each matrix A. ib >= 0. |
[in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
[in] | ai | INTEGER Row offset for A. |
[in] | aj | INTEGER Column offset for A. |
[in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
[out] | dipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
[out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |