![]() |
MAGMA 2.10.0
Matrix Algebra for GPU and Multicore Architectures
|
Functions | |
| static magma_int_t | magma_cgetf2_batched_v1 (magma_int_t m, magma_int_t n, magmaFloatComplex **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue) |
| CGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
| magma_int_t | magma_cgetf2_native (magma_int_t m, magma_int_t n, magmaFloatComplex_ptr dA, magma_int_t ldda, magma_int_t *dipiv, magma_int_t *dipivinfo, magma_int_t *dinfo, magma_int_t gbstep, magma_event_t events[2], magma_queue_t queue, magma_queue_t update_queue) |
| CGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
| magma_int_t | magma_cgetf2_nopiv_vbatched (magma_int_t *m, magma_int_t *n, magma_int_t *minmn, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, magmaFloatComplex **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t *ldda, float *dtol_array, float eps, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue) |
| CGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
| magma_int_t | magma_cgetf2_vbatched (magma_int_t *m, magma_int_t *n, magma_int_t *minmn, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, magmaFloatComplex **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t *ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue) |
| CGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
| static magma_int_t | magma_dgetf2_batched_v1 (magma_int_t m, magma_int_t n, double **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue) |
| DGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
| magma_int_t | magma_dgetf2_native (magma_int_t m, magma_int_t n, magmaDouble_ptr dA, magma_int_t ldda, magma_int_t *dipiv, magma_int_t *dipivinfo, magma_int_t *dinfo, magma_int_t gbstep, magma_event_t events[2], magma_queue_t queue, magma_queue_t update_queue) |
| DGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
| magma_int_t | magma_dgetf2_nopiv_vbatched (magma_int_t *m, magma_int_t *n, magma_int_t *minmn, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, double **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t *ldda, double *dtol_array, double eps, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue) |
| DGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
| magma_int_t | magma_dgetf2_vbatched (magma_int_t *m, magma_int_t *n, magma_int_t *minmn, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, double **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t *ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue) |
| DGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
| static magma_int_t | magma_sgetf2_batched_v1 (magma_int_t m, magma_int_t n, float **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue) |
| SGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
| magma_int_t | magma_sgetf2_native (magma_int_t m, magma_int_t n, magmaFloat_ptr dA, magma_int_t ldda, magma_int_t *dipiv, magma_int_t *dipivinfo, magma_int_t *dinfo, magma_int_t gbstep, magma_event_t events[2], magma_queue_t queue, magma_queue_t update_queue) |
| SGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
| magma_int_t | magma_sgetf2_nopiv_vbatched (magma_int_t *m, magma_int_t *n, magma_int_t *minmn, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, float **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t *ldda, float *dtol_array, float eps, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue) |
| SGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
| magma_int_t | magma_sgetf2_vbatched (magma_int_t *m, magma_int_t *n, magma_int_t *minmn, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, float **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t *ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue) |
| SGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
| static magma_int_t | magma_zgetf2_batched_v1 (magma_int_t m, magma_int_t n, magmaDoubleComplex **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue) |
| ZGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
| magma_int_t | magma_zgetf2_native (magma_int_t m, magma_int_t n, magmaDoubleComplex_ptr dA, magma_int_t ldda, magma_int_t *dipiv, magma_int_t *dipivinfo, magma_int_t *dinfo, magma_int_t gbstep, magma_event_t events[2], magma_queue_t queue, magma_queue_t update_queue) |
| ZGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
| magma_int_t | magma_zgetf2_nopiv_vbatched (magma_int_t *m, magma_int_t *n, magma_int_t *minmn, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, magmaDoubleComplex **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t *ldda, double *dtol_array, double eps, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue) |
| ZGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
| magma_int_t | magma_zgetf2_vbatched (magma_int_t *m, magma_int_t *n, magma_int_t *minmn, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, magmaDoubleComplex **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t *ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue) |
| ZGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
| void | magma_cgetf2trsm_batched (magma_int_t ib, magma_int_t n, magmaFloatComplex **dA_array, magma_int_t step, magma_int_t ldda, magma_int_t batchCount, magma_queue_t queue) |
| cgetf2trsm solves one of the matrix equations on gpu | |
| magma_int_t | magma_cgetf2_fused_batched (magma_int_t m, magma_int_t n, magmaFloatComplex **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **dipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue) |
| magma_cgetf2_reg_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
| void | magma_dgetf2trsm_batched (magma_int_t ib, magma_int_t n, double **dA_array, magma_int_t step, magma_int_t ldda, magma_int_t batchCount, magma_queue_t queue) |
| dgetf2trsm solves one of the matrix equations on gpu | |
| magma_int_t | magma_dgetf2_fused_batched (magma_int_t m, magma_int_t n, double **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **dipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue) |
| magma_dgetf2_reg_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
| void | magma_sgetf2trsm_batched (magma_int_t ib, magma_int_t n, float **dA_array, magma_int_t step, magma_int_t ldda, magma_int_t batchCount, magma_queue_t queue) |
| sgetf2trsm solves one of the matrix equations on gpu | |
| magma_int_t | magma_sgetf2_fused_batched (magma_int_t m, magma_int_t n, float **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **dipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue) |
| magma_sgetf2_reg_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
| void | magma_zgetf2trsm_batched (magma_int_t ib, magma_int_t n, magmaDoubleComplex **dA_array, magma_int_t step, magma_int_t ldda, magma_int_t batchCount, magma_queue_t queue) |
| zgetf2trsm solves one of the matrix equations on gpu | |
| magma_int_t | magma_zgetf2_fused_batched (magma_int_t m, magma_int_t n, magmaDoubleComplex **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **dipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue) |
| magma_zgetf2_reg_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. | |
|
static |
CGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
| [in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
| [in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
| [in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
| [in] | ai | INTEGER Row offset for A. |
| [in] | aj | INTEGER Column offset for A. |
| [in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
| [out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
| [out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
| [in] | gbstep | INTEGER internal use. |
| [in] | batchCount | INTEGER The number of matrices to operate on. |
| [in] | queue | magma_queue_t Queue to execute in. |
this is an internal routine that might have many assumption.
| magma_int_t magma_cgetf2_native | ( | magma_int_t | m, |
| magma_int_t | n, | ||
| magmaFloatComplex_ptr | dA, | ||
| magma_int_t | ldda, | ||
| magma_int_t * | dipiv, | ||
| magma_int_t * | dipivinfo, | ||
| magma_int_t * | dinfo, | ||
| magma_int_t | gbstep, | ||
| magma_event_t | events[2], | ||
| magma_queue_t | queue, | ||
| magma_queue_t | update_queue ) |
CGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a GPU-only routine. The host CPU is not used.
| [in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
| [in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
| [in,out] | dA | A COMPLEX array on the GPU, dimension (LDDA,N). On entry, an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
| [in] | ldda | INTEGER The leading dimension of A. LDDA >= max(1,M). |
| [out] | dipiv | An INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
| [out] | dipivinfo | An INTEGER array, for internal use. |
| [out] | dinfo | INTEGER, stored on the GPU
|
| [in] | gbstep | INTEGER internal use. |
| [in] | queues | magma_queue_t array of size 2. Queues to execute in. |
| [in] | events | magma_event_t array of size 2 Internal use. |
This is an internal routine.
| magma_int_t magma_cgetf2_nopiv_vbatched | ( | magma_int_t * | m, |
| magma_int_t * | n, | ||
| magma_int_t * | minmn, | ||
| magma_int_t | max_m, | ||
| magma_int_t | max_n, | ||
| magma_int_t | max_minmn, | ||
| magma_int_t | max_mxn, | ||
| magmaFloatComplex ** | dA_array, | ||
| magma_int_t | Ai, | ||
| magma_int_t | Aj, | ||
| magma_int_t * | ldda, | ||
| float * | dtol_array, | ||
| float | eps, | ||
| magma_int_t * | info_array, | ||
| magma_int_t | gbstep, | ||
| magma_int_t | batchCount, | ||
| magma_queue_t | queue ) |
CGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
| [in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
| [in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
| [in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
| [in] | ai | INTEGER Row offset for A. |
| [in] | aj | INTEGER Column offset for A. |
| [in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
| [in] | dtol_array | Array of DOUBLEs, dimension (batchCount), for corresponding matrices. Each is an the tolerance that is compared to the diagonal element before the column is scaled by its inverse. If the value of the diagonal is less than the threshold, the diagonal is replaced by the threshold. If the array is set to NULL, then the threshold is set to the eps parameter |
| [in] | eps | DOUBLE The value to use for the tolerance for all matrices if the dtol_array is NULL |
| [out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
| [in] | gbstep | INTEGER internal use. |
| [in] | batchCount | INTEGER The number of matrices to operate on. |
| [in] | queue | magma_queue_t Queue to execute in. |
this is an internal routine that might have many assumption.
| magma_int_t magma_cgetf2_vbatched | ( | magma_int_t * | m, |
| magma_int_t * | n, | ||
| magma_int_t * | minmn, | ||
| magma_int_t | max_m, | ||
| magma_int_t | max_n, | ||
| magma_int_t | max_minmn, | ||
| magma_int_t | max_mxn, | ||
| magmaFloatComplex ** | dA_array, | ||
| magma_int_t | Ai, | ||
| magma_int_t | Aj, | ||
| magma_int_t * | ldda, | ||
| magma_int_t ** | ipiv_array, | ||
| magma_int_t * | info_array, | ||
| magma_int_t | gbstep, | ||
| magma_int_t | batchCount, | ||
| magma_queue_t | queue ) |
CGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
| [in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
| [in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
| [in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
| [in] | ai | INTEGER Row offset for A. |
| [in] | aj | INTEGER Column offset for A. |
| [in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
| [out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
| [out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
| [in] | gbstep | INTEGER internal use. |
| [in] | batchCount | INTEGER The number of matrices to operate on. |
| [in] | queue | magma_queue_t Queue to execute in. |
this is an internal routine that might have many assumption.
|
static |
DGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
| [in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
| [in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
| [in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
| [in] | ai | INTEGER Row offset for A. |
| [in] | aj | INTEGER Column offset for A. |
| [in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
| [out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
| [out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
| [in] | gbstep | INTEGER internal use. |
| [in] | batchCount | INTEGER The number of matrices to operate on. |
| [in] | queue | magma_queue_t Queue to execute in. |
this is an internal routine that might have many assumption.
| magma_int_t magma_dgetf2_native | ( | magma_int_t | m, |
| magma_int_t | n, | ||
| magmaDouble_ptr | dA, | ||
| magma_int_t | ldda, | ||
| magma_int_t * | dipiv, | ||
| magma_int_t * | dipivinfo, | ||
| magma_int_t * | dinfo, | ||
| magma_int_t | gbstep, | ||
| magma_event_t | events[2], | ||
| magma_queue_t | queue, | ||
| magma_queue_t | update_queue ) |
DGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a GPU-only routine. The host CPU is not used.
| [in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
| [in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
| [in,out] | dA | A DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
| [in] | ldda | INTEGER The leading dimension of A. LDDA >= max(1,M). |
| [out] | dipiv | An INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
| [out] | dipivinfo | An INTEGER array, for internal use. |
| [out] | dinfo | INTEGER, stored on the GPU
|
| [in] | gbstep | INTEGER internal use. |
| [in] | queues | magma_queue_t array of size 2. Queues to execute in. |
| [in] | events | magma_event_t array of size 2 Internal use. |
This is an internal routine.
| magma_int_t magma_dgetf2_nopiv_vbatched | ( | magma_int_t * | m, |
| magma_int_t * | n, | ||
| magma_int_t * | minmn, | ||
| magma_int_t | max_m, | ||
| magma_int_t | max_n, | ||
| magma_int_t | max_minmn, | ||
| magma_int_t | max_mxn, | ||
| double ** | dA_array, | ||
| magma_int_t | Ai, | ||
| magma_int_t | Aj, | ||
| magma_int_t * | ldda, | ||
| double * | dtol_array, | ||
| double | eps, | ||
| magma_int_t * | info_array, | ||
| magma_int_t | gbstep, | ||
| magma_int_t | batchCount, | ||
| magma_queue_t | queue ) |
DGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
| [in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
| [in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
| [in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
| [in] | ai | INTEGER Row offset for A. |
| [in] | aj | INTEGER Column offset for A. |
| [in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
| [in] | dtol_array | Array of DOUBLEs, dimension (batchCount), for corresponding matrices. Each is an the tolerance that is compared to the diagonal element before the column is scaled by its inverse. If the value of the diagonal is less than the threshold, the diagonal is replaced by the threshold. If the array is set to NULL, then the threshold is set to the eps parameter |
| [in] | eps | DOUBLE The value to use for the tolerance for all matrices if the dtol_array is NULL |
| [out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
| [in] | gbstep | INTEGER internal use. |
| [in] | batchCount | INTEGER The number of matrices to operate on. |
| [in] | queue | magma_queue_t Queue to execute in. |
this is an internal routine that might have many assumption.
| magma_int_t magma_dgetf2_vbatched | ( | magma_int_t * | m, |
| magma_int_t * | n, | ||
| magma_int_t * | minmn, | ||
| magma_int_t | max_m, | ||
| magma_int_t | max_n, | ||
| magma_int_t | max_minmn, | ||
| magma_int_t | max_mxn, | ||
| double ** | dA_array, | ||
| magma_int_t | Ai, | ||
| magma_int_t | Aj, | ||
| magma_int_t * | ldda, | ||
| magma_int_t ** | ipiv_array, | ||
| magma_int_t * | info_array, | ||
| magma_int_t | gbstep, | ||
| magma_int_t | batchCount, | ||
| magma_queue_t | queue ) |
DGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
| [in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
| [in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
| [in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
| [in] | ai | INTEGER Row offset for A. |
| [in] | aj | INTEGER Column offset for A. |
| [in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
| [out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
| [out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
| [in] | gbstep | INTEGER internal use. |
| [in] | batchCount | INTEGER The number of matrices to operate on. |
| [in] | queue | magma_queue_t Queue to execute in. |
this is an internal routine that might have many assumption.
|
static |
SGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
| [in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
| [in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
| [in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
| [in] | ai | INTEGER Row offset for A. |
| [in] | aj | INTEGER Column offset for A. |
| [in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
| [out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
| [out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
| [in] | gbstep | INTEGER internal use. |
| [in] | batchCount | INTEGER The number of matrices to operate on. |
| [in] | queue | magma_queue_t Queue to execute in. |
this is an internal routine that might have many assumption.
| magma_int_t magma_sgetf2_native | ( | magma_int_t | m, |
| magma_int_t | n, | ||
| magmaFloat_ptr | dA, | ||
| magma_int_t | ldda, | ||
| magma_int_t * | dipiv, | ||
| magma_int_t * | dipivinfo, | ||
| magma_int_t * | dinfo, | ||
| magma_int_t | gbstep, | ||
| magma_event_t | events[2], | ||
| magma_queue_t | queue, | ||
| magma_queue_t | update_queue ) |
SGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a GPU-only routine. The host CPU is not used.
| [in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
| [in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
| [in,out] | dA | A REAL array on the GPU, dimension (LDDA,N). On entry, an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
| [in] | ldda | INTEGER The leading dimension of A. LDDA >= max(1,M). |
| [out] | dipiv | An INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
| [out] | dipivinfo | An INTEGER array, for internal use. |
| [out] | dinfo | INTEGER, stored on the GPU
|
| [in] | gbstep | INTEGER internal use. |
| [in] | queues | magma_queue_t array of size 2. Queues to execute in. |
| [in] | events | magma_event_t array of size 2 Internal use. |
This is an internal routine.
| magma_int_t magma_sgetf2_nopiv_vbatched | ( | magma_int_t * | m, |
| magma_int_t * | n, | ||
| magma_int_t * | minmn, | ||
| magma_int_t | max_m, | ||
| magma_int_t | max_n, | ||
| magma_int_t | max_minmn, | ||
| magma_int_t | max_mxn, | ||
| float ** | dA_array, | ||
| magma_int_t | Ai, | ||
| magma_int_t | Aj, | ||
| magma_int_t * | ldda, | ||
| float * | dtol_array, | ||
| float | eps, | ||
| magma_int_t * | info_array, | ||
| magma_int_t | gbstep, | ||
| magma_int_t | batchCount, | ||
| magma_queue_t | queue ) |
SGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
| [in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
| [in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
| [in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
| [in] | ai | INTEGER Row offset for A. |
| [in] | aj | INTEGER Column offset for A. |
| [in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
| [in] | dtol_array | Array of DOUBLEs, dimension (batchCount), for corresponding matrices. Each is an the tolerance that is compared to the diagonal element before the column is scaled by its inverse. If the value of the diagonal is less than the threshold, the diagonal is replaced by the threshold. If the array is set to NULL, then the threshold is set to the eps parameter |
| [in] | eps | DOUBLE The value to use for the tolerance for all matrices if the dtol_array is NULL |
| [out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
| [in] | gbstep | INTEGER internal use. |
| [in] | batchCount | INTEGER The number of matrices to operate on. |
| [in] | queue | magma_queue_t Queue to execute in. |
this is an internal routine that might have many assumption.
| magma_int_t magma_sgetf2_vbatched | ( | magma_int_t * | m, |
| magma_int_t * | n, | ||
| magma_int_t * | minmn, | ||
| magma_int_t | max_m, | ||
| magma_int_t | max_n, | ||
| magma_int_t | max_minmn, | ||
| magma_int_t | max_mxn, | ||
| float ** | dA_array, | ||
| magma_int_t | Ai, | ||
| magma_int_t | Aj, | ||
| magma_int_t * | ldda, | ||
| magma_int_t ** | ipiv_array, | ||
| magma_int_t * | info_array, | ||
| magma_int_t | gbstep, | ||
| magma_int_t | batchCount, | ||
| magma_queue_t | queue ) |
SGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
| [in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
| [in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
| [in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
| [in] | ai | INTEGER Row offset for A. |
| [in] | aj | INTEGER Column offset for A. |
| [in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
| [out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
| [out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
| [in] | gbstep | INTEGER internal use. |
| [in] | batchCount | INTEGER The number of matrices to operate on. |
| [in] | queue | magma_queue_t Queue to execute in. |
this is an internal routine that might have many assumption.
|
static |
ZGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
| [in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
| [in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
| [in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
| [in] | ai | INTEGER Row offset for A. |
| [in] | aj | INTEGER Column offset for A. |
| [in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
| [out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
| [out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
| [in] | gbstep | INTEGER internal use. |
| [in] | batchCount | INTEGER The number of matrices to operate on. |
| [in] | queue | magma_queue_t Queue to execute in. |
this is an internal routine that might have many assumption.
| magma_int_t magma_zgetf2_native | ( | magma_int_t | m, |
| magma_int_t | n, | ||
| magmaDoubleComplex_ptr | dA, | ||
| magma_int_t | ldda, | ||
| magma_int_t * | dipiv, | ||
| magma_int_t * | dipivinfo, | ||
| magma_int_t * | dinfo, | ||
| magma_int_t | gbstep, | ||
| magma_event_t | events[2], | ||
| magma_queue_t | queue, | ||
| magma_queue_t | update_queue ) |
ZGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a GPU-only routine. The host CPU is not used.
| [in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
| [in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
| [in,out] | dA | A COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
| [in] | ldda | INTEGER The leading dimension of A. LDDA >= max(1,M). |
| [out] | dipiv | An INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
| [out] | dipivinfo | An INTEGER array, for internal use. |
| [out] | dinfo | INTEGER, stored on the GPU
|
| [in] | gbstep | INTEGER internal use. |
| [in] | queues | magma_queue_t array of size 2. Queues to execute in. |
| [in] | events | magma_event_t array of size 2 Internal use. |
This is an internal routine.
| magma_int_t magma_zgetf2_nopiv_vbatched | ( | magma_int_t * | m, |
| magma_int_t * | n, | ||
| magma_int_t * | minmn, | ||
| magma_int_t | max_m, | ||
| magma_int_t | max_n, | ||
| magma_int_t | max_minmn, | ||
| magma_int_t | max_mxn, | ||
| magmaDoubleComplex ** | dA_array, | ||
| magma_int_t | Ai, | ||
| magma_int_t | Aj, | ||
| magma_int_t * | ldda, | ||
| double * | dtol_array, | ||
| double | eps, | ||
| magma_int_t * | info_array, | ||
| magma_int_t | gbstep, | ||
| magma_int_t | batchCount, | ||
| magma_queue_t | queue ) |
ZGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
| [in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
| [in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
| [in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
| [in] | ai | INTEGER Row offset for A. |
| [in] | aj | INTEGER Column offset for A. |
| [in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
| [in] | dtol_array | Array of DOUBLEs, dimension (batchCount), for corresponding matrices. Each is an the tolerance that is compared to the diagonal element before the column is scaled by its inverse. If the value of the diagonal is less than the threshold, the diagonal is replaced by the threshold. If the array is set to NULL, then the threshold is set to the eps parameter |
| [in] | eps | DOUBLE The value to use for the tolerance for all matrices if the dtol_array is NULL |
| [out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
| [in] | gbstep | INTEGER internal use. |
| [in] | batchCount | INTEGER The number of matrices to operate on. |
| [in] | queue | magma_queue_t Queue to execute in. |
this is an internal routine that might have many assumption.
| magma_int_t magma_zgetf2_vbatched | ( | magma_int_t * | m, |
| magma_int_t * | n, | ||
| magma_int_t * | minmn, | ||
| magma_int_t | max_m, | ||
| magma_int_t | max_n, | ||
| magma_int_t | max_minmn, | ||
| magma_int_t | max_mxn, | ||
| magmaDoubleComplex ** | dA_array, | ||
| magma_int_t | Ai, | ||
| magma_int_t | Aj, | ||
| magma_int_t * | ldda, | ||
| magma_int_t ** | ipiv_array, | ||
| magma_int_t * | info_array, | ||
| magma_int_t | gbstep, | ||
| magma_int_t | batchCount, | ||
| magma_queue_t | queue ) |
ZGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.
| [in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
| [in] | n | INTEGER The number of columns of each matrix A. N >= 0. |
| [in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
| [in] | ai | INTEGER Row offset for A. |
| [in] | aj | INTEGER Column offset for A. |
| [in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
| [out] | ipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
| [out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
| [in] | gbstep | INTEGER internal use. |
| [in] | batchCount | INTEGER The number of matrices to operate on. |
| [in] | queue | magma_queue_t Queue to execute in. |
this is an internal routine that might have many assumption.
| void magma_cgetf2trsm_batched | ( | magma_int_t | ib, |
| magma_int_t | n, | ||
| magmaFloatComplex ** | dA_array, | ||
| magma_int_t | step, | ||
| magma_int_t | ldda, | ||
| magma_int_t | batchCount, | ||
| magma_queue_t | queue ) |
cgetf2trsm solves one of the matrix equations on gpu
B = C^-1 * B
where C, B are part of the matrix A in dA_array,
This version load C, B into shared memory and solve it and copy back to GPU device memory. This is an internal routine that might have many assumption.
| [in] | ib | INTEGER The number of rows/columns of each matrix C, and rows of B. ib >= 0. |
| [in] | n | INTEGER The number of columns of each matrix B. n >= 0. |
| [in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
| [in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
| [in] | step | INTEGER The starting address of matrix C in A. LDDA >= max(1,M). |
| [in] | batchCount | INTEGER The number of matrices to operate on. |
| [in] | queue | magma_queue_t Queue to execute in. |
| magma_int_t magma_cgetf2_fused_batched | ( | magma_int_t | m, |
| magma_int_t | n, | ||
| magmaFloatComplex ** | dA_array, | ||
| magma_int_t | ai, | ||
| magma_int_t | aj, | ||
| magma_int_t | ldda, | ||
| magma_int_t ** | dipiv_array, | ||
| magma_int_t * | info_array, | ||
| magma_int_t | batchCount, | ||
| magma_queue_t | queue ) |
magma_cgetf2_reg_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
This routine is used for batch LU panel factorization, and has specific assumption about the value of N
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is a right-looking unblocked version of the algorithm. The routine is a batched version that factors batchCount M-by-N matrices in parallel.
This version load an entire matrix (m*n) into registers and factorize it with pivoting and copy back to GPU device memory.
| [in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
| [in] | n | INTEGER The number of columns of each matrix A. ib >= 0. |
| [in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
| [in] | ai | INTEGER Row offset for A. |
| [in] | aj | INTEGER Column offset for A. |
| [in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
| [out] | dipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
| [out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
| [in] | batchCount | INTEGER The number of matrices to operate on. |
| [in] | queue | magma_queue_t Queue to execute in. |
| void magma_dgetf2trsm_batched | ( | magma_int_t | ib, |
| magma_int_t | n, | ||
| double ** | dA_array, | ||
| magma_int_t | step, | ||
| magma_int_t | ldda, | ||
| magma_int_t | batchCount, | ||
| magma_queue_t | queue ) |
dgetf2trsm solves one of the matrix equations on gpu
B = C^-1 * B
where C, B are part of the matrix A in dA_array,
This version load C, B into shared memory and solve it and copy back to GPU device memory. This is an internal routine that might have many assumption.
| [in] | ib | INTEGER The number of rows/columns of each matrix C, and rows of B. ib >= 0. |
| [in] | n | INTEGER The number of columns of each matrix B. n >= 0. |
| [in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
| [in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
| [in] | step | INTEGER The starting address of matrix C in A. LDDA >= max(1,M). |
| [in] | batchCount | INTEGER The number of matrices to operate on. |
| [in] | queue | magma_queue_t Queue to execute in. |
| magma_int_t magma_dgetf2_fused_batched | ( | magma_int_t | m, |
| magma_int_t | n, | ||
| double ** | dA_array, | ||
| magma_int_t | ai, | ||
| magma_int_t | aj, | ||
| magma_int_t | ldda, | ||
| magma_int_t ** | dipiv_array, | ||
| magma_int_t * | info_array, | ||
| magma_int_t | batchCount, | ||
| magma_queue_t | queue ) |
magma_dgetf2_reg_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
This routine is used for batch LU panel factorization, and has specific assumption about the value of N
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is a right-looking unblocked version of the algorithm. The routine is a batched version that factors batchCount M-by-N matrices in parallel.
This version load an entire matrix (m*n) into registers and factorize it with pivoting and copy back to GPU device memory.
| [in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
| [in] | n | INTEGER The number of columns of each matrix A. ib >= 0. |
| [in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
| [in] | ai | INTEGER Row offset for A. |
| [in] | aj | INTEGER Column offset for A. |
| [in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
| [out] | dipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
| [out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
| [in] | batchCount | INTEGER The number of matrices to operate on. |
| [in] | queue | magma_queue_t Queue to execute in. |
| void magma_sgetf2trsm_batched | ( | magma_int_t | ib, |
| magma_int_t | n, | ||
| float ** | dA_array, | ||
| magma_int_t | step, | ||
| magma_int_t | ldda, | ||
| magma_int_t | batchCount, | ||
| magma_queue_t | queue ) |
sgetf2trsm solves one of the matrix equations on gpu
B = C^-1 * B
where C, B are part of the matrix A in dA_array,
This version load C, B into shared memory and solve it and copy back to GPU device memory. This is an internal routine that might have many assumption.
| [in] | ib | INTEGER The number of rows/columns of each matrix C, and rows of B. ib >= 0. |
| [in] | n | INTEGER The number of columns of each matrix B. n >= 0. |
| [in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
| [in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
| [in] | step | INTEGER The starting address of matrix C in A. LDDA >= max(1,M). |
| [in] | batchCount | INTEGER The number of matrices to operate on. |
| [in] | queue | magma_queue_t Queue to execute in. |
| magma_int_t magma_sgetf2_fused_batched | ( | magma_int_t | m, |
| magma_int_t | n, | ||
| float ** | dA_array, | ||
| magma_int_t | ai, | ||
| magma_int_t | aj, | ||
| magma_int_t | ldda, | ||
| magma_int_t ** | dipiv_array, | ||
| magma_int_t * | info_array, | ||
| magma_int_t | batchCount, | ||
| magma_queue_t | queue ) |
magma_sgetf2_reg_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
This routine is used for batch LU panel factorization, and has specific assumption about the value of N
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is a right-looking unblocked version of the algorithm. The routine is a batched version that factors batchCount M-by-N matrices in parallel.
This version load an entire matrix (m*n) into registers and factorize it with pivoting and copy back to GPU device memory.
| [in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
| [in] | n | INTEGER The number of columns of each matrix A. ib >= 0. |
| [in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
| [in] | ai | INTEGER Row offset for A. |
| [in] | aj | INTEGER Column offset for A. |
| [in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
| [out] | dipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
| [out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
| [in] | batchCount | INTEGER The number of matrices to operate on. |
| [in] | queue | magma_queue_t Queue to execute in. |
| void magma_zgetf2trsm_batched | ( | magma_int_t | ib, |
| magma_int_t | n, | ||
| magmaDoubleComplex ** | dA_array, | ||
| magma_int_t | step, | ||
| magma_int_t | ldda, | ||
| magma_int_t | batchCount, | ||
| magma_queue_t | queue ) |
zgetf2trsm solves one of the matrix equations on gpu
B = C^-1 * B
where C, B are part of the matrix A in dA_array,
This version load C, B into shared memory and solve it and copy back to GPU device memory. This is an internal routine that might have many assumption.
| [in] | ib | INTEGER The number of rows/columns of each matrix C, and rows of B. ib >= 0. |
| [in] | n | INTEGER The number of columns of each matrix B. n >= 0. |
| [in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
| [in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
| [in] | step | INTEGER The starting address of matrix C in A. LDDA >= max(1,M). |
| [in] | batchCount | INTEGER The number of matrices to operate on. |
| [in] | queue | magma_queue_t Queue to execute in. |
| magma_int_t magma_zgetf2_fused_batched | ( | magma_int_t | m, |
| magma_int_t | n, | ||
| magmaDoubleComplex ** | dA_array, | ||
| magma_int_t | ai, | ||
| magma_int_t | aj, | ||
| magma_int_t | ldda, | ||
| magma_int_t ** | dipiv_array, | ||
| magma_int_t * | info_array, | ||
| magma_int_t | batchCount, | ||
| magma_queue_t | queue ) |
magma_zgetf2_reg_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
This routine is used for batch LU panel factorization, and has specific assumption about the value of N
The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).
This is a right-looking unblocked version of the algorithm. The routine is a batched version that factors batchCount M-by-N matrices in parallel.
This version load an entire matrix (m*n) into registers and factorize it with pivoting and copy back to GPU device memory.
| [in] | m | INTEGER The number of rows of each matrix A. M >= 0. |
| [in] | n | INTEGER The number of columns of each matrix A. ib >= 0. |
| [in,out] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. |
| [in] | ai | INTEGER Row offset for A. |
| [in] | aj | INTEGER Column offset for A. |
| [in] | ldda | INTEGER The leading dimension of each array A. LDDA >= max(1,M). |
| [out] | dipiv_array | Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). |
| [out] | info_array | Array of INTEGERs, dimension (batchCount), for corresponding matrices.
|
| [in] | batchCount | INTEGER The number of matrices to operate on. |
| [in] | queue | magma_queue_t Queue to execute in. |