MAGMA 2.10.0
Matrix Algebra for GPU and Multicore Architectures
Loading...
Searching...
No Matches

Functions

static magma_int_t magma_cgetf2_batched_v1 (magma_int_t m, magma_int_t n, magmaFloatComplex **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue)
 CGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
 
magma_int_t magma_cgetf2_native (magma_int_t m, magma_int_t n, magmaFloatComplex_ptr dA, magma_int_t ldda, magma_int_t *dipiv, magma_int_t *dipivinfo, magma_int_t *dinfo, magma_int_t gbstep, magma_event_t events[2], magma_queue_t queue, magma_queue_t update_queue)
 CGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
 
magma_int_t magma_cgetf2_nopiv_vbatched (magma_int_t *m, magma_int_t *n, magma_int_t *minmn, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, magmaFloatComplex **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t *ldda, float *dtol_array, float eps, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue)
 CGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
 
magma_int_t magma_cgetf2_vbatched (magma_int_t *m, magma_int_t *n, magma_int_t *minmn, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, magmaFloatComplex **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t *ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue)
 CGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
 
static magma_int_t magma_dgetf2_batched_v1 (magma_int_t m, magma_int_t n, double **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue)
 DGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
 
magma_int_t magma_dgetf2_native (magma_int_t m, magma_int_t n, magmaDouble_ptr dA, magma_int_t ldda, magma_int_t *dipiv, magma_int_t *dipivinfo, magma_int_t *dinfo, magma_int_t gbstep, magma_event_t events[2], magma_queue_t queue, magma_queue_t update_queue)
 DGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
 
magma_int_t magma_dgetf2_nopiv_vbatched (magma_int_t *m, magma_int_t *n, magma_int_t *minmn, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, double **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t *ldda, double *dtol_array, double eps, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue)
 DGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
 
magma_int_t magma_dgetf2_vbatched (magma_int_t *m, magma_int_t *n, magma_int_t *minmn, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, double **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t *ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue)
 DGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
 
static magma_int_t magma_sgetf2_batched_v1 (magma_int_t m, magma_int_t n, float **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue)
 SGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
 
magma_int_t magma_sgetf2_native (magma_int_t m, magma_int_t n, magmaFloat_ptr dA, magma_int_t ldda, magma_int_t *dipiv, magma_int_t *dipivinfo, magma_int_t *dinfo, magma_int_t gbstep, magma_event_t events[2], magma_queue_t queue, magma_queue_t update_queue)
 SGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
 
magma_int_t magma_sgetf2_nopiv_vbatched (magma_int_t *m, magma_int_t *n, magma_int_t *minmn, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, float **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t *ldda, float *dtol_array, float eps, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue)
 SGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
 
magma_int_t magma_sgetf2_vbatched (magma_int_t *m, magma_int_t *n, magma_int_t *minmn, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, float **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t *ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue)
 SGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
 
static magma_int_t magma_zgetf2_batched_v1 (magma_int_t m, magma_int_t n, magmaDoubleComplex **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue)
 ZGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
 
magma_int_t magma_zgetf2_native (magma_int_t m, magma_int_t n, magmaDoubleComplex_ptr dA, magma_int_t ldda, magma_int_t *dipiv, magma_int_t *dipivinfo, magma_int_t *dinfo, magma_int_t gbstep, magma_event_t events[2], magma_queue_t queue, magma_queue_t update_queue)
 ZGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
 
magma_int_t magma_zgetf2_nopiv_vbatched (magma_int_t *m, magma_int_t *n, magma_int_t *minmn, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, magmaDoubleComplex **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t *ldda, double *dtol_array, double eps, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue)
 ZGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
 
magma_int_t magma_zgetf2_vbatched (magma_int_t *m, magma_int_t *n, magma_int_t *minmn, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, magmaDoubleComplex **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t *ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue)
 ZGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
 
void magma_cgetf2trsm_batched (magma_int_t ib, magma_int_t n, magmaFloatComplex **dA_array, magma_int_t step, magma_int_t ldda, magma_int_t batchCount, magma_queue_t queue)
 cgetf2trsm solves one of the matrix equations on gpu
 
magma_int_t magma_cgetf2_fused_batched (magma_int_t m, magma_int_t n, magmaFloatComplex **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **dipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue)
 magma_cgetf2_reg_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
 
void magma_dgetf2trsm_batched (magma_int_t ib, magma_int_t n, double **dA_array, magma_int_t step, magma_int_t ldda, magma_int_t batchCount, magma_queue_t queue)
 dgetf2trsm solves one of the matrix equations on gpu
 
magma_int_t magma_dgetf2_fused_batched (magma_int_t m, magma_int_t n, double **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **dipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue)
 magma_dgetf2_reg_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
 
void magma_sgetf2trsm_batched (magma_int_t ib, magma_int_t n, float **dA_array, magma_int_t step, magma_int_t ldda, magma_int_t batchCount, magma_queue_t queue)
 sgetf2trsm solves one of the matrix equations on gpu
 
magma_int_t magma_sgetf2_fused_batched (magma_int_t m, magma_int_t n, float **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **dipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue)
 magma_sgetf2_reg_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
 
void magma_zgetf2trsm_batched (magma_int_t ib, magma_int_t n, magmaDoubleComplex **dA_array, magma_int_t step, magma_int_t ldda, magma_int_t batchCount, magma_queue_t queue)
 zgetf2trsm solves one of the matrix equations on gpu
 
magma_int_t magma_zgetf2_fused_batched (magma_int_t m, magma_int_t n, magmaDoubleComplex **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **dipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue)
 magma_zgetf2_reg_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.
 

Detailed Description

Function Documentation

◆ magma_cgetf2_batched_v1()

static magma_int_t magma_cgetf2_batched_v1 ( magma_int_t m,
magma_int_t n,
magmaFloatComplex ** dA_array,
magma_int_t ai,
magma_int_t aj,
magma_int_t ldda,
magma_int_t ** ipiv_array,
magma_int_t * info_array,
magma_int_t gbstep,
magma_int_t batchCount,
magma_queue_t queue )
static

CGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters
[in]mINTEGER The number of rows of each matrix A. M >= 0.
[in]nINTEGER The number of columns of each matrix A. N >= 0.
[in,out]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored.
[in]aiINTEGER Row offset for A.
[in]ajINTEGER Column offset for A.
[in]lddaINTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]ipiv_arrayArray of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]info_arrayArray of INTEGERs, dimension (batchCount), for corresponding matrices.
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.
  • > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]gbstepINTEGER internal use.
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

this is an internal routine that might have many assumption.

◆ magma_cgetf2_native()

magma_int_t magma_cgetf2_native ( magma_int_t m,
magma_int_t n,
magmaFloatComplex_ptr dA,
magma_int_t ldda,
magma_int_t * dipiv,
magma_int_t * dipivinfo,
magma_int_t * dinfo,
magma_int_t gbstep,
magma_event_t events[2],
magma_queue_t queue,
magma_queue_t update_queue )

CGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a GPU-only routine. The host CPU is not used.

Parameters
[in]mINTEGER The number of rows of each matrix A. M >= 0.
[in]nINTEGER The number of columns of each matrix A. N >= 0.
[in,out]dAA COMPLEX array on the GPU, dimension (LDDA,N). On entry, an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored.
[in]lddaINTEGER The leading dimension of A. LDDA >= max(1,M).
[out]dipivAn INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]dipivinfoAn INTEGER array, for internal use.
[out]dinfoINTEGER, stored on the GPU
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.
  • > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]gbstepINTEGER internal use.
[in]queuesmagma_queue_t array of size 2. Queues to execute in.
[in]eventsmagma_event_t array of size 2 Internal use.

This is an internal routine.

◆ magma_cgetf2_nopiv_vbatched()

magma_int_t magma_cgetf2_nopiv_vbatched ( magma_int_t * m,
magma_int_t * n,
magma_int_t * minmn,
magma_int_t max_m,
magma_int_t max_n,
magma_int_t max_minmn,
magma_int_t max_mxn,
magmaFloatComplex ** dA_array,
magma_int_t Ai,
magma_int_t Aj,
magma_int_t * ldda,
float * dtol_array,
float eps,
magma_int_t * info_array,
magma_int_t gbstep,
magma_int_t batchCount,
magma_queue_t queue )

CGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters
[in]mINTEGER The number of rows of each matrix A. M >= 0.
[in]nINTEGER The number of columns of each matrix A. N >= 0.
[in,out]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored.
[in]aiINTEGER Row offset for A.
[in]ajINTEGER Column offset for A.
[in]lddaINTEGER The leading dimension of each array A. LDDA >= max(1,M).
[in]dtol_arrayArray of DOUBLEs, dimension (batchCount), for corresponding matrices. Each is an the tolerance that is compared to the diagonal element before the column is scaled by its inverse. If the value of the diagonal is less than the threshold, the diagonal is replaced by the threshold. If the array is set to NULL, then the threshold is set to the eps parameter
[in]epsDOUBLE The value to use for the tolerance for all matrices if the dtol_array is NULL
[out]info_arrayArray of INTEGERs, dimension (batchCount), for corresponding matrices.
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.
  • > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations. If a tolerance array is specified the value shows the number of times a tiny pivot was replaced
[in]gbstepINTEGER internal use.
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

this is an internal routine that might have many assumption.

◆ magma_cgetf2_vbatched()

magma_int_t magma_cgetf2_vbatched ( magma_int_t * m,
magma_int_t * n,
magma_int_t * minmn,
magma_int_t max_m,
magma_int_t max_n,
magma_int_t max_minmn,
magma_int_t max_mxn,
magmaFloatComplex ** dA_array,
magma_int_t Ai,
magma_int_t Aj,
magma_int_t * ldda,
magma_int_t ** ipiv_array,
magma_int_t * info_array,
magma_int_t gbstep,
magma_int_t batchCount,
magma_queue_t queue )

CGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters
[in]mINTEGER The number of rows of each matrix A. M >= 0.
[in]nINTEGER The number of columns of each matrix A. N >= 0.
[in,out]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored.
[in]aiINTEGER Row offset for A.
[in]ajINTEGER Column offset for A.
[in]lddaINTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]ipiv_arrayArray of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]info_arrayArray of INTEGERs, dimension (batchCount), for corresponding matrices.
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.
  • > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]gbstepINTEGER internal use.
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

this is an internal routine that might have many assumption.

◆ magma_dgetf2_batched_v1()

static magma_int_t magma_dgetf2_batched_v1 ( magma_int_t m,
magma_int_t n,
double ** dA_array,
magma_int_t ai,
magma_int_t aj,
magma_int_t ldda,
magma_int_t ** ipiv_array,
magma_int_t * info_array,
magma_int_t gbstep,
magma_int_t batchCount,
magma_queue_t queue )
static

DGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters
[in]mINTEGER The number of rows of each matrix A. M >= 0.
[in]nINTEGER The number of columns of each matrix A. N >= 0.
[in,out]dA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored.
[in]aiINTEGER Row offset for A.
[in]ajINTEGER Column offset for A.
[in]lddaINTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]ipiv_arrayArray of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]info_arrayArray of INTEGERs, dimension (batchCount), for corresponding matrices.
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.
  • > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]gbstepINTEGER internal use.
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

this is an internal routine that might have many assumption.

◆ magma_dgetf2_native()

magma_int_t magma_dgetf2_native ( magma_int_t m,
magma_int_t n,
magmaDouble_ptr dA,
magma_int_t ldda,
magma_int_t * dipiv,
magma_int_t * dipivinfo,
magma_int_t * dinfo,
magma_int_t gbstep,
magma_event_t events[2],
magma_queue_t queue,
magma_queue_t update_queue )

DGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a GPU-only routine. The host CPU is not used.

Parameters
[in]mINTEGER The number of rows of each matrix A. M >= 0.
[in]nINTEGER The number of columns of each matrix A. N >= 0.
[in,out]dAA DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored.
[in]lddaINTEGER The leading dimension of A. LDDA >= max(1,M).
[out]dipivAn INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]dipivinfoAn INTEGER array, for internal use.
[out]dinfoINTEGER, stored on the GPU
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.
  • > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]gbstepINTEGER internal use.
[in]queuesmagma_queue_t array of size 2. Queues to execute in.
[in]eventsmagma_event_t array of size 2 Internal use.

This is an internal routine.

◆ magma_dgetf2_nopiv_vbatched()

magma_int_t magma_dgetf2_nopiv_vbatched ( magma_int_t * m,
magma_int_t * n,
magma_int_t * minmn,
magma_int_t max_m,
magma_int_t max_n,
magma_int_t max_minmn,
magma_int_t max_mxn,
double ** dA_array,
magma_int_t Ai,
magma_int_t Aj,
magma_int_t * ldda,
double * dtol_array,
double eps,
magma_int_t * info_array,
magma_int_t gbstep,
magma_int_t batchCount,
magma_queue_t queue )

DGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters
[in]mINTEGER The number of rows of each matrix A. M >= 0.
[in]nINTEGER The number of columns of each matrix A. N >= 0.
[in,out]dA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored.
[in]aiINTEGER Row offset for A.
[in]ajINTEGER Column offset for A.
[in]lddaINTEGER The leading dimension of each array A. LDDA >= max(1,M).
[in]dtol_arrayArray of DOUBLEs, dimension (batchCount), for corresponding matrices. Each is an the tolerance that is compared to the diagonal element before the column is scaled by its inverse. If the value of the diagonal is less than the threshold, the diagonal is replaced by the threshold. If the array is set to NULL, then the threshold is set to the eps parameter
[in]epsDOUBLE The value to use for the tolerance for all matrices if the dtol_array is NULL
[out]info_arrayArray of INTEGERs, dimension (batchCount), for corresponding matrices.
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.
  • > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations. If a tolerance array is specified the value shows the number of times a tiny pivot was replaced
[in]gbstepINTEGER internal use.
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

this is an internal routine that might have many assumption.

◆ magma_dgetf2_vbatched()

magma_int_t magma_dgetf2_vbatched ( magma_int_t * m,
magma_int_t * n,
magma_int_t * minmn,
magma_int_t max_m,
magma_int_t max_n,
magma_int_t max_minmn,
magma_int_t max_mxn,
double ** dA_array,
magma_int_t Ai,
magma_int_t Aj,
magma_int_t * ldda,
magma_int_t ** ipiv_array,
magma_int_t * info_array,
magma_int_t gbstep,
magma_int_t batchCount,
magma_queue_t queue )

DGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters
[in]mINTEGER The number of rows of each matrix A. M >= 0.
[in]nINTEGER The number of columns of each matrix A. N >= 0.
[in,out]dA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored.
[in]aiINTEGER Row offset for A.
[in]ajINTEGER Column offset for A.
[in]lddaINTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]ipiv_arrayArray of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]info_arrayArray of INTEGERs, dimension (batchCount), for corresponding matrices.
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.
  • > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]gbstepINTEGER internal use.
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

this is an internal routine that might have many assumption.

◆ magma_sgetf2_batched_v1()

static magma_int_t magma_sgetf2_batched_v1 ( magma_int_t m,
magma_int_t n,
float ** dA_array,
magma_int_t ai,
magma_int_t aj,
magma_int_t ldda,
magma_int_t ** ipiv_array,
magma_int_t * info_array,
magma_int_t gbstep,
magma_int_t batchCount,
magma_queue_t queue )
static

SGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters
[in]mINTEGER The number of rows of each matrix A. M >= 0.
[in]nINTEGER The number of columns of each matrix A. N >= 0.
[in,out]dA_arrayArray of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored.
[in]aiINTEGER Row offset for A.
[in]ajINTEGER Column offset for A.
[in]lddaINTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]ipiv_arrayArray of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]info_arrayArray of INTEGERs, dimension (batchCount), for corresponding matrices.
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.
  • > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]gbstepINTEGER internal use.
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

this is an internal routine that might have many assumption.

◆ magma_sgetf2_native()

magma_int_t magma_sgetf2_native ( magma_int_t m,
magma_int_t n,
magmaFloat_ptr dA,
magma_int_t ldda,
magma_int_t * dipiv,
magma_int_t * dipivinfo,
magma_int_t * dinfo,
magma_int_t gbstep,
magma_event_t events[2],
magma_queue_t queue,
magma_queue_t update_queue )

SGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a GPU-only routine. The host CPU is not used.

Parameters
[in]mINTEGER The number of rows of each matrix A. M >= 0.
[in]nINTEGER The number of columns of each matrix A. N >= 0.
[in,out]dAA REAL array on the GPU, dimension (LDDA,N). On entry, an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored.
[in]lddaINTEGER The leading dimension of A. LDDA >= max(1,M).
[out]dipivAn INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]dipivinfoAn INTEGER array, for internal use.
[out]dinfoINTEGER, stored on the GPU
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.
  • > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]gbstepINTEGER internal use.
[in]queuesmagma_queue_t array of size 2. Queues to execute in.
[in]eventsmagma_event_t array of size 2 Internal use.

This is an internal routine.

◆ magma_sgetf2_nopiv_vbatched()

magma_int_t magma_sgetf2_nopiv_vbatched ( magma_int_t * m,
magma_int_t * n,
magma_int_t * minmn,
magma_int_t max_m,
magma_int_t max_n,
magma_int_t max_minmn,
magma_int_t max_mxn,
float ** dA_array,
magma_int_t Ai,
magma_int_t Aj,
magma_int_t * ldda,
float * dtol_array,
float eps,
magma_int_t * info_array,
magma_int_t gbstep,
magma_int_t batchCount,
magma_queue_t queue )

SGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters
[in]mINTEGER The number of rows of each matrix A. M >= 0.
[in]nINTEGER The number of columns of each matrix A. N >= 0.
[in,out]dA_arrayArray of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored.
[in]aiINTEGER Row offset for A.
[in]ajINTEGER Column offset for A.
[in]lddaINTEGER The leading dimension of each array A. LDDA >= max(1,M).
[in]dtol_arrayArray of DOUBLEs, dimension (batchCount), for corresponding matrices. Each is an the tolerance that is compared to the diagonal element before the column is scaled by its inverse. If the value of the diagonal is less than the threshold, the diagonal is replaced by the threshold. If the array is set to NULL, then the threshold is set to the eps parameter
[in]epsDOUBLE The value to use for the tolerance for all matrices if the dtol_array is NULL
[out]info_arrayArray of INTEGERs, dimension (batchCount), for corresponding matrices.
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.
  • > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations. If a tolerance array is specified the value shows the number of times a tiny pivot was replaced
[in]gbstepINTEGER internal use.
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

this is an internal routine that might have many assumption.

◆ magma_sgetf2_vbatched()

magma_int_t magma_sgetf2_vbatched ( magma_int_t * m,
magma_int_t * n,
magma_int_t * minmn,
magma_int_t max_m,
magma_int_t max_n,
magma_int_t max_minmn,
magma_int_t max_mxn,
float ** dA_array,
magma_int_t Ai,
magma_int_t Aj,
magma_int_t * ldda,
magma_int_t ** ipiv_array,
magma_int_t * info_array,
magma_int_t gbstep,
magma_int_t batchCount,
magma_queue_t queue )

SGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters
[in]mINTEGER The number of rows of each matrix A. M >= 0.
[in]nINTEGER The number of columns of each matrix A. N >= 0.
[in,out]dA_arrayArray of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored.
[in]aiINTEGER Row offset for A.
[in]ajINTEGER Column offset for A.
[in]lddaINTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]ipiv_arrayArray of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]info_arrayArray of INTEGERs, dimension (batchCount), for corresponding matrices.
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.
  • > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]gbstepINTEGER internal use.
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

this is an internal routine that might have many assumption.

◆ magma_zgetf2_batched_v1()

static magma_int_t magma_zgetf2_batched_v1 ( magma_int_t m,
magma_int_t n,
magmaDoubleComplex ** dA_array,
magma_int_t ai,
magma_int_t aj,
magma_int_t ldda,
magma_int_t ** ipiv_array,
magma_int_t * info_array,
magma_int_t gbstep,
magma_int_t batchCount,
magma_queue_t queue )
static

ZGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters
[in]mINTEGER The number of rows of each matrix A. M >= 0.
[in]nINTEGER The number of columns of each matrix A. N >= 0.
[in,out]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored.
[in]aiINTEGER Row offset for A.
[in]ajINTEGER Column offset for A.
[in]lddaINTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]ipiv_arrayArray of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]info_arrayArray of INTEGERs, dimension (batchCount), for corresponding matrices.
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.
  • > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]gbstepINTEGER internal use.
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

this is an internal routine that might have many assumption.

◆ magma_zgetf2_native()

magma_int_t magma_zgetf2_native ( magma_int_t m,
magma_int_t n,
magmaDoubleComplex_ptr dA,
magma_int_t ldda,
magma_int_t * dipiv,
magma_int_t * dipivinfo,
magma_int_t * dinfo,
magma_int_t gbstep,
magma_event_t events[2],
magma_queue_t queue,
magma_queue_t update_queue )

ZGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a GPU-only routine. The host CPU is not used.

Parameters
[in]mINTEGER The number of rows of each matrix A. M >= 0.
[in]nINTEGER The number of columns of each matrix A. N >= 0.
[in,out]dAA COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored.
[in]lddaINTEGER The leading dimension of A. LDDA >= max(1,M).
[out]dipivAn INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]dipivinfoAn INTEGER array, for internal use.
[out]dinfoINTEGER, stored on the GPU
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.
  • > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]gbstepINTEGER internal use.
[in]queuesmagma_queue_t array of size 2. Queues to execute in.
[in]eventsmagma_event_t array of size 2 Internal use.

This is an internal routine.

◆ magma_zgetf2_nopiv_vbatched()

magma_int_t magma_zgetf2_nopiv_vbatched ( magma_int_t * m,
magma_int_t * n,
magma_int_t * minmn,
magma_int_t max_m,
magma_int_t max_n,
magma_int_t max_minmn,
magma_int_t max_mxn,
magmaDoubleComplex ** dA_array,
magma_int_t Ai,
magma_int_t Aj,
magma_int_t * ldda,
double * dtol_array,
double eps,
magma_int_t * info_array,
magma_int_t gbstep,
magma_int_t batchCount,
magma_queue_t queue )

ZGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters
[in]mINTEGER The number of rows of each matrix A. M >= 0.
[in]nINTEGER The number of columns of each matrix A. N >= 0.
[in,out]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored.
[in]aiINTEGER Row offset for A.
[in]ajINTEGER Column offset for A.
[in]lddaINTEGER The leading dimension of each array A. LDDA >= max(1,M).
[in]dtol_arrayArray of DOUBLEs, dimension (batchCount), for corresponding matrices. Each is an the tolerance that is compared to the diagonal element before the column is scaled by its inverse. If the value of the diagonal is less than the threshold, the diagonal is replaced by the threshold. If the array is set to NULL, then the threshold is set to the eps parameter
[in]epsDOUBLE The value to use for the tolerance for all matrices if the dtol_array is NULL
[out]info_arrayArray of INTEGERs, dimension (batchCount), for corresponding matrices.
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.
  • > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations. If a tolerance array is specified the value shows the number of times a tiny pivot was replaced
[in]gbstepINTEGER internal use.
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

this is an internal routine that might have many assumption.

◆ magma_zgetf2_vbatched()

magma_int_t magma_zgetf2_vbatched ( magma_int_t * m,
magma_int_t * n,
magma_int_t * minmn,
magma_int_t max_m,
magma_int_t max_n,
magma_int_t max_minmn,
magma_int_t max_mxn,
magmaDoubleComplex ** dA_array,
magma_int_t Ai,
magma_int_t Aj,
magma_int_t * ldda,
magma_int_t ** ipiv_array,
magma_int_t * info_array,
magma_int_t gbstep,
magma_int_t batchCount,
magma_queue_t queue )

ZGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters
[in]mINTEGER The number of rows of each matrix A. M >= 0.
[in]nINTEGER The number of columns of each matrix A. N >= 0.
[in,out]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored.
[in]aiINTEGER Row offset for A.
[in]ajINTEGER Column offset for A.
[in]lddaINTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]ipiv_arrayArray of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]info_arrayArray of INTEGERs, dimension (batchCount), for corresponding matrices.
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.
  • > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]gbstepINTEGER internal use.
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

this is an internal routine that might have many assumption.

◆ magma_cgetf2trsm_batched()

void magma_cgetf2trsm_batched ( magma_int_t ib,
magma_int_t n,
magmaFloatComplex ** dA_array,
magma_int_t step,
magma_int_t ldda,
magma_int_t batchCount,
magma_queue_t queue )

cgetf2trsm solves one of the matrix equations on gpu

B = C^-1 * B

where C, B are part of the matrix A in dA_array,

This version load C, B into shared memory and solve it and copy back to GPU device memory. This is an internal routine that might have many assumption.

Parameters
[in]ibINTEGER The number of rows/columns of each matrix C, and rows of B. ib >= 0.
[in]nINTEGER The number of columns of each matrix B. n >= 0.
[in,out]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored.
[in]lddaINTEGER The leading dimension of each array A. LDDA >= max(1,M).
[in]stepINTEGER The starting address of matrix C in A. LDDA >= max(1,M).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magma_cgetf2_fused_batched()

magma_int_t magma_cgetf2_fused_batched ( magma_int_t m,
magma_int_t n,
magmaFloatComplex ** dA_array,
magma_int_t ai,
magma_int_t aj,
magma_int_t ldda,
magma_int_t ** dipiv_array,
magma_int_t * info_array,
magma_int_t batchCount,
magma_queue_t queue )

magma_cgetf2_reg_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

This routine is used for batch LU panel factorization, and has specific assumption about the value of N

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is a right-looking unblocked version of the algorithm. The routine is a batched version that factors batchCount M-by-N matrices in parallel.

This version load an entire matrix (m*n) into registers and factorize it with pivoting and copy back to GPU device memory.

Parameters
[in]mINTEGER The number of rows of each matrix A. M >= 0.
[in]nINTEGER The number of columns of each matrix A. ib >= 0.
[in,out]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored.
[in]aiINTEGER Row offset for A.
[in]ajINTEGER Column offset for A.
[in]lddaINTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]dipiv_arrayArray of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]info_arrayArray of INTEGERs, dimension (batchCount), for corresponding matrices.
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.
  • > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magma_dgetf2trsm_batched()

void magma_dgetf2trsm_batched ( magma_int_t ib,
magma_int_t n,
double ** dA_array,
magma_int_t step,
magma_int_t ldda,
magma_int_t batchCount,
magma_queue_t queue )

dgetf2trsm solves one of the matrix equations on gpu

B = C^-1 * B

where C, B are part of the matrix A in dA_array,

This version load C, B into shared memory and solve it and copy back to GPU device memory. This is an internal routine that might have many assumption.

Parameters
[in]ibINTEGER The number of rows/columns of each matrix C, and rows of B. ib >= 0.
[in]nINTEGER The number of columns of each matrix B. n >= 0.
[in,out]dA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored.
[in]lddaINTEGER The leading dimension of each array A. LDDA >= max(1,M).
[in]stepINTEGER The starting address of matrix C in A. LDDA >= max(1,M).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magma_dgetf2_fused_batched()

magma_int_t magma_dgetf2_fused_batched ( magma_int_t m,
magma_int_t n,
double ** dA_array,
magma_int_t ai,
magma_int_t aj,
magma_int_t ldda,
magma_int_t ** dipiv_array,
magma_int_t * info_array,
magma_int_t batchCount,
magma_queue_t queue )

magma_dgetf2_reg_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

This routine is used for batch LU panel factorization, and has specific assumption about the value of N

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is a right-looking unblocked version of the algorithm. The routine is a batched version that factors batchCount M-by-N matrices in parallel.

This version load an entire matrix (m*n) into registers and factorize it with pivoting and copy back to GPU device memory.

Parameters
[in]mINTEGER The number of rows of each matrix A. M >= 0.
[in]nINTEGER The number of columns of each matrix A. ib >= 0.
[in,out]dA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored.
[in]aiINTEGER Row offset for A.
[in]ajINTEGER Column offset for A.
[in]lddaINTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]dipiv_arrayArray of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]info_arrayArray of INTEGERs, dimension (batchCount), for corresponding matrices.
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.
  • > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magma_sgetf2trsm_batched()

void magma_sgetf2trsm_batched ( magma_int_t ib,
magma_int_t n,
float ** dA_array,
magma_int_t step,
magma_int_t ldda,
magma_int_t batchCount,
magma_queue_t queue )

sgetf2trsm solves one of the matrix equations on gpu

B = C^-1 * B

where C, B are part of the matrix A in dA_array,

This version load C, B into shared memory and solve it and copy back to GPU device memory. This is an internal routine that might have many assumption.

Parameters
[in]ibINTEGER The number of rows/columns of each matrix C, and rows of B. ib >= 0.
[in]nINTEGER The number of columns of each matrix B. n >= 0.
[in,out]dA_arrayArray of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored.
[in]lddaINTEGER The leading dimension of each array A. LDDA >= max(1,M).
[in]stepINTEGER The starting address of matrix C in A. LDDA >= max(1,M).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magma_sgetf2_fused_batched()

magma_int_t magma_sgetf2_fused_batched ( magma_int_t m,
magma_int_t n,
float ** dA_array,
magma_int_t ai,
magma_int_t aj,
magma_int_t ldda,
magma_int_t ** dipiv_array,
magma_int_t * info_array,
magma_int_t batchCount,
magma_queue_t queue )

magma_sgetf2_reg_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

This routine is used for batch LU panel factorization, and has specific assumption about the value of N

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is a right-looking unblocked version of the algorithm. The routine is a batched version that factors batchCount M-by-N matrices in parallel.

This version load an entire matrix (m*n) into registers and factorize it with pivoting and copy back to GPU device memory.

Parameters
[in]mINTEGER The number of rows of each matrix A. M >= 0.
[in]nINTEGER The number of columns of each matrix A. ib >= 0.
[in,out]dA_arrayArray of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored.
[in]aiINTEGER Row offset for A.
[in]ajINTEGER Column offset for A.
[in]lddaINTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]dipiv_arrayArray of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]info_arrayArray of INTEGERs, dimension (batchCount), for corresponding matrices.
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.
  • > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magma_zgetf2trsm_batched()

void magma_zgetf2trsm_batched ( magma_int_t ib,
magma_int_t n,
magmaDoubleComplex ** dA_array,
magma_int_t step,
magma_int_t ldda,
magma_int_t batchCount,
magma_queue_t queue )

zgetf2trsm solves one of the matrix equations on gpu

B = C^-1 * B

where C, B are part of the matrix A in dA_array,

This version load C, B into shared memory and solve it and copy back to GPU device memory. This is an internal routine that might have many assumption.

Parameters
[in]ibINTEGER The number of rows/columns of each matrix C, and rows of B. ib >= 0.
[in]nINTEGER The number of columns of each matrix B. n >= 0.
[in,out]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored.
[in]lddaINTEGER The leading dimension of each array A. LDDA >= max(1,M).
[in]stepINTEGER The starting address of matrix C in A. LDDA >= max(1,M).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magma_zgetf2_fused_batched()

magma_int_t magma_zgetf2_fused_batched ( magma_int_t m,
magma_int_t n,
magmaDoubleComplex ** dA_array,
magma_int_t ai,
magma_int_t aj,
magma_int_t ldda,
magma_int_t ** dipiv_array,
magma_int_t * info_array,
magma_int_t batchCount,
magma_queue_t queue )

magma_zgetf2_reg_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

This routine is used for batch LU panel factorization, and has specific assumption about the value of N

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is a right-looking unblocked version of the algorithm. The routine is a batched version that factors batchCount M-by-N matrices in parallel.

This version load an entire matrix (m*n) into registers and factorize it with pivoting and copy back to GPU device memory.

Parameters
[in]mINTEGER The number of rows of each matrix A. M >= 0.
[in]nINTEGER The number of columns of each matrix A. ib >= 0.
[in,out]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored.
[in]aiINTEGER Row offset for A.
[in]ajINTEGER Column offset for A.
[in]lddaINTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]dipiv_arrayArray of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]info_arrayArray of INTEGERs, dimension (batchCount), for corresponding matrices.
  • = 0: successful exit
  • < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed.
  • > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.