Functions
static magma_int_t	magma_cgetf2_batched_v1 (magma_int_t m, magma_int_t n, magmaFloatComplex **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue)
	CGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

magma_int_t	magma_cgetf2_native (magma_int_t m, magma_int_t n, magmaFloatComplex_ptr dA, magma_int_t ldda, magma_int_t *dipiv, magma_int_t *dipivinfo, magma_int_t *dinfo, magma_int_t gbstep, magma_event_t events[2], magma_queue_t queue, magma_queue_t update_queue)
	CGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

magma_int_t	magma_cgetf2_vbatched (magma_int_t *m, magma_int_t *n, magma_int_t *minmn, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, magmaFloatComplex **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t *ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue)
	CGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

static magma_int_t	magma_dgetf2_batched_v1 (magma_int_t m, magma_int_t n, double **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue)
	DGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

magma_int_t	magma_dgetf2_native (magma_int_t m, magma_int_t n, magmaDouble_ptr dA, magma_int_t ldda, magma_int_t *dipiv, magma_int_t *dipivinfo, magma_int_t *dinfo, magma_int_t gbstep, magma_event_t events[2], magma_queue_t queue, magma_queue_t update_queue)
	DGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

magma_int_t	magma_dgetf2_vbatched (magma_int_t *m, magma_int_t *n, magma_int_t *minmn, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, double **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t *ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue)
	DGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

static magma_int_t	magma_sgetf2_batched_v1 (magma_int_t m, magma_int_t n, float **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue)
	SGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

magma_int_t	magma_sgetf2_native (magma_int_t m, magma_int_t n, magmaFloat_ptr dA, magma_int_t ldda, magma_int_t *dipiv, magma_int_t *dipivinfo, magma_int_t *dinfo, magma_int_t gbstep, magma_event_t events[2], magma_queue_t queue, magma_queue_t update_queue)
	SGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

magma_int_t	magma_sgetf2_vbatched (magma_int_t *m, magma_int_t *n, magma_int_t *minmn, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, float **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t *ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue)
	SGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

static magma_int_t	magma_zgetf2_batched_v1 (magma_int_t m, magma_int_t n, magmaDoubleComplex **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue)
	ZGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

magma_int_t	magma_zgetf2_native (magma_int_t m, magma_int_t n, magmaDoubleComplex_ptr dA, magma_int_t ldda, magma_int_t *dipiv, magma_int_t *dipivinfo, magma_int_t *dinfo, magma_int_t gbstep, magma_event_t events[2], magma_queue_t queue, magma_queue_t update_queue)
	ZGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

magma_int_t	magma_zgetf2_vbatched (magma_int_t *m, magma_int_t *n, magma_int_t *minmn, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, magmaDoubleComplex **dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t *ldda, magma_int_t **ipiv_array, magma_int_t *info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue)
	ZGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

void	magma_cgetf2trsm_batched (magma_int_t ib, magma_int_t n, magmaFloatComplex **dA_array, magma_int_t step, magma_int_t ldda, magma_int_t batchCount, magma_queue_t queue)
	cgetf2trsm solves one of the matrix equations on gpu

magma_int_t	magma_cgetf2_fused_batched (magma_int_t m, magma_int_t n, magmaFloatComplex **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **dipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue)
	magma_cgetf2_fused_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

void	magma_dgetf2trsm_batched (magma_int_t ib, magma_int_t n, double **dA_array, magma_int_t step, magma_int_t ldda, magma_int_t batchCount, magma_queue_t queue)
	dgetf2trsm solves one of the matrix equations on gpu

magma_int_t	magma_dgetf2_fused_batched (magma_int_t m, magma_int_t n, double **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **dipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue)
	magma_dgetf2_fused_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

void	magma_sgetf2trsm_batched (magma_int_t ib, magma_int_t n, float **dA_array, magma_int_t step, magma_int_t ldda, magma_int_t batchCount, magma_queue_t queue)
	sgetf2trsm solves one of the matrix equations on gpu

magma_int_t	magma_sgetf2_fused_batched (magma_int_t m, magma_int_t n, float **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **dipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue)
	magma_sgetf2_fused_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

void	magma_zgetf2trsm_batched (magma_int_t ib, magma_int_t n, magmaDoubleComplex **dA_array, magma_int_t step, magma_int_t ldda, magma_int_t batchCount, magma_queue_t queue)
	zgetf2trsm solves one of the matrix equations on gpu

magma_int_t	magma_zgetf2_fused_batched (magma_int_t m, magma_int_t n, magmaDoubleComplex **dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t **dipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue)
	magma_zgetf2_fused_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

Detailed Description

Function Documentation

◆ magma_cgetf2_batched_v1()

static magma_int_t magma_cgetf2_batched_v1	(	magma_int_t	m,
		magma_int_t	n,
		magmaFloatComplex **	dA_array,
		magma_int_t	ai,
		magma_int_t	aj,
		magma_int_t	ldda,
		magma_int_t **	ipiv_array,
		magma_int_t *	info_array,
		magma_int_t	gbstep,
		magma_int_t	batchCount,
		magma_queue_t	queue )

static

CGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters

[in]	m	INTEGER The number of rows of each matrix A. M >= 0.
[in]	n	INTEGER The number of columns of each matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ai	INTEGER Row offset for A.
[in]	aj	INTEGER Column offset for A.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	ipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit; < 0: if INFO = -i, the i-th argument had an illegal value or another error occurred, such as memory allocation failed; > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	gbstep	INTEGER internal use.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

This is an internal routine that may rely on many assumptions.

◆ magma_cgetf2_native()

magma_int_t magma_cgetf2_native	(	magma_int_t	m,
		magma_int_t	n,
		magmaFloatComplex_ptr	dA,
		magma_int_t	ldda,
		magma_int_t *	dipiv,
		magma_int_t *	dipivinfo,
		magma_int_t *	dinfo,
		magma_int_t	gbstep,
		magma_event_t	events[2],
		magma_queue_t	queue,
		magma_queue_t	update_queue )

CGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a GPU-only routine. The host CPU is not used.

Parameters

[in]	m	INTEGER The number of rows of each matrix A. M >= 0.
[in]	n	INTEGER The number of columns of each matrix A. N >= 0.
[in,out]	dA	A COMPLEX array on the GPU, dimension (LDDA,N). On entry, an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	INTEGER The leading dimension of A. LDDA >= max(1,M).
[out]	dipiv	An INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	dipivinfo	An INTEGER array, for internal use.
[out]	dinfo	INTEGER, stored on the GPU. = 0: successful exit; < 0: if INFO = -i, the i-th argument had an illegal value or another error occurred, such as memory allocation failed; > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	gbstep	INTEGER internal use.
[in]	queue, update_queue	magma_queue_t Queues to execute in.
[in]	events	magma_event_t array of size 2 Internal use.

This is an internal routine.

◆ magma_cgetf2_vbatched()

magma_int_t magma_cgetf2_vbatched	(	magma_int_t *	m,
		magma_int_t *	n,
		magma_int_t *	minmn,
		magma_int_t	max_m,
		magma_int_t	max_n,
		magma_int_t	max_minmn,
		magma_int_t	max_mxn,
		magmaFloatComplex **	dA_array,
		magma_int_t	Ai,
		magma_int_t	Aj,
		magma_int_t *	ldda,
		magma_int_t **	ipiv_array,
		magma_int_t *	info_array,
		magma_int_t	gbstep,
		magma_int_t	batchCount,
		magma_queue_t	queue )

CGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters

[in]	m	INTEGER The number of rows of each matrix A. M >= 0.
[in]	n	INTEGER The number of columns of each matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	Ai	INTEGER Row offset for A.
[in]	Aj	INTEGER Column offset for A.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	ipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit; < 0: if INFO = -i, the i-th argument had an illegal value or another error occurred, such as memory allocation failed; > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	gbstep	INTEGER internal use.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

This is an internal routine that may rely on many assumptions.

◆ magma_dgetf2_batched_v1()

static magma_int_t magma_dgetf2_batched_v1	(	magma_int_t	m,
		magma_int_t	n,
		double **	dA_array,
		magma_int_t	ai,
		magma_int_t	aj,
		magma_int_t	ldda,
		magma_int_t **	ipiv_array,
		magma_int_t *	info_array,
		magma_int_t	gbstep,
		magma_int_t	batchCount,
		magma_queue_t	queue )

static

DGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters

[in]	m	INTEGER The number of rows of each matrix A. M >= 0.
[in]	n	INTEGER The number of columns of each matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ai	INTEGER Row offset for A.
[in]	aj	INTEGER Column offset for A.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	ipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit; < 0: if INFO = -i, the i-th argument had an illegal value or another error occurred, such as memory allocation failed; > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	gbstep	INTEGER internal use.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

This is an internal routine that may rely on many assumptions.

◆ magma_dgetf2_native()

magma_int_t magma_dgetf2_native	(	magma_int_t	m,
		magma_int_t	n,
		magmaDouble_ptr	dA,
		magma_int_t	ldda,
		magma_int_t *	dipiv,
		magma_int_t *	dipivinfo,
		magma_int_t *	dinfo,
		magma_int_t	gbstep,
		magma_event_t	events[2],
		magma_queue_t	queue,
		magma_queue_t	update_queue )

DGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a GPU-only routine. The host CPU is not used.

Parameters

[in]	m	INTEGER The number of rows of each matrix A. M >= 0.
[in]	n	INTEGER The number of columns of each matrix A. N >= 0.
[in,out]	dA	A DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	INTEGER The leading dimension of A. LDDA >= max(1,M).
[out]	dipiv	An INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	dipivinfo	An INTEGER array, for internal use.
[out]	dinfo	INTEGER, stored on the GPU. = 0: successful exit; < 0: if INFO = -i, the i-th argument had an illegal value or another error occurred, such as memory allocation failed; > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	gbstep	INTEGER internal use.
[in]	queue, update_queue	magma_queue_t Queues to execute in.
[in]	events	magma_event_t array of size 2 Internal use.

This is an internal routine.

◆ magma_dgetf2_vbatched()

magma_int_t magma_dgetf2_vbatched	(	magma_int_t *	m,
		magma_int_t *	n,
		magma_int_t *	minmn,
		magma_int_t	max_m,
		magma_int_t	max_n,
		magma_int_t	max_minmn,
		magma_int_t	max_mxn,
		double **	dA_array,
		magma_int_t	Ai,
		magma_int_t	Aj,
		magma_int_t *	ldda,
		magma_int_t **	ipiv_array,
		magma_int_t *	info_array,
		magma_int_t	gbstep,
		magma_int_t	batchCount,
		magma_queue_t	queue )

DGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters

[in]	m	INTEGER The number of rows of each matrix A. M >= 0.
[in]	n	INTEGER The number of columns of each matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	Ai	INTEGER Row offset for A.
[in]	Aj	INTEGER Column offset for A.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	ipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit; < 0: if INFO = -i, the i-th argument had an illegal value or another error occurred, such as memory allocation failed; > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	gbstep	INTEGER internal use.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

This is an internal routine that may rely on many assumptions.

◆ magma_sgetf2_batched_v1()

static magma_int_t magma_sgetf2_batched_v1	(	magma_int_t	m,
		magma_int_t	n,
		float **	dA_array,
		magma_int_t	ai,
		magma_int_t	aj,
		magma_int_t	ldda,
		magma_int_t **	ipiv_array,
		magma_int_t *	info_array,
		magma_int_t	gbstep,
		magma_int_t	batchCount,
		magma_queue_t	queue )

static

SGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters

[in]	m	INTEGER The number of rows of each matrix A. M >= 0.
[in]	n	INTEGER The number of columns of each matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ai	INTEGER Row offset for A.
[in]	aj	INTEGER Column offset for A.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	ipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit; < 0: if INFO = -i, the i-th argument had an illegal value or another error occurred, such as memory allocation failed; > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	gbstep	INTEGER internal use.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

This is an internal routine that may rely on many assumptions.

◆ magma_sgetf2_native()

magma_int_t magma_sgetf2_native	(	magma_int_t	m,
		magma_int_t	n,
		magmaFloat_ptr	dA,
		magma_int_t	ldda,
		magma_int_t *	dipiv,
		magma_int_t *	dipivinfo,
		magma_int_t *	dinfo,
		magma_int_t	gbstep,
		magma_event_t	events[2],
		magma_queue_t	queue,
		magma_queue_t	update_queue )

SGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a GPU-only routine. The host CPU is not used.

Parameters

[in]	m	INTEGER The number of rows of each matrix A. M >= 0.
[in]	n	INTEGER The number of columns of each matrix A. N >= 0.
[in,out]	dA	A REAL array on the GPU, dimension (LDDA,N). On entry, an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	INTEGER The leading dimension of A. LDDA >= max(1,M).
[out]	dipiv	An INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	dipivinfo	An INTEGER array, for internal use.
[out]	dinfo	INTEGER, stored on the GPU. = 0: successful exit; < 0: if INFO = -i, the i-th argument had an illegal value or another error occurred, such as memory allocation failed; > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	gbstep	INTEGER internal use.
[in]	queue, update_queue	magma_queue_t Queues to execute in.
[in]	events	magma_event_t array of size 2 Internal use.

This is an internal routine.

◆ magma_sgetf2_vbatched()

magma_int_t magma_sgetf2_vbatched	(	magma_int_t *	m,
		magma_int_t *	n,
		magma_int_t *	minmn,
		magma_int_t	max_m,
		magma_int_t	max_n,
		magma_int_t	max_minmn,
		magma_int_t	max_mxn,
		float **	dA_array,
		magma_int_t	Ai,
		magma_int_t	Aj,
		magma_int_t *	ldda,
		magma_int_t **	ipiv_array,
		magma_int_t *	info_array,
		magma_int_t	gbstep,
		magma_int_t	batchCount,
		magma_queue_t	queue )

SGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters

[in]	m	INTEGER The number of rows of each matrix A. M >= 0.
[in]	n	INTEGER The number of columns of each matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	Ai	INTEGER Row offset for A.
[in]	Aj	INTEGER Column offset for A.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	ipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit; < 0: if INFO = -i, the i-th argument had an illegal value or another error occurred, such as memory allocation failed; > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	gbstep	INTEGER internal use.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

This is an internal routine that may rely on many assumptions.

◆ magma_zgetf2_batched_v1()

static magma_int_t magma_zgetf2_batched_v1	(	magma_int_t	m,
		magma_int_t	n,
		magmaDoubleComplex **	dA_array,
		magma_int_t	ai,
		magma_int_t	aj,
		magma_int_t	ldda,
		magma_int_t **	ipiv_array,
		magma_int_t *	info_array,
		magma_int_t	gbstep,
		magma_int_t	batchCount,
		magma_queue_t	queue )

static

ZGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters

[in]	m	INTEGER The number of rows of each matrix A. M >= 0.
[in]	n	INTEGER The number of columns of each matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ai	INTEGER Row offset for A.
[in]	aj	INTEGER Column offset for A.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	ipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit; < 0: if INFO = -i, the i-th argument had an illegal value or another error occurred, such as memory allocation failed; > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	gbstep	INTEGER internal use.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

This is an internal routine that may rely on many assumptions.

◆ magma_zgetf2_native()

magma_int_t magma_zgetf2_native	(	magma_int_t	m,
		magma_int_t	n,
		magmaDoubleComplex_ptr	dA,
		magma_int_t	ldda,
		magma_int_t *	dipiv,
		magma_int_t *	dipivinfo,
		magma_int_t *	dinfo,
		magma_int_t	gbstep,
		magma_event_t	events[2],
		magma_queue_t	queue,
		magma_queue_t	update_queue )

ZGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a GPU-only routine. The host CPU is not used.

Parameters

[in]	m	INTEGER The number of rows of each matrix A. M >= 0.
[in]	n	INTEGER The number of columns of each matrix A. N >= 0.
[in,out]	dA	A COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	INTEGER The leading dimension of A. LDDA >= max(1,M).
[out]	dipiv	An INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	dipivinfo	An INTEGER array, for internal use.
[out]	dinfo	INTEGER, stored on the GPU. = 0: successful exit; < 0: if INFO = -i, the i-th argument had an illegal value or another error occurred, such as memory allocation failed; > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	gbstep	INTEGER internal use.
[in]	queue, update_queue	magma_queue_t Queues to execute in.
[in]	events	magma_event_t array of size 2 Internal use.

This is an internal routine.

◆ magma_zgetf2_vbatched()

magma_int_t magma_zgetf2_vbatched	(	magma_int_t *	m,
		magma_int_t *	n,
		magma_int_t *	minmn,
		magma_int_t	max_m,
		magma_int_t	max_n,
		magma_int_t	max_minmn,
		magma_int_t	max_mxn,
		magmaDoubleComplex **	dA_array,
		magma_int_t	Ai,
		magma_int_t	Aj,
		magma_int_t *	ldda,
		magma_int_t **	ipiv_array,
		magma_int_t *	info_array,
		magma_int_t	gbstep,
		magma_int_t	batchCount,
		magma_queue_t	queue )

ZGETF2 computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters

[in]	m	INTEGER The number of rows of each matrix A. M >= 0.
[in]	n	INTEGER The number of columns of each matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	Ai	INTEGER Row offset for A.
[in]	Aj	INTEGER Column offset for A.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	ipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit; < 0: if INFO = -i, the i-th argument had an illegal value or another error occurred, such as memory allocation failed; > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	gbstep	INTEGER internal use.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

This is an internal routine that may rely on many assumptions.

◆ magma_cgetf2trsm_batched()

void magma_cgetf2trsm_batched	(	magma_int_t	ib,
		magma_int_t	n,
		magmaFloatComplex **	dA_array,
		magma_int_t	step,
		magma_int_t	ldda,
		magma_int_t	batchCount,
		magma_queue_t	queue )

cgetf2trsm solves one of the matrix equations on gpu

B = C^-1 * B

where C, B are part of the matrix A in dA_array,

This version loads C and B into shared memory, solves the system, and copies the result back to GPU device memory. This is an internal routine that might have many assumptions.

Parameters

[in]	ib	INTEGER The number of rows/columns of each matrix C, and rows of B. ib >= 0.
[in]	n	INTEGER The number of columns of each matrix B. n >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[in]	step	INTEGER The starting address of matrix C in A.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

◆ magma_cgetf2_fused_batched()

magma_int_t magma_cgetf2_fused_batched	(	magma_int_t	m,
		magma_int_t	n,
		magmaFloatComplex **	dA_array,
		magma_int_t	ai,
		magma_int_t	aj,
		magma_int_t	ldda,
		magma_int_t **	dipiv_array,
		magma_int_t *	info_array,
		magma_int_t	batchCount,
		magma_queue_t	queue )

magma_cgetf2_fused_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

This routine is used for batch LU panel factorization, and makes specific assumptions about the value of N.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is a right-looking unblocked version of the algorithm. The routine is a batched version that factors batchCount M-by-N matrices in parallel.

This version loads an entire matrix (m*n) into registers, factorizes it with pivoting, and copies the result back to GPU device memory.

Parameters

[in]	m	INTEGER The number of rows of each matrix A. M >= 0.
[in]	n	INTEGER The number of columns of each matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ai	INTEGER Row offset for A.
[in]	aj	INTEGER Column offset for A.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	dipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occurred, such as a memory allocation failure. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

◆ magma_dgetf2trsm_batched()

void magma_dgetf2trsm_batched	(	magma_int_t	ib,
		magma_int_t	n,
		double **	dA_array,
		magma_int_t	step,
		magma_int_t	ldda,
		magma_int_t	batchCount,
		magma_queue_t	queue )

dgetf2trsm solves one of the matrix equations on gpu

B = C^-1 * B

where C, B are part of the matrix A in dA_array,

This version loads C and B into shared memory, solves the system, and copies the result back to GPU device memory. This is an internal routine that might have many assumptions.

Parameters

[in]	ib	INTEGER The number of rows/columns of each matrix C, and rows of B. ib >= 0.
[in]	n	INTEGER The number of columns of each matrix B. n >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[in]	step	INTEGER The starting address of matrix C in A.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

◆ magma_dgetf2_fused_batched()

magma_int_t magma_dgetf2_fused_batched	(	magma_int_t	m,
		magma_int_t	n,
		double **	dA_array,
		magma_int_t	ai,
		magma_int_t	aj,
		magma_int_t	ldda,
		magma_int_t **	dipiv_array,
		magma_int_t *	info_array,
		magma_int_t	batchCount,
		magma_queue_t	queue )

magma_dgetf2_fused_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

This routine is used for batch LU panel factorization, and makes specific assumptions about the value of N.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is a right-looking unblocked version of the algorithm. The routine is a batched version that factors batchCount M-by-N matrices in parallel.

This version loads an entire matrix (m*n) into registers, factorizes it with pivoting, and copies the result back to GPU device memory.

Parameters

[in]	m	INTEGER The number of rows of each matrix A. M >= 0.
[in]	n	INTEGER The number of columns of each matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ai	INTEGER Row offset for A.
[in]	aj	INTEGER Column offset for A.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	dipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occurred, such as a memory allocation failure. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

◆ magma_sgetf2trsm_batched()

void magma_sgetf2trsm_batched	(	magma_int_t	ib,
		magma_int_t	n,
		float **	dA_array,
		magma_int_t	step,
		magma_int_t	ldda,
		magma_int_t	batchCount,
		magma_queue_t	queue )

sgetf2trsm solves one of the matrix equations on gpu

B = C^-1 * B

where C, B are part of the matrix A in dA_array,

This version loads C and B into shared memory, solves the system, and copies the result back to GPU device memory. This is an internal routine that might have many assumptions.

Parameters

[in]	ib	INTEGER The number of rows/columns of each matrix C, and rows of B. ib >= 0.
[in]	n	INTEGER The number of columns of each matrix B. n >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[in]	step	INTEGER The starting address of matrix C in A.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

◆ magma_sgetf2_fused_batched()

magma_int_t magma_sgetf2_fused_batched	(	magma_int_t	m,
		magma_int_t	n,
		float **	dA_array,
		magma_int_t	ai,
		magma_int_t	aj,
		magma_int_t	ldda,
		magma_int_t **	dipiv_array,
		magma_int_t *	info_array,
		magma_int_t	batchCount,
		magma_queue_t	queue )

magma_sgetf2_fused_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

This routine is used for batch LU panel factorization, and makes specific assumptions about the value of N.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is a right-looking unblocked version of the algorithm. The routine is a batched version that factors batchCount M-by-N matrices in parallel.

This version loads an entire matrix (m*n) into registers, factorizes it with pivoting, and copies the result back to GPU device memory.

Parameters

[in]	m	INTEGER The number of rows of each matrix A. M >= 0.
[in]	n	INTEGER The number of columns of each matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ai	INTEGER Row offset for A.
[in]	aj	INTEGER Column offset for A.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	dipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occurred, such as a memory allocation failure. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

◆ magma_zgetf2trsm_batched()

void magma_zgetf2trsm_batched	(	magma_int_t	ib,
		magma_int_t	n,
		magmaDoubleComplex **	dA_array,
		magma_int_t	step,
		magma_int_t	ldda,
		magma_int_t	batchCount,
		magma_queue_t	queue )

zgetf2trsm solves one of the matrix equations on gpu

B = C^-1 * B

where C, B are part of the matrix A in dA_array,

This version loads C and B into shared memory, solves the system, and copies the result back to GPU device memory. This is an internal routine that might have many assumptions.

Parameters

[in]	ib	INTEGER The number of rows/columns of each matrix C, and rows of B. ib >= 0.
[in]	n	INTEGER The number of columns of each matrix B. n >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[in]	step	INTEGER The starting address of matrix C in A.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

◆ magma_zgetf2_fused_batched()

magma_int_t magma_zgetf2_fused_batched	(	magma_int_t	m,
		magma_int_t	n,
		magmaDoubleComplex **	dA_array,
		magma_int_t	ai,
		magma_int_t	aj,
		magma_int_t	ldda,
		magma_int_t **	dipiv_array,
		magma_int_t *	info_array,
		magma_int_t	batchCount,
		magma_queue_t	queue )

magma_zgetf2_fused_batched computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

This routine is used for batch LU panel factorization, and makes specific assumptions about the value of N.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is a right-looking unblocked version of the algorithm. The routine is a batched version that factors batchCount M-by-N matrices in parallel.

This version loads an entire matrix (m*n) into registers, factorizes it with pivoting, and copies the result back to GPU device memory.

Parameters

[in]	m	INTEGER The number of rows of each matrix A. M >= 0.
[in]	n	INTEGER The number of columns of each matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ai	INTEGER Row offset for A.
[in]	aj	INTEGER Column offset for A.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	dipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occurred, such as a memory allocation failure. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

Functions

Detailed Description

Function Documentation

◆ magma_cgetf2_batched_v1()

◆ magma_cgetf2_native()

◆ magma_cgetf2_vbatched()

◆ magma_dgetf2_batched_v1()

◆ magma_dgetf2_native()

◆ magma_dgetf2_vbatched()

◆ magma_sgetf2_batched_v1()

◆ magma_sgetf2_native()

◆ magma_sgetf2_vbatched()

◆ magma_zgetf2_batched_v1()

◆ magma_zgetf2_native()

◆ magma_zgetf2_vbatched()

◆ magma_cgetf2trsm_batched()

◆ magma_cgetf2_fused_batched()

◆ magma_dgetf2trsm_batched()

◆ magma_dgetf2_fused_batched()

◆ magma_sgetf2trsm_batched()

◆ magma_sgetf2_fused_batched()

◆ magma_zgetf2trsm_batched()

◆ magma_zgetf2_fused_batched()