Functions
magma_int_t	magma_cgetrf_batched (magma_int_t m, magma_int_t n, magmaFloatComplex dA_array, magma_int_t ldda, magma_int_t ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue)
	CGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. More...

magma_int_t	magma_cgetrf_recpanel_batched (magma_int_t m, magma_int_t n, magma_int_t min_recpnb, magmaFloatComplex dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t dipiv_array, magma_int_t *dpivinfo_array, magma_int_t info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue)
	This is an internal routine that might have many assumption. More...

magma_int_t	magma_cgetrf_recpanel_native (magma_int_t m, magma_int_t n, magmaFloatComplex_ptr dA, magma_int_t ldda, magma_int_t dipiv, magma_int_t dipivinfo, magma_int_t *dinfo, magma_int_t gbstep, magma_queue_t queue, magma_queue_t update_queue)
	This is an internal routine. More...

magma_int_t	magma_cgetrf_vbatched_max_nocheck_work (magma_int_t m, magma_int_t n, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, magmaFloatComplex *dA_array, magma_int_t ldda, magma_int_t *dipiv_array, magma_int_t info_array, void work, magma_int_t lwork, magma_int_t batchCount, magma_queue_t queue)
	CGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. More...

magma_int_t	magma_cgetrf_vbatched (magma_int_t m, magma_int_t n, magmaFloatComplex *dA_array, magma_int_t ldda, magma_int_t *dipiv_array, magma_int_t info_array, magma_int_t batchCount, magma_queue_t queue)
	CGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. More...

magma_int_t	magma_dgetrf_batched (magma_int_t m, magma_int_t n, double dA_array, magma_int_t ldda, magma_int_t ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue)
	DGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. More...

magma_int_t	magma_dgetrf_recpanel_batched (magma_int_t m, magma_int_t n, magma_int_t min_recpnb, double dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t dipiv_array, magma_int_t *dpivinfo_array, magma_int_t info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue)
	This is an internal routine that might have many assumption. More...

magma_int_t	magma_dgetrf_recpanel_native (magma_int_t m, magma_int_t n, magmaDouble_ptr dA, magma_int_t ldda, magma_int_t dipiv, magma_int_t dipivinfo, magma_int_t *dinfo, magma_int_t gbstep, magma_queue_t queue, magma_queue_t update_queue)
	This is an internal routine. More...

magma_int_t	magma_dgetrf_vbatched_max_nocheck_work (magma_int_t m, magma_int_t n, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, double *dA_array, magma_int_t ldda, magma_int_t *dipiv_array, magma_int_t info_array, void work, magma_int_t lwork, magma_int_t batchCount, magma_queue_t queue)
	DGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. More...

magma_int_t	magma_dgetrf_vbatched (magma_int_t m, magma_int_t n, double *dA_array, magma_int_t ldda, magma_int_t *dipiv_array, magma_int_t info_array, magma_int_t batchCount, magma_queue_t queue)
	DGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. More...

magma_int_t	magma_sgetrf_batched (magma_int_t m, magma_int_t n, float dA_array, magma_int_t ldda, magma_int_t ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue)
	SGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. More...

magma_int_t	magma_sgetrf_recpanel_batched (magma_int_t m, magma_int_t n, magma_int_t min_recpnb, float dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t dipiv_array, magma_int_t *dpivinfo_array, magma_int_t info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue)
	This is an internal routine that might have many assumption. More...

magma_int_t	magma_sgetrf_recpanel_native (magma_int_t m, magma_int_t n, magmaFloat_ptr dA, magma_int_t ldda, magma_int_t dipiv, magma_int_t dipivinfo, magma_int_t *dinfo, magma_int_t gbstep, magma_queue_t queue, magma_queue_t update_queue)
	This is an internal routine. More...

magma_int_t	magma_sgetrf_vbatched_max_nocheck_work (magma_int_t m, magma_int_t n, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, float *dA_array, magma_int_t ldda, magma_int_t *dipiv_array, magma_int_t info_array, void work, magma_int_t lwork, magma_int_t batchCount, magma_queue_t queue)
	SGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. More...

magma_int_t	magma_sgetrf_vbatched (magma_int_t m, magma_int_t n, float *dA_array, magma_int_t ldda, magma_int_t *dipiv_array, magma_int_t info_array, magma_int_t batchCount, magma_queue_t queue)
	SGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. More...

magma_int_t	magma_zgetrf_batched (magma_int_t m, magma_int_t n, magmaDoubleComplex dA_array, magma_int_t ldda, magma_int_t ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue)
	ZGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. More...

magma_int_t	magma_zgetrf_recpanel_batched (magma_int_t m, magma_int_t n, magma_int_t min_recpnb, magmaDoubleComplex dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t dipiv_array, magma_int_t *dpivinfo_array, magma_int_t info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue)
	This is an internal routine that might have many assumption. More...

magma_int_t	magma_zgetrf_recpanel_native (magma_int_t m, magma_int_t n, magmaDoubleComplex_ptr dA, magma_int_t ldda, magma_int_t dipiv, magma_int_t dipivinfo, magma_int_t *dinfo, magma_int_t gbstep, magma_queue_t queue, magma_queue_t update_queue)
	This is an internal routine. More...

magma_int_t	magma_zgetrf_vbatched_max_nocheck_work (magma_int_t m, magma_int_t n, magma_int_t max_m, magma_int_t max_n, magma_int_t max_minmn, magma_int_t max_mxn, magmaDoubleComplex *dA_array, magma_int_t ldda, magma_int_t *dipiv_array, magma_int_t info_array, void work, magma_int_t lwork, magma_int_t batchCount, magma_queue_t queue)
	ZGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. More...

magma_int_t	magma_zgetrf_vbatched (magma_int_t m, magma_int_t n, magmaDoubleComplex *dA_array, magma_int_t ldda, magma_int_t *dipiv_array, magma_int_t info_array, magma_int_t batchCount, magma_queue_t queue)
	ZGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. More...

magma_int_t	magma_cgbtrf_batched (magma_int_t use_pivoting, magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku, magmaFloatComplex dA_array, magma_int_t ldda, magma_int_t ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue)
	cgbtrf_batched computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges. More...

magma_int_t	magma_cgetf2_nopiv_internal_batched (magma_int_t m, magma_int_t n, magmaFloatComplex *dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue)
	cgetf2_nopiv computes the non-pivoting LU factorization of an M-by-N matrix A. More...

magma_int_t	magma_cgetrf_batched_smallsq_noshfl (magma_int_t n, magmaFloatComplex dA_array, magma_int_t ldda, magma_int_t ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue)
	cgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges. More...

magma_int_t	magma_cgetrf_batched_smallsq_shfl (magma_int_t n, magmaFloatComplex dA_array, magma_int_t ldda, magma_int_t ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue)
	cgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges. More...

magma_int_t	magma_dgbtrf_batched (magma_int_t use_pivoting, magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku, double dA_array, magma_int_t ldda, magma_int_t ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue)
	dgbtrf_batched computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges. More...

magma_int_t	magma_dgetf2_nopiv_internal_batched (magma_int_t m, magma_int_t n, double *dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue)
	dgetf2_nopiv computes the non-pivoting LU factorization of an M-by-N matrix A. More...

magma_int_t	magma_dgetrf_batched_smallsq_noshfl (magma_int_t n, double dA_array, magma_int_t ldda, magma_int_t ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue)
	dgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges. More...

magma_int_t	magma_dgetrf_batched_smallsq_shfl (magma_int_t n, double dA_array, magma_int_t ldda, magma_int_t ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue)
	dgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges. More...

magma_int_t	magma_sgbtrf_batched (magma_int_t use_pivoting, magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku, float dA_array, magma_int_t ldda, magma_int_t ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue)
	sgbtrf_batched computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges. More...

magma_int_t	magma_sgetf2_nopiv_internal_batched (magma_int_t m, magma_int_t n, float *dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue)
	sgetf2_nopiv computes the non-pivoting LU factorization of an M-by-N matrix A. More...

magma_int_t	magma_sgetrf_batched_smallsq_noshfl (magma_int_t n, float dA_array, magma_int_t ldda, magma_int_t ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue)
	sgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges. More...

magma_int_t	magma_sgetrf_batched_smallsq_shfl (magma_int_t n, float dA_array, magma_int_t ldda, magma_int_t ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue)
	sgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges. More...

magma_int_t	magma_zgbtrf_batched (magma_int_t use_pivoting, magma_int_t m, magma_int_t n, magma_int_t kl, magma_int_t ku, magmaDoubleComplex dA_array, magma_int_t ldda, magma_int_t ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue)
	zgbtrf_batched computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges. More...

magma_int_t	magma_zgetf2_nopiv_internal_batched (magma_int_t m, magma_int_t n, magmaDoubleComplex *dA_array, magma_int_t ai, magma_int_t aj, magma_int_t ldda, magma_int_t info_array, magma_int_t gbstep, magma_int_t batchCount, magma_queue_t queue)
	zgetf2_nopiv computes the non-pivoting LU factorization of an M-by-N matrix A. More...

magma_int_t	magma_zgetrf_batched_smallsq_noshfl (magma_int_t n, magmaDoubleComplex dA_array, magma_int_t ldda, magma_int_t ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue)
	zgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges. More...

magma_int_t	magma_zgetrf_batched_smallsq_shfl (magma_int_t n, magmaDoubleComplex dA_array, magma_int_t ldda, magma_int_t ipiv_array, magma_int_t *info_array, magma_int_t batchCount, magma_queue_t queue)
	zgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges. More...

Detailed Description

Function Documentation

magma_int_t magma_cgetrf_batched	(	magma_int_t	m,
		magma_int_t	n,
		magmaFloatComplex **	dA_array,
		magma_int_t	ldda,
		magma_int_t **	ipiv_array,
		magma_int_t *	info_array,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

CGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters

[in]	m	INTEGER The number of rows of each matrix A. M >= 0.
[in]	n	INTEGER The number of columns of each matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	ipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_cgetrf_recpanel_batched	(	magma_int_t	m,
		magma_int_t	n,
		magma_int_t	min_recpnb,
		magmaFloatComplex **	dA_array,
		magma_int_t	ai,
		magma_int_t	aj,
		magma_int_t	ldda,
		magma_int_t **	dipiv_array,
		magma_int_t **	dpivinfo_array,
		magma_int_t *	info_array,
		magma_int_t	gbstep,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

This is an internal routine that might have many assumption.

Documentation is not fully completed

CGETRF_PANEL computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters

[in]	m	INTEGER The number of rows of each matrix A. M >= 0.
[in]	n	INTEGER The number of columns of each matrix A. N >= 0.
[in]	min_recpnb	INTEGER. Internal use. The recursive nb
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ai	INTEGER Row offset for A.
[in]	aj	INTEGER Column offset for A.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	dipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	dpivinfo_array	Array of pointers, dimension (batchCount), for internal use.
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	gbstep	INTEGER internal use.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_cgetrf_recpanel_native	(	magma_int_t	m,
		magma_int_t	n,
		magmaFloatComplex_ptr	dA,
		magma_int_t	ldda,
		magma_int_t *	dipiv,
		magma_int_t *	dipivinfo,
		magma_int_t *	dinfo,
		magma_int_t	gbstep,
		magma_queue_t	queue,
		magma_queue_t	update_queue
	)

This is an internal routine.

CGETRF_PANEL computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a GPU-only routine. The host CPU is not used.

Parameters

[in]	m	INTEGER The number of rows the matrix A. M >= 0.
[in]	n	INTEGER The number of columns the matrix A. N >= 0.
[in,out]	dA	A COMPLEX array on the GPU, dimension (LDDA,N). On entry, an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	INTEGER The leading dimension of A. LDDA >= max(1,M).
[out]	dipiv	An INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	dipivinfo	An INTEGER array, for internal use.
[out]	dinfo	INTEGER, stored on the GPU = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	gbstep	INTEGER internal use.
[in]	queue	magma_queue_t Queue to execute in.
[in]	update_queue	magma_queue_t Internal use.

magma_int_t magma_cgetrf_vbatched_max_nocheck_work	(	magma_int_t *	m,
		magma_int_t *	n,
		magma_int_t	max_m,
		magma_int_t	max_n,
		magma_int_t	max_minmn,
		magma_int_t	max_mxn,
		magmaFloatComplex **	dA_array,
		magma_int_t *	ldda,
		magma_int_t **	dipiv_array,
		magma_int_t *	info_array,
		void *	work,
		magma_int_t *	lwork,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

CGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is the variable-size batched version, which factors batchCount matrices of different sizes in parallel. Each matrix is assumed to have its own size and leading dimension.

Parameters

[in]	M	Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of rows of each matrix A. M[i] >= 0.
[in]	N	Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of columns of each matrix A. N[i] >= 0.
[in]	MAX_M	INTEGER The maximum number of rows across the batch
[in]	MAX_N	INTEGER The maximum number of columns across the batch
[in]	MAX_MINMN	INTEGER The maximum value of min(Mi, Ni) for i = 1, 2, ..., batchCount
[in]	MAX_MxN	INTEGER The maximum value of the product (Mi x Ni) for i = 1, 2, ..., batchCount
[in,out]	dA_array	Array of pointers on the GPU, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA[i],N[i]). On entry, each pointer is an M[i]-by-N[i] matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	Array of INTEGERs on the GPU Each is the leading dimension of each array A. LDDA[i] >= max(1,M[i]).
[out]	dipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M[i],N[i])) The pivot indices; for 1 <= p <= min(M[i],N[i]), row p of the matrix was interchanged with row IPIV(p).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	WORK	VOID pointer A workspace of size LWORK[0]
[in,out]	LWORK	INTEGER pointer If lwork[0] < 0, a workspace query is assumed, and lwork[0] is overwritten by the required workspace size in bytes. Otherwise, lwork[0] is the size of work
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_cgetrf_vbatched	(	magma_int_t *	m,
		magma_int_t *	n,
		magmaFloatComplex **	dA_array,
		magma_int_t *	ldda,
		magma_int_t **	dipiv_array,
		magma_int_t *	info_array,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

CGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is the variable-size batched version, which factors batchCount matrices of different sizes in parallel. Each matrix is assumed to have its own size and leading dimension.

Parameters

[in]	M	Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of rows of each matrix A. M[i] >= 0.
[in]	N	Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of columns of each matrix A. N[i] >= 0.
[in,out]	dA_array	Array of pointers on the GPU, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA[i],N[i]). On entry, each pointer is an M[i]-by-N[i] matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	Array of INTEGERs on the GPU Each is the leading dimension of each array A. LDDA[i] >= max(1,M[i]).
[out]	dipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M[i],N[i])) The pivot indices; for 1 <= p <= min(M[i],N[i]), row p of the matrix was interchanged with row IPIV(p).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_dgetrf_batched	(	magma_int_t	m,
		magma_int_t	n,
		double **	dA_array,
		magma_int_t	ldda,
		magma_int_t **	ipiv_array,
		magma_int_t *	info_array,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

DGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters

[in]	m	INTEGER The number of rows of each matrix A. M >= 0.
[in]	n	INTEGER The number of columns of each matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	ipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_dgetrf_recpanel_batched	(	magma_int_t	m,
		magma_int_t	n,
		magma_int_t	min_recpnb,
		double **	dA_array,
		magma_int_t	ai,
		magma_int_t	aj,
		magma_int_t	ldda,
		magma_int_t **	dipiv_array,
		magma_int_t **	dpivinfo_array,
		magma_int_t *	info_array,
		magma_int_t	gbstep,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

This is an internal routine that might have many assumption.

Documentation is not fully completed

DGETRF_PANEL computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters

[in]	m	INTEGER The number of rows of each matrix A. M >= 0.
[in]	n	INTEGER The number of columns of each matrix A. N >= 0.
[in]	min_recpnb	INTEGER. Internal use. The recursive nb
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ai	INTEGER Row offset for A.
[in]	aj	INTEGER Column offset for A.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	dipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	dpivinfo_array	Array of pointers, dimension (batchCount), for internal use.
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	gbstep	INTEGER internal use.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_dgetrf_recpanel_native	(	magma_int_t	m,
		magma_int_t	n,
		magmaDouble_ptr	dA,
		magma_int_t	ldda,
		magma_int_t *	dipiv,
		magma_int_t *	dipivinfo,
		magma_int_t *	dinfo,
		magma_int_t	gbstep,
		magma_queue_t	queue,
		magma_queue_t	update_queue
	)

This is an internal routine.

DGETRF_PANEL computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a GPU-only routine. The host CPU is not used.

Parameters

[in]	m	INTEGER The number of rows the matrix A. M >= 0.
[in]	n	INTEGER The number of columns the matrix A. N >= 0.
[in,out]	dA	A DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	INTEGER The leading dimension of A. LDDA >= max(1,M).
[out]	dipiv	An INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	dipivinfo	An INTEGER array, for internal use.
[out]	dinfo	INTEGER, stored on the GPU = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	gbstep	INTEGER internal use.
[in]	queue	magma_queue_t Queue to execute in.
[in]	update_queue	magma_queue_t Internal use.

magma_int_t magma_dgetrf_vbatched_max_nocheck_work	(	magma_int_t *	m,
		magma_int_t *	n,
		magma_int_t	max_m,
		magma_int_t	max_n,
		magma_int_t	max_minmn,
		magma_int_t	max_mxn,
		double **	dA_array,
		magma_int_t *	ldda,
		magma_int_t **	dipiv_array,
		magma_int_t *	info_array,
		void *	work,
		magma_int_t *	lwork,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

DGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is the variable-size batched version, which factors batchCount matrices of different sizes in parallel. Each matrix is assumed to have its own size and leading dimension.

Parameters

[in]	M	Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of rows of each matrix A. M[i] >= 0.
[in]	N	Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of columns of each matrix A. N[i] >= 0.
[in]	MAX_M	INTEGER The maximum number of rows across the batch
[in]	MAX_N	INTEGER The maximum number of columns across the batch
[in]	MAX_MINMN	INTEGER The maximum value of min(Mi, Ni) for i = 1, 2, ..., batchCount
[in]	MAX_MxN	INTEGER The maximum value of the product (Mi x Ni) for i = 1, 2, ..., batchCount
[in,out]	dA_array	Array of pointers on the GPU, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA[i],N[i]). On entry, each pointer is an M[i]-by-N[i] matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	Array of INTEGERs on the GPU Each is the leading dimension of each array A. LDDA[i] >= max(1,M[i]).
[out]	dipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M[i],N[i])) The pivot indices; for 1 <= p <= min(M[i],N[i]), row p of the matrix was interchanged with row IPIV(p).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	WORK	VOID pointer A workspace of size LWORK[0]
[in,out]	LWORK	INTEGER pointer If lwork[0] < 0, a workspace query is assumed, and lwork[0] is overwritten by the required workspace size in bytes. Otherwise, lwork[0] is the size of work
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_dgetrf_vbatched	(	magma_int_t *	m,
		magma_int_t *	n,
		double **	dA_array,
		magma_int_t *	ldda,
		magma_int_t **	dipiv_array,
		magma_int_t *	info_array,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

DGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is the variable-size batched version, which factors batchCount matrices of different sizes in parallel. Each matrix is assumed to have its own size and leading dimension.

Parameters

[in]	M	Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of rows of each matrix A. M[i] >= 0.
[in]	N	Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of columns of each matrix A. N[i] >= 0.
[in,out]	dA_array	Array of pointers on the GPU, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA[i],N[i]). On entry, each pointer is an M[i]-by-N[i] matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	Array of INTEGERs on the GPU Each is the leading dimension of each array A. LDDA[i] >= max(1,M[i]).
[out]	dipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M[i],N[i])) The pivot indices; for 1 <= p <= min(M[i],N[i]), row p of the matrix was interchanged with row IPIV(p).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_sgetrf_batched	(	magma_int_t	m,
		magma_int_t	n,
		float **	dA_array,
		magma_int_t	ldda,
		magma_int_t **	ipiv_array,
		magma_int_t *	info_array,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

SGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters

[in]	m	INTEGER The number of rows of each matrix A. M >= 0.
[in]	n	INTEGER The number of columns of each matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	ipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_sgetrf_recpanel_batched	(	magma_int_t	m,
		magma_int_t	n,
		magma_int_t	min_recpnb,
		float **	dA_array,
		magma_int_t	ai,
		magma_int_t	aj,
		magma_int_t	ldda,
		magma_int_t **	dipiv_array,
		magma_int_t **	dpivinfo_array,
		magma_int_t *	info_array,
		magma_int_t	gbstep,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

This is an internal routine that might have many assumption.

Documentation is not fully completed

SGETRF_PANEL computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters

[in]	m	INTEGER The number of rows of each matrix A. M >= 0.
[in]	n	INTEGER The number of columns of each matrix A. N >= 0.
[in]	min_recpnb	INTEGER. Internal use. The recursive nb
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ai	INTEGER Row offset for A.
[in]	aj	INTEGER Column offset for A.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	dipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	dpivinfo_array	Array of pointers, dimension (batchCount), for internal use.
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	gbstep	INTEGER internal use.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_sgetrf_recpanel_native	(	magma_int_t	m,
		magma_int_t	n,
		magmaFloat_ptr	dA,
		magma_int_t	ldda,
		magma_int_t *	dipiv,
		magma_int_t *	dipivinfo,
		magma_int_t *	dinfo,
		magma_int_t	gbstep,
		magma_queue_t	queue,
		magma_queue_t	update_queue
	)

This is an internal routine.

SGETRF_PANEL computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a GPU-only routine. The host CPU is not used.

Parameters

[in]	m	INTEGER The number of rows the matrix A. M >= 0.
[in]	n	INTEGER The number of columns the matrix A. N >= 0.
[in,out]	dA	A REAL array on the GPU, dimension (LDDA,N). On entry, an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	INTEGER The leading dimension of A. LDDA >= max(1,M).
[out]	dipiv	An INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	dipivinfo	An INTEGER array, for internal use.
[out]	dinfo	INTEGER, stored on the GPU = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	gbstep	INTEGER internal use.
[in]	queue	magma_queue_t Queue to execute in.
[in]	update_queue	magma_queue_t Internal use.

magma_int_t magma_sgetrf_vbatched_max_nocheck_work	(	magma_int_t *	m,
		magma_int_t *	n,
		magma_int_t	max_m,
		magma_int_t	max_n,
		magma_int_t	max_minmn,
		magma_int_t	max_mxn,
		float **	dA_array,
		magma_int_t *	ldda,
		magma_int_t **	dipiv_array,
		magma_int_t *	info_array,
		void *	work,
		magma_int_t *	lwork,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

SGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is the variable-size batched version, which factors batchCount matrices of different sizes in parallel. Each matrix is assumed to have its own size and leading dimension.

Parameters

[in]	M	Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of rows of each matrix A. M[i] >= 0.
[in]	N	Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of columns of each matrix A. N[i] >= 0.
[in]	MAX_M	INTEGER The maximum number of rows across the batch
[in]	MAX_N	INTEGER The maximum number of columns across the batch
[in]	MAX_MINMN	INTEGER The maximum value of min(Mi, Ni) for i = 1, 2, ..., batchCount
[in]	MAX_MxN	INTEGER The maximum value of the product (Mi x Ni) for i = 1, 2, ..., batchCount
[in,out]	dA_array	Array of pointers on the GPU, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA[i],N[i]). On entry, each pointer is an M[i]-by-N[i] matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	Array of INTEGERs on the GPU Each is the leading dimension of each array A. LDDA[i] >= max(1,M[i]).
[out]	dipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M[i],N[i])) The pivot indices; for 1 <= p <= min(M[i],N[i]), row p of the matrix was interchanged with row IPIV(p).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	WORK	VOID pointer A workspace of size LWORK[0]
[in,out]	LWORK	INTEGER pointer If lwork[0] < 0, a workspace query is assumed, and lwork[0] is overwritten by the required workspace size in bytes. Otherwise, lwork[0] is the size of work
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_sgetrf_vbatched	(	magma_int_t *	m,
		magma_int_t *	n,
		float **	dA_array,
		magma_int_t *	ldda,
		magma_int_t **	dipiv_array,
		magma_int_t *	info_array,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

SGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is the variable-size batched version, which factors batchCount matrices of different sizes in parallel. Each matrix is assumed to have its own size and leading dimension.

Parameters

[in]	M	Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of rows of each matrix A. M[i] >= 0.
[in]	N	Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of columns of each matrix A. N[i] >= 0.
[in,out]	dA_array	Array of pointers on the GPU, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA[i],N[i]). On entry, each pointer is an M[i]-by-N[i] matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	Array of INTEGERs on the GPU Each is the leading dimension of each array A. LDDA[i] >= max(1,M[i]).
[out]	dipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M[i],N[i])) The pivot indices; for 1 <= p <= min(M[i],N[i]), row p of the matrix was interchanged with row IPIV(p).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_zgetrf_batched	(	magma_int_t	m,
		magma_int_t	n,
		magmaDoubleComplex **	dA_array,
		magma_int_t	ldda,
		magma_int_t **	ipiv_array,
		magma_int_t *	info_array,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

ZGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters

[in]	m	INTEGER The number of rows of each matrix A. M >= 0.
[in]	n	INTEGER The number of columns of each matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	ipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_zgetrf_recpanel_batched	(	magma_int_t	m,
		magma_int_t	n,
		magma_int_t	min_recpnb,
		magmaDoubleComplex **	dA_array,
		magma_int_t	ai,
		magma_int_t	aj,
		magma_int_t	ldda,
		magma_int_t **	dipiv_array,
		magma_int_t **	dpivinfo_array,
		magma_int_t *	info_array,
		magma_int_t	gbstep,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

This is an internal routine that might have many assumption.

Documentation is not fully completed

ZGETRF_PANEL computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters

[in]	m	INTEGER The number of rows of each matrix A. M >= 0.
[in]	n	INTEGER The number of columns of each matrix A. N >= 0.
[in]	min_recpnb	INTEGER. Internal use. The recursive nb
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ai	INTEGER Row offset for A.
[in]	aj	INTEGER Column offset for A.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	dipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	dpivinfo_array	Array of pointers, dimension (batchCount), for internal use.
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	gbstep	INTEGER internal use.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_zgetrf_recpanel_native	(	magma_int_t	m,
		magma_int_t	n,
		magmaDoubleComplex_ptr	dA,
		magma_int_t	ldda,
		magma_int_t *	dipiv,
		magma_int_t *	dipivinfo,
		magma_int_t *	dinfo,
		magma_int_t	gbstep,
		magma_queue_t	queue,
		magma_queue_t	update_queue
	)

This is an internal routine.

ZGETRF_PANEL computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a GPU-only routine. The host CPU is not used.

Parameters

[in]	m	INTEGER The number of rows the matrix A. M >= 0.
[in]	n	INTEGER The number of columns the matrix A. N >= 0.
[in,out]	dA	A COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	INTEGER The leading dimension of A. LDDA >= max(1,M).
[out]	dipiv	An INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	dipivinfo	An INTEGER array, for internal use.
[out]	dinfo	INTEGER, stored on the GPU = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	gbstep	INTEGER internal use.
[in]	queue	magma_queue_t Queue to execute in.
[in]	update_queue	magma_queue_t Internal use.

magma_int_t magma_zgetrf_vbatched_max_nocheck_work	(	magma_int_t *	m,
		magma_int_t *	n,
		magma_int_t	max_m,
		magma_int_t	max_n,
		magma_int_t	max_minmn,
		magma_int_t	max_mxn,
		magmaDoubleComplex **	dA_array,
		magma_int_t *	ldda,
		magma_int_t **	dipiv_array,
		magma_int_t *	info_array,
		void *	work,
		magma_int_t *	lwork,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

ZGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is the variable-size batched version, which factors batchCount matrices of different sizes in parallel. Each matrix is assumed to have its own size and leading dimension.

Parameters

[in]	M	Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of rows of each matrix A. M[i] >= 0.
[in]	N	Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of columns of each matrix A. N[i] >= 0.
[in]	MAX_M	INTEGER The maximum number of rows across the batch
[in]	MAX_N	INTEGER The maximum number of columns across the batch
[in]	MAX_MINMN	INTEGER The maximum value of min(Mi, Ni) for i = 1, 2, ..., batchCount
[in]	MAX_MxN	INTEGER The maximum value of the product (Mi x Ni) for i = 1, 2, ..., batchCount
[in,out]	dA_array	Array of pointers on the GPU, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA[i],N[i]). On entry, each pointer is an M[i]-by-N[i] matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	Array of INTEGERs on the GPU Each is the leading dimension of each array A. LDDA[i] >= max(1,M[i]).
[out]	dipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M[i],N[i])) The pivot indices; for 1 <= p <= min(M[i],N[i]), row p of the matrix was interchanged with row IPIV(p).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	WORK	VOID pointer A workspace of size LWORK[0]
[in,out]	LWORK	INTEGER pointer If lwork[0] < 0, a workspace query is assumed, and lwork[0] is overwritten by the required workspace size in bytes. Otherwise, lwork[0] is the size of work
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_zgetrf_vbatched	(	magma_int_t *	m,
		magma_int_t *	n,
		magmaDoubleComplex **	dA_array,
		magma_int_t *	ldda,
		magma_int_t **	dipiv_array,
		magma_int_t *	info_array,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

ZGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges.

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is the variable-size batched version, which factors batchCount matrices of different sizes in parallel. Each matrix is assumed to have its own size and leading dimension.

Parameters

[in]	M	Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of rows of each matrix A. M[i] >= 0.
[in]	N	Array of INTEGERs on the GPU, dimension (batchCount) Each is the number of columns of each matrix A. N[i] >= 0.
[in,out]	dA_array	Array of pointers on the GPU, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA[i],N[i]). On entry, each pointer is an M[i]-by-N[i] matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	Array of INTEGERs on the GPU Each is the leading dimension of each array A. LDDA[i] >= max(1,M[i]).
[out]	dipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M[i],N[i])) The pivot indices; for 1 <= p <= min(M[i],N[i]), row p of the matrix was interchanged with row IPIV(p).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_cgbtrf_batched	(	magma_int_t	use_pivoting,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	kl,
		magma_int_t	ku,
		magmaFloatComplex **	dA_array,
		magma_int_t	ldda,
		magma_int_t **	ipiv_array,
		magma_int_t *	info_array,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

cgbtrf_batched computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges.

This routine can deal only with square matrices of size up to 32

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters

[in]	n	INTEGER The size of each matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	ipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_cgetf2_nopiv_internal_batched	(	magma_int_t	m,
		magma_int_t	n,
		magmaFloatComplex **	dA_array,
		magma_int_t	ai,
		magma_int_t	aj,
		magma_int_t	ldda,
		magma_int_t *	info_array,
		magma_int_t	gbstep,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

cgetf2_nopiv computes the non-pivoting LU factorization of an M-by-N matrix A.

This routine can deal with matrices of limited widths, so it is for internal use.

The factorization has the form A = L * U where L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is a batched version that factors batchCount M-by-N matrices in parallel.

Parameters

[in]	m	INTEGER The number of rows the matrix A. N >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = L*U; the unit diagonal elements of L are not stored.
[in]	ai	INTEGER Row offset for dA_array.
[in]	aj	INTEGER Column offset for dA_array.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	gbstep	INTEGER Internal use.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_cgetrf_batched_smallsq_noshfl	(	magma_int_t	n,
		magmaFloatComplex **	dA_array,
		magma_int_t	ldda,
		magma_int_t **	ipiv_array,
		magma_int_t *	info_array,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

cgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges.

This routine can deal only with square matrices of size up to 32

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters

[in]	n	INTEGER The size of each matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	ipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_cgetrf_batched_smallsq_shfl	(	magma_int_t	n,
		magmaFloatComplex **	dA_array,
		magma_int_t	ldda,
		magma_int_t **	ipiv_array,
		magma_int_t *	info_array,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

cgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges.

This routine can deal only with square matrices of size up to 32

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters

[in]	n	INTEGER The size of each matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	ipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_dgbtrf_batched	(	magma_int_t	use_pivoting,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	kl,
		magma_int_t	ku,
		double **	dA_array,
		magma_int_t	ldda,
		magma_int_t **	ipiv_array,
		magma_int_t *	info_array,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

dgbtrf_batched computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges.

This routine can deal only with square matrices of size up to 32

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters

[in]	n	INTEGER The size of each matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	ipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_dgetf2_nopiv_internal_batched	(	magma_int_t	m,
		magma_int_t	n,
		double **	dA_array,
		magma_int_t	ai,
		magma_int_t	aj,
		magma_int_t	ldda,
		magma_int_t *	info_array,
		magma_int_t	gbstep,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

dgetf2_nopiv computes the non-pivoting LU factorization of an M-by-N matrix A.

This routine can deal with matrices of limited widths, so it is for internal use.

The factorization has the form A = L * U where L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is a batched version that factors batchCount M-by-N matrices in parallel.

Parameters

[in]	m	INTEGER The number of rows the matrix A. N >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = L*U; the unit diagonal elements of L are not stored.
[in]	ai	INTEGER Row offset for dA_array.
[in]	aj	INTEGER Column offset for dA_array.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	gbstep	INTEGER Internal use.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_dgetrf_batched_smallsq_noshfl	(	magma_int_t	n,
		double **	dA_array,
		magma_int_t	ldda,
		magma_int_t **	ipiv_array,
		magma_int_t *	info_array,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

dgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges.

This routine can deal only with square matrices of size up to 32

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters

[in]	n	INTEGER The size of each matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	ipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_dgetrf_batched_smallsq_shfl	(	magma_int_t	n,
		double **	dA_array,
		magma_int_t	ldda,
		magma_int_t **	ipiv_array,
		magma_int_t *	info_array,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

dgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges.

This routine can deal only with square matrices of size up to 32

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters

[in]	n	INTEGER The size of each matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	ipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_sgbtrf_batched	(	magma_int_t	use_pivoting,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	kl,
		magma_int_t	ku,
		float **	dA_array,
		magma_int_t	ldda,
		magma_int_t **	ipiv_array,
		magma_int_t *	info_array,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

sgbtrf_batched computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges.

This routine can deal only with square matrices of size up to 32

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters

[in]	n	INTEGER The size of each matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	ipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_sgetf2_nopiv_internal_batched	(	magma_int_t	m,
		magma_int_t	n,
		float **	dA_array,
		magma_int_t	ai,
		magma_int_t	aj,
		magma_int_t	ldda,
		magma_int_t *	info_array,
		magma_int_t	gbstep,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

sgetf2_nopiv computes the non-pivoting LU factorization of an M-by-N matrix A.

This routine can deal with matrices of limited widths, so it is for internal use.

The factorization has the form A = L * U where L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is a batched version that factors batchCount M-by-N matrices in parallel.

Parameters

[in]	m	INTEGER The number of rows the matrix A. N >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = L*U; the unit diagonal elements of L are not stored.
[in]	ai	INTEGER Row offset for dA_array.
[in]	aj	INTEGER Column offset for dA_array.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	gbstep	INTEGER Internal use.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_sgetrf_batched_smallsq_noshfl	(	magma_int_t	n,
		float **	dA_array,
		magma_int_t	ldda,
		magma_int_t **	ipiv_array,
		magma_int_t *	info_array,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

sgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges.

This routine can deal only with square matrices of size up to 32

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters

[in]	n	INTEGER The size of each matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	ipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_sgetrf_batched_smallsq_shfl	(	magma_int_t	n,
		float **	dA_array,
		magma_int_t	ldda,
		magma_int_t **	ipiv_array,
		magma_int_t *	info_array,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

sgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges.

This routine can deal only with square matrices of size up to 32

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters

[in]	n	INTEGER The size of each matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a REAL array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	ipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_zgbtrf_batched	(	magma_int_t	use_pivoting,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	kl,
		magma_int_t	ku,
		magmaDoubleComplex **	dA_array,
		magma_int_t	ldda,
		magma_int_t **	ipiv_array,
		magma_int_t *	info_array,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

zgbtrf_batched computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges.

This routine can deal only with square matrices of size up to 32

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters

[in]	n	INTEGER The size of each matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	ipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_zgetf2_nopiv_internal_batched	(	magma_int_t	m,
		magma_int_t	n,
		magmaDoubleComplex **	dA_array,
		magma_int_t	ai,
		magma_int_t	aj,
		magma_int_t	ldda,
		magma_int_t *	info_array,
		magma_int_t	gbstep,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

zgetf2_nopiv computes the non-pivoting LU factorization of an M-by-N matrix A.

This routine can deal with matrices of limited widths, so it is for internal use.

The factorization has the form A = L * U where L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is a batched version that factors batchCount M-by-N matrices in parallel.

Parameters

[in]	m	INTEGER The number of rows the matrix A. N >= 0.
[in]	n	INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = L*U; the unit diagonal elements of L are not stored.
[in]	ai	INTEGER Row offset for dA_array.
[in]	aj	INTEGER Column offset for dA_array.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	gbstep	INTEGER Internal use.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_zgetrf_batched_smallsq_noshfl	(	magma_int_t	n,
		magmaDoubleComplex **	dA_array,
		magma_int_t	ldda,
		magma_int_t **	ipiv_array,
		magma_int_t *	info_array,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

zgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges.

This routine can deal only with square matrices of size up to 32

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters

[in]	n	INTEGER The size of each matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	ipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

magma_int_t magma_zgetrf_batched_smallsq_shfl	(	magma_int_t	n,
		magmaDoubleComplex **	dA_array,
		magma_int_t	ldda,
		magma_int_t **	ipiv_array,
		magma_int_t *	info_array,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

zgetrf_batched_smallsq_noshfl computes the LU factorization of a square N-by-N matrix A using partial pivoting with row interchanges.

This routine can deal only with square matrices of size up to 32

The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 3 BLAS version of the algorithm.

This is a batched version that factors batchCount M-by-N matrices in parallel. dA, ipiv, and info become arrays with one entry per matrix.

Parameters

[in]	n	INTEGER The size of each matrix A. N >= 0.
[in,out]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, each pointer is an M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = PLU; the unit diagonal elements of L are not stored.
[in]	ldda	INTEGER The leading dimension of each array A. LDDA >= max(1,M).
[out]	ipiv_array	Array of pointers, dimension (batchCount), for corresponding matrices. Each is an INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	info_array	Array of INTEGERs, dimension (batchCount), for corresponding matrices. = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

Functions

Detailed Description

Function Documentation