\( C = op(A)^{-1} B \) or \( C = B \;op(A)^{-1} \) where \( A \) is triangular More...

Functions
void	magmablas_ctrsm_inv_outofplace_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t ldda, magmaFloatComplex **dB_array, magma_int_t lddb, magmaFloatComplex **dX_array, magma_int_t lddx, magmaFloatComplex **dinvA_array, magma_int_t dinvA_length, magmaFloatComplex **dA_displ, magmaFloatComplex **dB_displ, magmaFloatComplex **dX_displ, magmaFloatComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
	ctrsm_outofplace solves one of the matrix equations on gpu More...

void	magmablas_ctrsm_inv_work_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t ldda, magmaFloatComplex **dB_array, magma_int_t lddb, magmaFloatComplex **dX_array, magma_int_t lddx, magmaFloatComplex **dinvA_array, magma_int_t dinvA_length, magmaFloatComplex **dA_displ, magmaFloatComplex **dB_displ, magmaFloatComplex **dX_displ, magmaFloatComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
	ctrsm_work solves one of the matrix equations on gpu More...

void	magmablas_ctrsm_inv_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t ldda, magmaFloatComplex **dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue)
	ctrsm solves one of the matrix equations on gpu More...

void	magmablas_ctrsm_inv_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t *ldda, magmaFloatComplex **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_queue_t queue)
	ctrsm solves one of the matrix equations on gpu More...

void	magmablas_ctrsm_inv_outofplace_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t *m, magma_int_t *n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t *ldda, magmaFloatComplex **dB_array, magma_int_t *lddb, magmaFloatComplex **dX_array, magma_int_t *lddx, magmaFloatComplex **dinvA_array, magma_int_t *dinvA_length, magmaFloatComplex **dA_displ, magmaFloatComplex **dB_displ, magmaFloatComplex **dX_displ, magmaFloatComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
	ctrsm_outofplace solves one of the matrix equations on gpu More...

void	magmablas_ctrsm_inv_work_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex *dA_array, magma_int_t ldda, magmaFloatComplex *dB_array, magma_int_t lddb, magmaFloatComplex *dX_array, magma_int_t lddx, magmaFloatComplex *dinvA_array, magma_int_t dinvA_length, magmaFloatComplex dA_displ, magmaFloatComplex dB_displ, magmaFloatComplex dX_displ, magmaFloatComplex dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
	ctrsm_work solves one of the matrix equations on gpu More...

void	magmablas_ctrsm_inv_vbatched_max_nocheck (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex *dA_array, magma_int_t ldda, magmaFloatComplex *dB_array, magma_int_t lddb, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)

void	magmablas_ctrsm_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex *dA_array, magma_int_t ldda, magmaFloatComplex *dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue)
	ctrsm solves one of the matrix equations on gpu More...

void	magmablas_dtrsm_inv_outofplace_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, double alpha, double dA_array, magma_int_t ldda, double dB_array, magma_int_t lddb, double dX_array, magma_int_t lddx, double dinvA_array, magma_int_t dinvA_length, double dA_displ, double dB_displ, double dX_displ, double dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
	dtrsm_outofplace solves one of the matrix equations on gpu More...

void	magmablas_dtrsm_inv_work_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, double alpha, double dA_array, magma_int_t ldda, double dB_array, magma_int_t lddb, double dX_array, magma_int_t lddx, double dinvA_array, magma_int_t dinvA_length, double dA_displ, double dB_displ, double dX_displ, double dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
	dtrsm_work solves one of the matrix equations on gpu More...

void	magmablas_dtrsm_inv_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, double alpha, double dA_array, magma_int_t ldda, double dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue)
	dtrsm solves one of the matrix equations on gpu More...

void	magmablas_dtrsm_inv_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, double alpha, double *dA_array, magma_int_t ldda, double *dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue)
	dtrsm solves one of the matrix equations on gpu More...

void	magmablas_dtrsm_inv_outofplace_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, double alpha, double *dA_array, magma_int_t ldda, double *dB_array, magma_int_t lddb, double *dX_array, magma_int_t lddx, double *dinvA_array, magma_int_t dinvA_length, double dA_displ, double dB_displ, double dX_displ, double dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
	dtrsm_outofplace solves one of the matrix equations on gpu More...

void	magmablas_dtrsm_inv_work_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, double alpha, double *dA_array, magma_int_t ldda, double *dB_array, magma_int_t lddb, double *dX_array, magma_int_t lddx, double *dinvA_array, magma_int_t dinvA_length, double dA_displ, double dB_displ, double dX_displ, double dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
	dtrsm_work solves one of the matrix equations on gpu More...

void	magmablas_dtrsm_inv_vbatched_max_nocheck (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, double alpha, double *dA_array, magma_int_t ldda, double *dB_array, magma_int_t lddb, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)

void	magmablas_dtrsm_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, double alpha, double *dA_array, magma_int_t ldda, double *dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue)
	dtrsm solves one of the matrix equations on gpu More...

void	magmablas_strsm_inv_outofplace_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, float alpha, float dA_array, magma_int_t ldda, float dB_array, magma_int_t lddb, float dX_array, magma_int_t lddx, float dinvA_array, magma_int_t dinvA_length, float dA_displ, float dB_displ, float dX_displ, float dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
	strsm_outofplace solves one of the matrix equations on gpu More...

void	magmablas_strsm_inv_work_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, float alpha, float dA_array, magma_int_t ldda, float dB_array, magma_int_t lddb, float dX_array, magma_int_t lddx, float dinvA_array, magma_int_t dinvA_length, float dA_displ, float dB_displ, float dX_displ, float dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
	strsm_work solves one of the matrix equations on gpu More...

void	magmablas_strsm_inv_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, float alpha, float dA_array, magma_int_t ldda, float dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue)
	strsm solves one of the matrix equations on gpu More...

void	magmablas_strsm_inv_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, float alpha, float *dA_array, magma_int_t ldda, float *dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue)
	strsm solves one of the matrix equations on gpu More...

void	magmablas_strsm_inv_outofplace_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, float alpha, float *dA_array, magma_int_t ldda, float *dB_array, magma_int_t lddb, float *dX_array, magma_int_t lddx, float *dinvA_array, magma_int_t dinvA_length, float dA_displ, float dB_displ, float dX_displ, float dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
	strsm_outofplace solves one of the matrix equations on gpu More...

void	magmablas_strsm_inv_work_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, float alpha, float *dA_array, magma_int_t ldda, float *dB_array, magma_int_t lddb, float *dX_array, magma_int_t lddx, float *dinvA_array, magma_int_t dinvA_length, float dA_displ, float dB_displ, float dX_displ, float dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
	strsm_work solves one of the matrix equations on gpu More...

void	magmablas_strsm_inv_vbatched_max_nocheck (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, float alpha, float *dA_array, magma_int_t ldda, float *dB_array, magma_int_t lddb, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)

void	magmablas_strsm_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, float alpha, float *dA_array, magma_int_t ldda, float *dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue)
	strsm solves one of the matrix equations on gpu More...

void	magmablas_ztrsm_inv_outofplace_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex dA_array, magma_int_t ldda, magmaDoubleComplex dB_array, magma_int_t lddb, magmaDoubleComplex dX_array, magma_int_t lddx, magmaDoubleComplex dinvA_array, magma_int_t dinvA_length, magmaDoubleComplex dA_displ, magmaDoubleComplex dB_displ, magmaDoubleComplex dX_displ, magmaDoubleComplex dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
	ztrsm_outofplace solves one of the matrix equations on gpu More...

void	magmablas_ztrsm_inv_work_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex dA_array, magma_int_t ldda, magmaDoubleComplex dB_array, magma_int_t lddb, magmaDoubleComplex dX_array, magma_int_t lddx, magmaDoubleComplex dinvA_array, magma_int_t dinvA_length, magmaDoubleComplex dA_displ, magmaDoubleComplex dB_displ, magmaDoubleComplex dX_displ, magmaDoubleComplex dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
	ztrsm_work solves one of the matrix equations on gpu More...

void	magmablas_ztrsm_inv_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex dA_array, magma_int_t ldda, magmaDoubleComplex dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue)
	ztrsm solves one of the matrix equations on gpu More...

void	magmablas_ztrsm_inv_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex *dA_array, magma_int_t ldda, magmaDoubleComplex *dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue)
	ztrsm solves one of the matrix equations on gpu More...

void	magmablas_ztrsm_inv_outofplace_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex *dA_array, magma_int_t ldda, magmaDoubleComplex *dB_array, magma_int_t lddb, magmaDoubleComplex *dX_array, magma_int_t lddx, magmaDoubleComplex *dinvA_array, magma_int_t dinvA_length, magmaDoubleComplex dA_displ, magmaDoubleComplex dB_displ, magmaDoubleComplex dX_displ, magmaDoubleComplex dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
	ztrsm_outofplace solves one of the matrix equations on gpu More...

void	magmablas_ztrsm_inv_work_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex *dA_array, magma_int_t ldda, magmaDoubleComplex *dB_array, magma_int_t lddb, magmaDoubleComplex *dX_array, magma_int_t lddx, magmaDoubleComplex *dinvA_array, magma_int_t dinvA_length, magmaDoubleComplex dA_displ, magmaDoubleComplex dB_displ, magmaDoubleComplex dX_displ, magmaDoubleComplex dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
	ztrsm_work solves one of the matrix equations on gpu More...

void	magmablas_ztrsm_inv_vbatched_max_nocheck (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex *dA_array, magma_int_t ldda, magmaDoubleComplex *dB_array, magma_int_t lddb, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)

void	magmablas_ztrsm_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex *dA_array, magma_int_t ldda, magmaDoubleComplex *dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue)
	ztrsm solves one of the matrix equations on gpu More...

Detailed Description

\( C = op(A)^{-1} B \) or \( C = B \;op(A)^{-1} \) where \( A \) is triangular

Function Documentation

void magmablas_ctrsm_inv_outofplace_batched	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t	flag,
		magma_int_t	m,
		magma_int_t	n,
		magmaFloatComplex	alpha,
		magmaFloatComplex **	dA_array,
		magma_int_t	ldda,
		magmaFloatComplex **	dB_array,
		magma_int_t	lddb,
		magmaFloatComplex **	dX_array,
		magma_int_t	lddx,
		magmaFloatComplex **	dinvA_array,
		magma_int_t	dinvA_length,
		magmaFloatComplex **	dA_displ,
		magmaFloatComplex **	dB_displ,
		magmaFloatComplex **	dX_displ,
		magmaFloatComplex **	dinvA_displ,
		magma_int_t	resetozero,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

ctrsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ctrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters

[in]	side	magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B.
[in]	uplo	magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix.
[in]	transA	magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H.
[in]	diag	magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular.
[in]	flag	BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]	m	INTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]	n	INTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]	alpha	COMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]	ldda	INTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in]	dB_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
[in]	lddb	INTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in,out]	dX_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array X of dimension ( lddx, n ). On entry, should be set to 0. On exit, the solution matrix X.
[in]	lddx	INTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ).
	dinvA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB; if side == MagmaRight, dinvA must be of size >= ceil(n/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB.
[in]	dinvA_length	INTEGER The size of each workspace matrix dinvA
	dA_displ	(workspace) Array of pointers, dimension (batchCount).
	dB_displ	(workspace) Array of pointers, dimension (batchCount).
	dX_displ	(workspace) Array of pointers, dimension (batchCount).
	dinvA_displ	(workspace) Array of pointers, dimension (batchCount).
[in]	resetozero	INTEGER Used internally by CTRTRI_DIAG routine
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_ctrsm_inv_work_batched	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t	flag,
		magma_int_t	m,
		magma_int_t	n,
		magmaFloatComplex	alpha,
		magmaFloatComplex **	dA_array,
		magma_int_t	ldda,
		magmaFloatComplex **	dB_array,
		magma_int_t	lddb,
		magmaFloatComplex **	dX_array,
		magma_int_t	lddx,
		magmaFloatComplex **	dinvA_array,
		magma_int_t	dinvA_length,
		magmaFloatComplex **	dA_displ,
		magmaFloatComplex **	dB_displ,
		magmaFloatComplex **	dX_displ,
		magmaFloatComplex **	dinvA_displ,
		magma_int_t	resetozero,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

ctrsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ctrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters

[in]	side	magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B.
[in]	uplo	magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix.
[in]	transA	magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H.
[in]	diag	magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular.
[in]	flag	BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]	m	INTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]	n	INTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]	alpha	COMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]	ldda	INTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in,out]	dB_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X
[in]	lddb	INTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in,out]	dX_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array X of dimension ( lddx, n ). On entry, should be set to 0. On exit, the solution matrix X.
[in]	lddx	INTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ).
	dinvA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB; if side == MagmaRight, dinvA must be of size >= ceil(n/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB, where CTRTRI_BATCHED_NB = 128.
[in]	dinvA_length	INTEGER The size of each workspace matrix dinvA
	dA_displ	(workspace) Array of pointers, dimension (batchCount).
	dB_displ	(workspace) Array of pointers, dimension (batchCount).
	dX_displ	(workspace) Array of pointers, dimension (batchCount).
	dinvA_displ	(workspace) Array of pointers, dimension (batchCount).
[in]	resetozero	INTEGER Used internally by CTRTRI_DIAG routine
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_ctrsm_inv_batched	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t	m,
		magma_int_t	n,
		magmaFloatComplex	alpha,
		magmaFloatComplex **	dA_array,
		magma_int_t	ldda,
		magmaFloatComplex **	dB_array,
		magma_int_t	lddb,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

ctrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ctrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters

[in]	side	magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B.
[in]	uplo	magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix.
[in]	transA	magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H.
[in]	diag	magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular.
[in]	m	INTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]	n	INTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]	alpha	COMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]	ldda	INTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in,out]	dB_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X
[in]	lddb	INTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_ctrsm_inv_vbatched	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t *	m,
		magma_int_t *	n,
		magmaFloatComplex	alpha,
		magmaFloatComplex **	dA_array,
		magma_int_t *	ldda,
		magmaFloatComplex **	dB_array,
		magma_int_t *	lddb,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

ctrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ctrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters

[in]	side	magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B.
[in]	uplo	magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix.
[in]	transA	magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H.
[in]	diag	magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular.
[in]	m	INTEGER array, dimension(batchCount + 1). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]	n	INTEGER array, dimension(batchCount + 1). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]	alpha	COMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]	ldda	INTEGER array, dimension(batchCount + 1). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]	dB_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X
[in]	lddb	INTEGER array, dimension(batchCount + 1). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ).
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_ctrsm_inv_outofplace_vbatched	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t	flag,
		magma_int_t *	m,
		magma_int_t *	n,
		magmaFloatComplex	alpha,
		magmaFloatComplex **	dA_array,
		magma_int_t *	ldda,
		magmaFloatComplex **	dB_array,
		magma_int_t *	lddb,
		magmaFloatComplex **	dX_array,
		magma_int_t *	lddx,
		magmaFloatComplex **	dinvA_array,
		magma_int_t *	dinvA_length,
		magmaFloatComplex **	dA_displ,
		magmaFloatComplex **	dB_displ,
		magmaFloatComplex **	dX_displ,
		magmaFloatComplex **	dinvA_displ,
		magma_int_t	resetozero,
		magma_int_t	batchCount,
		magma_int_t	max_m,
		magma_int_t	max_n,
		magma_queue_t	queue
	)

ctrsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The solution matrix X is returned in dX_array.

This is an asynchronous version of magmablas_ctrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters

[in]	side	magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B.
[in]	uplo	magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix.
[in]	transA	magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H.
[in]	diag	magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular.
[in]	flag	BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]	m	INTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]	n	INTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]	alpha	COMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]	ldda	INTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in]	dB_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
[in]	lddb	INTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ).
[in,out]	dX_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array X of dimension ( LDDX, N ). On entry, should be set to 0. On exit, the solution matrix X.
[in]	lddx	INTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. LDDX >= max( 1, M ).
	dinvA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(N/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB.
[in]	dinvA_length	INTEGER array, dimension(batchCount). The size of each workspace matrix dinvA
	dA_displ	(workspace) Array of pointers, dimension (batchCount).
	dB_displ	(workspace) Array of pointers, dimension (batchCount).
	dX_displ	(workspace) Array of pointers, dimension (batchCount).
	dinvA_displ	(workspace) Array of pointers, dimension (batchCount).
[in]	resetozero	INTEGER Used internally by CTRTRI_DIAG routine
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	max_m	INTEGER The maximum value in m.
[in]	max_n	INTEGER The maximum value in n.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_ctrsm_inv_work_vbatched	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t	flag,
		magma_int_t *	m,
		magma_int_t *	n,
		magmaFloatComplex	alpha,
		magmaFloatComplex **	dA_array,
		magma_int_t *	ldda,
		magmaFloatComplex **	dB_array,
		magma_int_t *	lddb,
		magmaFloatComplex **	dX_array,
		magma_int_t *	lddx,
		magmaFloatComplex **	dinvA_array,
		magma_int_t *	dinvA_length,
		magmaFloatComplex **	dA_displ,
		magmaFloatComplex **	dB_displ,
		magmaFloatComplex **	dX_displ,
		magmaFloatComplex **	dinvA_displ,
		magma_int_t	resetozero,
		magma_int_t	batchCount,
		magma_int_t	max_m,
		magma_int_t	max_n,
		magma_queue_t	queue
	)

ctrsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ctrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters

[in]	side	magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B.
[in]	uplo	magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix.
[in]	transA	magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H.
[in]	diag	magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular.
[in]	flag	BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]	m	INTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of each B. M >= 0.
[in]	n	INTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of each B. N >= 0.
[in]	alpha	COMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]	ldda	INTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]	dB_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X
[in]	lddb	INTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ).
[in,out]	dX_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array X of dimension ( LDDX, N ). On entry, should be set to 0. On exit, the solution matrix X.
[in]	lddx	INTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. LDDX >= max( 1, M ).
	dinvA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(N/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB.
[in]	dinvA_length	INTEGER array, dimension(batchCount). The size of each workspace matrix dinvA
	dA_displ	(workspace) Array of pointers, dimension (batchCount).
	dB_displ	(workspace) Array of pointers, dimension (batchCount).
	dX_displ	(workspace) Array of pointers, dimension (batchCount).
	dinvA_displ	(workspace) Array of pointers, dimension (batchCount).
[in]	resetozero	INTEGER Used internally by CTRTRI_DIAG routine
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	max_m	INTEGER The maximum value in m.
[in]	max_n	INTEGER The maximum value in n.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_ctrsm_inv_vbatched_max_nocheck	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t *	m,
		magma_int_t *	n,
		magmaFloatComplex	alpha,
		magmaFloatComplex **	dA_array,
		magma_int_t *	ldda,
		magmaFloatComplex **	dB_array,
		magma_int_t *	lddb,
		magma_int_t	batchCount,
		magma_int_t	max_m,
		magma_int_t	max_n,
		magma_queue_t	queue
	)

See Also: magmablas_ctrsm_inv_work_vbatched

void magmablas_ctrsm_vbatched	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t *	m,
		magma_int_t *	n,
		magmaFloatComplex	alpha,
		magmaFloatComplex **	dA_array,
		magma_int_t *	ldda,
		magmaFloatComplex **	dB_array,
		magma_int_t *	lddb,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

ctrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ctrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters

[in]	side	magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B.
[in]	uplo	magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix.
[in]	transA	magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H.
[in]	diag	magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular.
[in]	m	INTEGER array, dimension(batchCount + 1). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]	n	INTEGER array, dimension(batchCount + 1). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]	alpha	COMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]	ldda	INTEGER array, dimension(batchCount + 1). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]	dB_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X
[in]	lddb	INTEGER array, dimension(batchCount + 1). On entry, each element LDDB specifies the first dimension of the corresponding array B. LDDB >= max( 1, M ).
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_dtrsm_inv_outofplace_batched	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t	flag,
		magma_int_t	m,
		magma_int_t	n,
		double	alpha,
		double **	dA_array,
		magma_int_t	ldda,
		double **	dB_array,
		magma_int_t	lddb,
		double **	dX_array,
		magma_int_t	lddx,
		double **	dinvA_array,
		magma_int_t	dinvA_length,
		double **	dA_displ,
		double **	dB_displ,
		double **	dX_displ,
		double **	dinvA_displ,
		magma_int_t	resetozero,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

dtrsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The solution matrix X is returned in dX_array.

This is an asynchronous version of magmablas_dtrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters

[in]	side	magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B.
[in]	uplo	magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix.
[in]	transA	magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H.
[in]	diag	magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular.
[in]	flag	BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]	m	INTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]	n	INTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]	alpha	DOUBLE PRECISION. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]	ldda	INTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in]	dB_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
[in]	lddb	INTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in,out]	dX_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array X of dimension ( lddx, n ). On entry, should be set to 0. On exit, the solution matrix X.
[in]	lddx	INTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ).
	dinvA_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(n/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB.
[in]	dinvA_length	INTEGER The size of each workspace matrix dinvA
	dA_displ	(workspace) Array of pointers, dimension (batchCount).
	dB_displ	(workspace) Array of pointers, dimension (batchCount).
	dX_displ	(workspace) Array of pointers, dimension (batchCount).
	dinvA_displ	(workspace) Array of pointers, dimension (batchCount).
[in]	resetozero	INTEGER Used internally by DTRTRI_DIAG routine
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_dtrsm_inv_work_batched	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t	flag,
		magma_int_t	m,
		magma_int_t	n,
		double	alpha,
		double **	dA_array,
		magma_int_t	ldda,
		double **	dB_array,
		magma_int_t	lddb,
		double **	dX_array,
		magma_int_t	lddx,
		double **	dinvA_array,
		magma_int_t	dinvA_length,
		double **	dA_displ,
		double **	dB_displ,
		double **	dX_displ,
		double **	dinvA_displ,
		magma_int_t	resetozero,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

dtrsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_dtrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters

[in]	side	magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B.
[in]	uplo	magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix.
[in]	transA	magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H.
[in]	diag	magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular.
[in]	flag	BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]	m	INTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]	n	INTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]	alpha	DOUBLE PRECISION. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]	ldda	INTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in,out]	dB_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X
[in]	lddb	INTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in,out]	dX_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array X of dimension ( lddx, n ). On entry, should be set to 0. On exit, the solution matrix X.
[in]	lddx	INTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ).
	dinvA_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(n/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB, where DTRTRI_BATCHED_NB = 128.
[in]	dinvA_length	INTEGER The size of each workspace matrix dinvA
	dA_displ	(workspace) Array of pointers, dimension (batchCount).
	dB_displ	(workspace) Array of pointers, dimension (batchCount).
	dX_displ	(workspace) Array of pointers, dimension (batchCount).
	dinvA_displ	(workspace) Array of pointers, dimension (batchCount).
[in]	resetozero	INTEGER Used internally by DTRTRI_DIAG routine
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_dtrsm_inv_batched	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t	m,
		magma_int_t	n,
		double	alpha,
		double **	dA_array,
		magma_int_t	ldda,
		double **	dB_array,
		magma_int_t	lddb,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

dtrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_dtrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters

[in]	side	magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B.
[in]	uplo	magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix.
[in]	transA	magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H.
[in]	diag	magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular.
[in]	m	INTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]	n	INTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]	alpha	DOUBLE PRECISION. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]	ldda	INTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in,out]	dB_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X
[in]	lddb	INTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_dtrsm_inv_vbatched	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t *	m,
		magma_int_t *	n,
		double	alpha,
		double **	dA_array,
		magma_int_t *	ldda,
		double **	dB_array,
		magma_int_t *	lddb,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

dtrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_dtrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters

[in]	side	magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B.
[in]	uplo	magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix.
[in]	transA	magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H.
[in]	diag	magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular.
[in]	m	INTEGER array, dimension(batchCount + 1). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]	n	INTEGER array, dimension(batchCount + 1). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]	alpha	DOUBLE PRECISION. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]	ldda	INTEGER array, dimension(batchCount + 1). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]	dB_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X
[in]	lddb	INTEGER array, dimension(batchCount + 1). On entry, each element LDDB specifies the first dimension of the corresponding array B. LDDB >= max( 1, M ).
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_dtrsm_inv_outofplace_vbatched	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t	flag,
		magma_int_t *	m,
		magma_int_t *	n,
		double	alpha,
		double **	dA_array,
		magma_int_t *	ldda,
		double **	dB_array,
		magma_int_t *	lddb,
		double **	dX_array,
		magma_int_t *	lddx,
		double **	dinvA_array,
		magma_int_t *	dinvA_length,
		double **	dA_displ,
		double **	dB_displ,
		double **	dX_displ,
		double **	dinvA_displ,
		magma_int_t	resetozero,
		magma_int_t	batchCount,
		magma_int_t	max_m,
		magma_int_t	max_n,
		magma_queue_t	queue
	)

dtrsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The solution matrix X is returned in dX_array.

This is an asynchronous version of magmablas_dtrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters

[in]	side	magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B.
[in]	uplo	magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix.
[in]	transA	magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H.
[in]	diag	magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular.
[in]	flag	BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]	m	INTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]	n	INTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]	alpha	DOUBLE PRECISION. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]	ldda	INTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in]	dB_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
[in]	lddb	INTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ).
[in,out]	dX_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array X of dimension ( LDDX, N ). On entry, should be set to 0. On exit, the solution matrix X.
[in]	lddx	INTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. LDDX >= max( 1, M ).
	dinvA_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(N/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB.
[in]	dinvA_length	INTEGER array, dimension(batchCount). The size of each workspace matrix dinvA
	dA_displ	(workspace) Array of pointers, dimension (batchCount).
	dB_displ	(workspace) Array of pointers, dimension (batchCount).
	dX_displ	(workspace) Array of pointers, dimension (batchCount).
	dinvA_displ	(workspace) Array of pointers, dimension (batchCount).
[in]	resetozero	INTEGER Used internally by DTRTRI_DIAG routine
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	max_m	INTEGER The maximum value in m.
[in]	max_n	INTEGER The maximum value in n.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_dtrsm_inv_work_vbatched	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t	flag,
		magma_int_t *	m,
		magma_int_t *	n,
		double	alpha,
		double **	dA_array,
		magma_int_t *	ldda,
		double **	dB_array,
		magma_int_t *	lddb,
		double **	dX_array,
		magma_int_t *	lddx,
		double **	dinvA_array,
		magma_int_t *	dinvA_length,
		double **	dA_displ,
		double **	dB_displ,
		double **	dX_displ,
		double **	dinvA_displ,
		magma_int_t	resetozero,
		magma_int_t	batchCount,
		magma_int_t	max_m,
		magma_int_t	max_n,
		magma_queue_t	queue
	)

dtrsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_dtrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters

[in]	side	magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B.
[in]	uplo	magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix.
[in]	transA	magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H.
[in]	diag	magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular.
[in]	flag	BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]	m	INTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of each B. M >= 0.
[in]	n	INTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of each B. N >= 0.
[in]	alpha	DOUBLE PRECISION. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]	ldda	INTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]	dB_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X
[in]	lddb	INTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. lddb >= max( 1, M ).
[in,out]	dX_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array X of dimension ( LDDX, N ). On entry, should be set to 0. On exit, the solution matrix X.
[in]	lddx	INTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. lddx >= max( 1, M ).
	dinvA_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(N/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB.
[in]	dinvA_length	INTEGER array, dimension(batchCount). The size of each workspace matrix dinvA
	dA_displ	(workspace) Array of pointers, dimension (batchCount).
	dB_displ	(workspace) Array of pointers, dimension (batchCount).
	dX_displ	(workspace) Array of pointers, dimension (batchCount).
	dinvA_displ	(workspace) Array of pointers, dimension (batchCount).
[in]	resetozero	INTEGER Used internally by DTRTRI_DIAG routine
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	max_m	INTEGER The maximum value in m.
[in]	max_n	INTEGER The maximum value in n.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_dtrsm_inv_vbatched_max_nocheck	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t *	m,
		magma_int_t *	n,
		double	alpha,
		double **	dA_array,
		magma_int_t *	ldda,
		double **	dB_array,
		magma_int_t *	lddb,
		magma_int_t	batchCount,
		magma_int_t	max_m,
		magma_int_t	max_n,
		magma_queue_t	queue
	)

See Also: magmablas_dtrsm_inv_work_vbatched

void magmablas_dtrsm_vbatched	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t *	m,
		magma_int_t *	n,
		double	alpha,
		double **	dA_array,
		magma_int_t *	ldda,
		double **	dB_array,
		magma_int_t *	lddb,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

dtrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_dtrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters

[in]	side	magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B.
[in]	uplo	magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix.
[in]	transA	magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H.
[in]	diag	magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular.
[in]	m	INTEGER array, dimension(batchCount + 1). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]	n	INTEGER array, dimension(batchCount + 1). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]	alpha	DOUBLE PRECISION. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]	ldda	INTEGER array, dimension(batchCount + 1). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]	dB_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X
[in]	lddb	INTEGER array, dimension(batchCount + 1). On entry, LDDB specifies the first dimension of each array B. lddb >= max( 1, M ).
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_strsm_inv_outofplace_batched	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t	flag,
		magma_int_t	m,
		magma_int_t	n,
		float	alpha,
		float **	dA_array,
		magma_int_t	ldda,
		float **	dB_array,
		magma_int_t	lddb,
		float **	dX_array,
		magma_int_t	lddx,
		float **	dinvA_array,
		magma_int_t	dinvA_length,
		float **	dA_displ,
		float **	dB_displ,
		float **	dX_displ,
		float **	dinvA_displ,
		magma_int_t	resetozero,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

strsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_strsm with flag, d_dinvA and dX workspaces as arguments.

Parameters

[in]	side	magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B.
[in]	uplo	magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix.
[in]	transA	magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H.
[in]	diag	magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular.
[in]	flag	BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]	m	INTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]	n	INTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]	alpha	REAL. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a REAL array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]	ldda	INTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in]	dB_array	Array of pointers, dimension (batchCount). Each is a REAL array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
[in]	lddb	INTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in,out]	dX_array	Array of pointers, dimension (batchCount). Each is a REAL array X of dimension ( lddx, n ). On entry, should be set to 0. On exit, the solution matrix X.
[in]	lddx	INTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ).
	dinvA_array	Array of pointers, dimension (batchCount). Each is a REAL array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(n/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB.
[in]	dinvA_length	INTEGER The size of each workspace matrix dinvA
	dA_displ	(workspace) Array of pointers, dimension (batchCount).
	dB_displ	(workspace) Array of pointers, dimension (batchCount).
	dX_displ	(workspace) Array of pointers, dimension (batchCount).
	dinvA_displ	(workspace) Array of pointers, dimension (batchCount).
[in]	resetozero	INTEGER Used internally by STRTRI_DIAG routine
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_strsm_inv_work_batched	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t	flag,
		magma_int_t	m,
		magma_int_t	n,
		float	alpha,
		float **	dA_array,
		magma_int_t	ldda,
		float **	dB_array,
		magma_int_t	lddb,
		float **	dX_array,
		magma_int_t	lddx,
		float **	dinvA_array,
		magma_int_t	dinvA_length,
		float **	dA_displ,
		float **	dB_displ,
		float **	dX_displ,
		float **	dinvA_displ,
		magma_int_t	resetozero,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

strsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_strsm with flag, d_dinvA and dX workspaces as arguments.

Parameters

[in]	side	magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B.
[in]	uplo	magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix.
[in]	transA	magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H.
[in]	diag	magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular.
[in]	flag	BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]	m	INTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]	n	INTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]	alpha	REAL. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a REAL array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]	ldda	INTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in,out]	dB_array	Array of pointers, dimension (batchCount). Each is a REAL array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X
[in]	lddb	INTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in,out]	dX_array	Array of pointers, dimension (batchCount). Each is a REAL array X of dimension ( lddx, n ). On entry, should be set to 0. On exit, the solution matrix X.
[in]	lddx	INTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ).
	dinvA_array	Array of pointers, dimension (batchCount). Each is a REAL array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(n/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB, where STRTRI_BATCHED_NB = 128.
[in]	dinvA_length	INTEGER The size of each workspace matrix dinvA
	dA_displ	(workspace) Array of pointers, dimension (batchCount).
	dB_displ	(workspace) Array of pointers, dimension (batchCount).
	dX_displ	(workspace) Array of pointers, dimension (batchCount).
	dinvA_displ	(workspace) Array of pointers, dimension (batchCount).
[in]	resetozero	INTEGER Used internally by STRTRI_DIAG routine
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_strsm_inv_batched	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t	m,
		magma_int_t	n,
		float	alpha,
		float **	dA_array,
		magma_int_t	ldda,
		float **	dB_array,
		magma_int_t	lddb,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

strsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_strsm with flag, d_dinvA and dX workspaces as arguments.

Parameters

[in]	side	magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B.
[in]	uplo	magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix.
[in]	transA	magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H.
[in]	diag	magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular.
[in]	m	INTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]	n	INTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]	alpha	REAL. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a REAL array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]	ldda	INTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in,out]	dB_array	Array of pointers, dimension (batchCount). Each is a REAL array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X
[in]	lddb	INTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_strsm_inv_vbatched	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t *	m,
		magma_int_t *	n,
		float	alpha,
		float **	dA_array,
		magma_int_t *	ldda,
		float **	dB_array,
		magma_int_t *	lddb,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

strsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_strsm with flag, d_dinvA and dX workspaces as arguments.

Parameters

[in]	side	magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B.
[in]	uplo	magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix.
[in]	transA	magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H.
[in]	diag	magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular.
[in]	m	INTEGER array, dimension(batchCount + 1). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]	n	INTEGER array, dimension(batchCount + 1). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]	alpha	REAL. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a REAL array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]	ldda	INTEGER array, dimension(batchCount + 1). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]	dB_array	Array of pointers, dimension (batchCount). Each is a REAL array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X
[in]	lddb	INTEGER array, dimension(batchCount + 1). On entry, LDDB specifies the first dimension of each array B. lddb >= max( 1, M ).
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_strsm_inv_outofplace_vbatched	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t	flag,
		magma_int_t *	m,
		magma_int_t *	n,
		float	alpha,
		float **	dA_array,
		magma_int_t *	ldda,
		float **	dB_array,
		magma_int_t *	lddb,
		float **	dX_array,
		magma_int_t *	lddx,
		float **	dinvA_array,
		magma_int_t *	dinvA_length,
		float **	dA_displ,
		float **	dB_displ,
		float **	dX_displ,
		float **	dinvA_displ,
		magma_int_t	resetozero,
		magma_int_t	batchCount,
		magma_int_t	max_m,
		magma_int_t	max_n,
		magma_queue_t	queue
	)

strsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_strsm with flag, d_dinvA and dX workspaces as arguments.

Parameters

[in]	side	magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B.
[in]	uplo	magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix.
[in]	transA	magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H.
[in]	diag	magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular.
[in]	flag	BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]	m	INTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]	n	INTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]	alpha	REAL. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a REAL array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]	ldda	INTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in]	dB_array	Array of pointers, dimension (batchCount). Each is a REAL array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
[in]	lddb	INTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ).
[in,out]	dX_array	Array of pointers, dimension (batchCount). Each is a REAL array X of dimension ( LDDX, N ). On entry, should be set to 0. On exit, the solution matrix X.
[in]	lddx	INTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. LDDX >= max( 1, M ).
	dinvA_array	Array of pointers, dimension (batchCount). Each is a REAL array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(N/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB.
[in]	dinvA_length	INTEGER array, dimension(batchCount). The size of each workspace matrix dinvA
	dA_displ	(workspace) Array of pointers, dimension (batchCount).
	dB_displ	(workspace) Array of pointers, dimension (batchCount).
	dX_displ	(workspace) Array of pointers, dimension (batchCount).
	dinvA_displ	(workspace) Array of pointers, dimension (batchCount).
[in]	resetozero	INTEGER Used internally by STRTRI_DIAG routine
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	max_m	INTEGER The maximum value in m.
[in]	max_n	INTEGER The maximum value in n.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_strsm_inv_work_vbatched	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t	flag,
		magma_int_t *	m,
		magma_int_t *	n,
		float	alpha,
		float **	dA_array,
		magma_int_t *	ldda,
		float **	dB_array,
		magma_int_t *	lddb,
		float **	dX_array,
		magma_int_t *	lddx,
		float **	dinvA_array,
		magma_int_t *	dinvA_length,
		float **	dA_displ,
		float **	dB_displ,
		float **	dX_displ,
		float **	dinvA_displ,
		magma_int_t	resetozero,
		magma_int_t	batchCount,
		magma_int_t	max_m,
		magma_int_t	max_n,
		magma_queue_t	queue
	)

strsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_strsm with flag, d_dinvA and dX workspaces as arguments.

Parameters

[in]	side	magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B.
[in]	uplo	magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix.
[in]	transA	magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H.
[in]	diag	magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular.
[in]	flag	BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]	m	INTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of each B. M >= 0.
[in]	n	INTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of each B. N >= 0.
[in]	alpha	REAL. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a REAL array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]	ldda	INTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]	dB_array	Array of pointers, dimension (batchCount). Each is a REAL array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X
[in]	lddb	INTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. lddb >= max( 1, M ).
[in,out]	dX_array	Array of pointers, dimension (batchCount). Each is a REAL array X of dimension ( LDDX, N ). On entry, should be set to 0. On exit, the solution matrix X.
[in]	lddx	INTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. lddx >= max( 1, M ).
	dinvA_array	Array of pointers, dimension (batchCount). Each is a REAL array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(N/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB.
[in]	dinvA_length	INTEGER array, dimension(batchCount). The size of each workspace matrix dinvA
	dA_displ	(workspace) Array of pointers, dimension (batchCount).
	dB_displ	(workspace) Array of pointers, dimension (batchCount).
	dX_displ	(workspace) Array of pointers, dimension (batchCount).
	dinvA_displ	(workspace) Array of pointers, dimension (batchCount).
[in]	resetozero	INTEGER Used internally by STRTRI_DIAG routine
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	max_m	INTEGER The maximum value in m.
[in]	max_n	INTEGER The maximum value in n.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_strsm_inv_vbatched_max_nocheck	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t *	m,
		magma_int_t *	n,
		float	alpha,
		float **	dA_array,
		magma_int_t *	ldda,
		float **	dB_array,
		magma_int_t *	lddb,
		magma_int_t	batchCount,
		magma_int_t	max_m,
		magma_int_t	max_n,
		magma_queue_t	queue
	)

See Also: magmablas_strsm_inv_work_vbatched

void magmablas_strsm_vbatched	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t *	m,
		magma_int_t *	n,
		float	alpha,
		float **	dA_array,
		magma_int_t *	ldda,
		float **	dB_array,
		magma_int_t *	lddb,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

strsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_strsm with flag, d_dinvA and dX workspaces as arguments.

Parameters

[in]	side	magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B.
[in]	uplo	magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix.
[in]	transA	magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H.
[in]	diag	magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular.
[in]	m	INTEGER array, dimension(batchCount + 1). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]	n	INTEGER array, dimension(batchCount + 1). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]	alpha	REAL. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a REAL array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]	ldda	INTEGER array, dimension(batchCount + 1). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]	dB_array	Array of pointers, dimension (batchCount). Each is a REAL array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X
[in]	lddb	INTEGER array, dimension(batchCount + 1). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ).
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_ztrsm_inv_outofplace_batched	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t	flag,
		magma_int_t	m,
		magma_int_t	n,
		magmaDoubleComplex	alpha,
		magmaDoubleComplex **	dA_array,
		magma_int_t	ldda,
		magmaDoubleComplex **	dB_array,
		magma_int_t	lddb,
		magmaDoubleComplex **	dX_array,
		magma_int_t	lddx,
		magmaDoubleComplex **	dinvA_array,
		magma_int_t	dinvA_length,
		magmaDoubleComplex **	dA_displ,
		magmaDoubleComplex **	dB_displ,
		magmaDoubleComplex **	dX_displ,
		magmaDoubleComplex **	dinvA_displ,
		magma_int_t	resetozero,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

ztrsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ztrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters

[in]	side	magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B.
[in]	uplo	magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix.
[in]	transA	magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H.
[in]	diag	magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular.
[in]	flag	BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]	m	INTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]	n	INTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]	alpha	COMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]	ldda	INTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in]	dB_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
[in]	lddb	INTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in,out]	dX_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array X of dimension ( lddx, n ). On entry, should be set to 0. On exit, the solution matrix X.
[in]	lddx	INTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ).
	dinvA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(n/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB.
[in]	dinvA_length	INTEGER The size of each workspace matrix dinvA
	dA_displ	(workspace) Array of pointers, dimension (batchCount).
	dB_displ	(workspace) Array of pointers, dimension (batchCount).
	dX_displ	(workspace) Array of pointers, dimension (batchCount).
	dinvA_displ	(workspace) Array of pointers, dimension (batchCount).
[in]	resetozero	INTEGER Used internally by ZTRTRI_DIAG routine
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_ztrsm_inv_work_batched	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t	flag,
		magma_int_t	m,
		magma_int_t	n,
		magmaDoubleComplex	alpha,
		magmaDoubleComplex **	dA_array,
		magma_int_t	ldda,
		magmaDoubleComplex **	dB_array,
		magma_int_t	lddb,
		magmaDoubleComplex **	dX_array,
		magma_int_t	lddx,
		magmaDoubleComplex **	dinvA_array,
		magma_int_t	dinvA_length,
		magmaDoubleComplex **	dA_displ,
		magmaDoubleComplex **	dB_displ,
		magmaDoubleComplex **	dX_displ,
		magmaDoubleComplex **	dinvA_displ,
		magma_int_t	resetozero,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

ztrsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ztrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters

[in]	side	magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B.
[in]	uplo	magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix.
[in]	transA	magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H.
[in]	diag	magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular.
[in]	flag	BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]	m	INTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]	n	INTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]	alpha	COMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]	ldda	INTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in,out]	dB_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X
[in]	lddb	INTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in,out]	dX_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array X of dimension ( lddx, n ). On entry, should be set to 0. On exit, the solution matrix X.
[in]	lddx	INTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ).
	dinvA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(n/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB, where ZTRTRI_BATCHED_NB = 128.
[in]	dinvA_length	INTEGER The size of each workspace matrix dinvA
	dA_displ	(workspace) Array of pointers, dimension (batchCount).
	dB_displ	(workspace) Array of pointers, dimension (batchCount).
	dX_displ	(workspace) Array of pointers, dimension (batchCount).
	dinvA_displ	(workspace) Array of pointers, dimension (batchCount).
[in]	resetozero	INTEGER Used internally by ZTRTRI_DIAG routine
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_ztrsm_inv_batched	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t	m,
		magma_int_t	n,
		magmaDoubleComplex	alpha,
		magmaDoubleComplex **	dA_array,
		magma_int_t	ldda,
		magmaDoubleComplex **	dB_array,
		magma_int_t	lddb,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

ztrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ztrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters

[in]	side	magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B.
[in]	uplo	magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix.
[in]	transA	magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H.
[in]	diag	magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular.
[in]	m	INTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]	n	INTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]	alpha	COMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]	ldda	INTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in,out]	dB_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X
[in]	lddb	INTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_ztrsm_inv_vbatched	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t *	m,
		magma_int_t *	n,
		magmaDoubleComplex	alpha,
		magmaDoubleComplex **	dA_array,
		magma_int_t *	ldda,
		magmaDoubleComplex **	dB_array,
		magma_int_t *	lddb,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

ztrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ztrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters

[in]	side	magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B.
[in]	uplo	magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix.
[in]	transA	magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H.
[in]	diag	magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular.
[in]	m	INTEGER array, dimension(batchCount + 1). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]	n	INTEGER array, dimension(batchCount + 1). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]	alpha	COMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]	ldda	INTEGER array, dimension(batchCount + 1). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]	dB_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X
[in]	lddb	INTEGER array, dimension(batchCount + 1). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ).
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_ztrsm_inv_outofplace_vbatched	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t	flag,
		magma_int_t *	m,
		magma_int_t *	n,
		magmaDoubleComplex	alpha,
		magmaDoubleComplex **	dA_array,
		magma_int_t *	ldda,
		magmaDoubleComplex **	dB_array,
		magma_int_t *	lddb,
		magmaDoubleComplex **	dX_array,
		magma_int_t *	lddx,
		magmaDoubleComplex **	dinvA_array,
		magma_int_t *	dinvA_length,
		magmaDoubleComplex **	dA_displ,
		magmaDoubleComplex **	dB_displ,
		magmaDoubleComplex **	dX_displ,
		magmaDoubleComplex **	dinvA_displ,
		magma_int_t	resetozero,
		magma_int_t	batchCount,
		magma_int_t	max_m,
		magma_int_t	max_n,
		magma_queue_t	queue
	)

ztrsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ztrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters

[in]	side	magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B.
[in]	uplo	magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix.
[in]	transA	magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H.
[in]	diag	magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular.
[in]	flag	BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]	m	INTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]	n	INTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]	alpha	COMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]	ldda	INTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in]	dB_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
[in]	lddb	INTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ).
[in,out]	dX_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array X of dimension ( LDDX, N ). On entry, should be set to 0. On exit, the solution matrix X.
[in]	lddx	INTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. LDDX >= max( 1, M ).
	dinvA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(N/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB.
[in]	dinvA_length	INTEGER array, dimension(batchCount). The size of each workspace matrix dinvA
	dA_displ	(workspace) Array of pointers, dimension (batchCount).
	dB_displ	(workspace) Array of pointers, dimension (batchCount).
	dX_displ	(workspace) Array of pointers, dimension (batchCount).
	dinvA_displ	(workspace) Array of pointers, dimension (batchCount).
[in]	resetozero	INTEGER Used internally by ZTRTRI_DIAG routine
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	max_m	INTEGER The maximum value in m.
[in]	max_n	INTEGER The maximum value in n.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_ztrsm_inv_work_vbatched	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t	flag,
		magma_int_t *	m,
		magma_int_t *	n,
		magmaDoubleComplex	alpha,
		magmaDoubleComplex **	dA_array,
		magma_int_t *	ldda,
		magmaDoubleComplex **	dB_array,
		magma_int_t *	lddb,
		magmaDoubleComplex **	dX_array,
		magma_int_t *	lddx,
		magmaDoubleComplex **	dinvA_array,
		magma_int_t *	dinvA_length,
		magmaDoubleComplex **	dA_displ,
		magmaDoubleComplex **	dB_displ,
		magmaDoubleComplex **	dX_displ,
		magmaDoubleComplex **	dinvA_displ,
		magma_int_t	resetozero,
		magma_int_t	batchCount,
		magma_int_t	max_m,
		magma_int_t	max_n,
		magma_queue_t	queue
	)

ztrsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ztrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters

[in]	side	magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B.
[in]	uplo	magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix.
[in]	transA	magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H.
[in]	diag	magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular.
[in]	flag	BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]	m	INTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of each B. M >= 0.
[in]	n	INTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of each B. N >= 0.
[in]	alpha	COMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]	ldda	INTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]	dB_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X
[in]	lddb	INTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. lddb >= max( 1, M ).
[in,out]	dX_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array X of dimension ( LDDX, N ). On entry, should be set to 0. On exit, the solution matrix X.
[in]	lddx	INTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. lddx >= max( 1, M ).
	dinvA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(N/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB.
[in]	dinvA_length	INTEGER array, dimension(batchCount). The size of each workspace matrix dinvA
	dA_displ	(workspace) Array of pointers, dimension (batchCount).
	dB_displ	(workspace) Array of pointers, dimension (batchCount).
	dX_displ	(workspace) Array of pointers, dimension (batchCount).
	dinvA_displ	(workspace) Array of pointers, dimension (batchCount).
[in]	resetozero	INTEGER Used internally by ZTRTRI_DIAG routine
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	max_m	INTEGER The maximum value in m.
[in]	max_n	INTEGER The maximum value in n.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_ztrsm_inv_vbatched_max_nocheck	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t *	m,
		magma_int_t *	n,
		magmaDoubleComplex	alpha,
		magmaDoubleComplex **	dA_array,
		magma_int_t *	ldda,
		magmaDoubleComplex **	dB_array,
		magma_int_t *	lddb,
		magma_int_t	batchCount,
		magma_int_t	max_m,
		magma_int_t	max_n,
		magma_queue_t	queue
	)

See Also: magmablas_ztrsm_inv_work_vbatched

void magmablas_ztrsm_vbatched	(	magma_side_t	side,
		magma_uplo_t	uplo,
		magma_trans_t	transA,
		magma_diag_t	diag,
		magma_int_t *	m,
		magma_int_t *	n,
		magmaDoubleComplex	alpha,
		magmaDoubleComplex **	dA_array,
		magma_int_t *	ldda,
		magmaDoubleComplex **	dB_array,
		magma_int_t *	lddb,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

ztrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ztrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters

[in]	side	magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B.
[in]	uplo	magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix.
[in]	transA	magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H.
[in]	diag	magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular.
[in]	m	INTEGER array, dimension(batchCount + 1). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]	n	INTEGER array, dimension(batchCount + 1). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]	alpha	COMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]	ldda	INTEGER array, dimension(batchCount + 1). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]	dB_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X.
[in]	lddb	INTEGER array, dimension(batchCount + 1). On entry, each element LDDB specifies the first dimension of each array B. lddb >= max( 1, M ).
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

Functions

Detailed Description

Function Documentation