MAGMA 2.9.0
Matrix Algebra for GPU and Multicore Architectures
Loading...
Searching...
No Matches

\(C = op(A)^{-1} B \) or \(C = B \;op(A)^{-1}\) where \(A\) is triangular More...

Functions

void magmablas_ctrsm_inv_outofplace_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t ldda, magmaFloatComplex **dB_array, magma_int_t lddb, magmaFloatComplex **dX_array, magma_int_t lddx, magmaFloatComplex **dinvA_array, magma_int_t dinvA_length, magmaFloatComplex **dA_displ, magmaFloatComplex **dB_displ, magmaFloatComplex **dX_displ, magmaFloatComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
 ctrsm_outofplace solves one of the matrix equations on gpu
 
void magmablas_ctrsm_inv_work_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t ldda, magmaFloatComplex **dB_array, magma_int_t lddb, magmaFloatComplex **dX_array, magma_int_t lddx, magmaFloatComplex **dinvA_array, magma_int_t dinvA_length, magmaFloatComplex **dA_displ, magmaFloatComplex **dB_displ, magmaFloatComplex **dX_displ, magmaFloatComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
 ctrsm_work solves one of the matrix equations on gpu
 
void magmablas_ctrsm_inv_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t ldda, magmaFloatComplex **dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue)
 ctrsm solves one of the matrix equations on gpu
 
void magmablas_ctrsm_inv_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t *ldda, magmaFloatComplex **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_queue_t queue)
 ctrsm solves one of the matrix equations on gpu
 
void magmablas_ctrsm_inv_outofplace_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t *m, magma_int_t *n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t *ldda, magmaFloatComplex **dB_array, magma_int_t *lddb, magmaFloatComplex **dX_array, magma_int_t *lddx, magmaFloatComplex **dinvA_array, magma_int_t *dinvA_length, magmaFloatComplex **dA_displ, magmaFloatComplex **dB_displ, magmaFloatComplex **dX_displ, magmaFloatComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
 ctrsm_outofplace solves one of the matrix equations on gpu
 
void magmablas_ctrsm_inv_work_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t *m, magma_int_t *n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t *ldda, magmaFloatComplex **dB_array, magma_int_t *lddb, magmaFloatComplex **dX_array, magma_int_t *lddx, magmaFloatComplex **dinvA_array, magma_int_t *dinvA_length, magmaFloatComplex **dA_displ, magmaFloatComplex **dB_displ, magmaFloatComplex **dX_displ, magmaFloatComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
 ctrsm_work solves one of the matrix equations on gpu
 
void magmablas_ctrsm_inv_vbatched_max_nocheck (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t *ldda, magmaFloatComplex **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
 
void magmablas_ctrsm_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t *ldda, magmaFloatComplex **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_queue_t queue)
 ctrsm solves one of the matrix equations on gpu
 
void magmablas_dtrsm_inv_outofplace_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, double alpha, double **dA_array, magma_int_t ldda, double **dB_array, magma_int_t lddb, double **dX_array, magma_int_t lddx, double **dinvA_array, magma_int_t dinvA_length, double **dA_displ, double **dB_displ, double **dX_displ, double **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
 dtrsm_outofplace solves one of the matrix equations on gpu
 
void magmablas_dtrsm_inv_work_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, double alpha, double **dA_array, magma_int_t ldda, double **dB_array, magma_int_t lddb, double **dX_array, magma_int_t lddx, double **dinvA_array, magma_int_t dinvA_length, double **dA_displ, double **dB_displ, double **dX_displ, double **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
 dtrsm_work solves one of the matrix equations on gpu
 
void magmablas_dtrsm_inv_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, double alpha, double **dA_array, magma_int_t ldda, double **dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue)
 dtrsm solves one of the matrix equations on gpu
 
void magmablas_dtrsm_inv_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, double alpha, double **dA_array, magma_int_t *ldda, double **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_queue_t queue)
 dtrsm solves one of the matrix equations on gpu
 
void magmablas_dtrsm_inv_outofplace_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t *m, magma_int_t *n, double alpha, double **dA_array, magma_int_t *ldda, double **dB_array, magma_int_t *lddb, double **dX_array, magma_int_t *lddx, double **dinvA_array, magma_int_t *dinvA_length, double **dA_displ, double **dB_displ, double **dX_displ, double **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
 dtrsm_outofplace solves one of the matrix equations on gpu
 
void magmablas_dtrsm_inv_work_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t *m, magma_int_t *n, double alpha, double **dA_array, magma_int_t *ldda, double **dB_array, magma_int_t *lddb, double **dX_array, magma_int_t *lddx, double **dinvA_array, magma_int_t *dinvA_length, double **dA_displ, double **dB_displ, double **dX_displ, double **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
 dtrsm_work solves one of the matrix equations on gpu
 
void magmablas_dtrsm_inv_vbatched_max_nocheck (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, double alpha, double **dA_array, magma_int_t *ldda, double **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
 
void magmablas_dtrsm_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, double alpha, double **dA_array, magma_int_t *ldda, double **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_queue_t queue)
 dtrsm solves one of the matrix equations on gpu
 
void magmablas_strsm_inv_outofplace_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, float alpha, float **dA_array, magma_int_t ldda, float **dB_array, magma_int_t lddb, float **dX_array, magma_int_t lddx, float **dinvA_array, magma_int_t dinvA_length, float **dA_displ, float **dB_displ, float **dX_displ, float **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
 strsm_outofplace solves one of the matrix equations on gpu
 
void magmablas_strsm_inv_work_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, float alpha, float **dA_array, magma_int_t ldda, float **dB_array, magma_int_t lddb, float **dX_array, magma_int_t lddx, float **dinvA_array, magma_int_t dinvA_length, float **dA_displ, float **dB_displ, float **dX_displ, float **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
 strsm_work solves one of the matrix equations on gpu
 
void magmablas_strsm_inv_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, float alpha, float **dA_array, magma_int_t ldda, float **dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue)
 strsm solves one of the matrix equations on gpu
 
void magmablas_strsm_inv_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, float alpha, float **dA_array, magma_int_t *ldda, float **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_queue_t queue)
 strsm solves one of the matrix equations on gpu
 
void magmablas_strsm_inv_outofplace_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t *m, magma_int_t *n, float alpha, float **dA_array, magma_int_t *ldda, float **dB_array, magma_int_t *lddb, float **dX_array, magma_int_t *lddx, float **dinvA_array, magma_int_t *dinvA_length, float **dA_displ, float **dB_displ, float **dX_displ, float **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
 strsm_outofplace solves one of the matrix equations on gpu
 
void magmablas_strsm_inv_work_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t *m, magma_int_t *n, float alpha, float **dA_array, magma_int_t *ldda, float **dB_array, magma_int_t *lddb, float **dX_array, magma_int_t *lddx, float **dinvA_array, magma_int_t *dinvA_length, float **dA_displ, float **dB_displ, float **dX_displ, float **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
 strsm_work solves one of the matrix equations on gpu
 
void magmablas_strsm_inv_vbatched_max_nocheck (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, float alpha, float **dA_array, magma_int_t *ldda, float **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
 
void magmablas_strsm_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, float alpha, float **dA_array, magma_int_t *ldda, float **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_queue_t queue)
 strsm solves one of the matrix equations on gpu
 
void magmablas_ztrsm_inv_outofplace_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex **dA_array, magma_int_t ldda, magmaDoubleComplex **dB_array, magma_int_t lddb, magmaDoubleComplex **dX_array, magma_int_t lddx, magmaDoubleComplex **dinvA_array, magma_int_t dinvA_length, magmaDoubleComplex **dA_displ, magmaDoubleComplex **dB_displ, magmaDoubleComplex **dX_displ, magmaDoubleComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
 ztrsm_outofplace solves one of the matrix equations on gpu
 
void magmablas_ztrsm_inv_work_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex **dA_array, magma_int_t ldda, magmaDoubleComplex **dB_array, magma_int_t lddb, magmaDoubleComplex **dX_array, magma_int_t lddx, magmaDoubleComplex **dinvA_array, magma_int_t dinvA_length, magmaDoubleComplex **dA_displ, magmaDoubleComplex **dB_displ, magmaDoubleComplex **dX_displ, magmaDoubleComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
 ztrsm_work solves one of the matrix equations on gpu
 
void magmablas_ztrsm_inv_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex **dA_array, magma_int_t ldda, magmaDoubleComplex **dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue)
 ztrsm solves one of the matrix equations on gpu
 
void magmablas_ztrsm_inv_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, magmaDoubleComplex alpha, magmaDoubleComplex **dA_array, magma_int_t *ldda, magmaDoubleComplex **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_queue_t queue)
 ztrsm solves one of the matrix equations on gpu
 
void magmablas_ztrsm_inv_outofplace_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t *m, magma_int_t *n, magmaDoubleComplex alpha, magmaDoubleComplex **dA_array, magma_int_t *ldda, magmaDoubleComplex **dB_array, magma_int_t *lddb, magmaDoubleComplex **dX_array, magma_int_t *lddx, magmaDoubleComplex **dinvA_array, magma_int_t *dinvA_length, magmaDoubleComplex **dA_displ, magmaDoubleComplex **dB_displ, magmaDoubleComplex **dX_displ, magmaDoubleComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
 ztrsm_outofplace solves one of the matrix equations on gpu
 
void magmablas_ztrsm_inv_work_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t *m, magma_int_t *n, magmaDoubleComplex alpha, magmaDoubleComplex **dA_array, magma_int_t *ldda, magmaDoubleComplex **dB_array, magma_int_t *lddb, magmaDoubleComplex **dX_array, magma_int_t *lddx, magmaDoubleComplex **dinvA_array, magma_int_t *dinvA_length, magmaDoubleComplex **dA_displ, magmaDoubleComplex **dB_displ, magmaDoubleComplex **dX_displ, magmaDoubleComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
 ztrsm_work solves one of the matrix equations on gpu
 
void magmablas_ztrsm_inv_vbatched_max_nocheck (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, magmaDoubleComplex alpha, magmaDoubleComplex **dA_array, magma_int_t *ldda, magmaDoubleComplex **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
 
void magmablas_ztrsm_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, magmaDoubleComplex alpha, magmaDoubleComplex **dA_array, magma_int_t *ldda, magmaDoubleComplex **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_queue_t queue)
 ztrsm solves one of the matrix equations on gpu
 

Detailed Description

\(C = op(A)^{-1} B \) or \(C = B \;op(A)^{-1}\) where \(A\) is triangular

Function Documentation

◆ magmablas_ctrsm_inv_outofplace_batched()

void magmablas_ctrsm_inv_outofplace_batched ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t flag,
magma_int_t m,
magma_int_t n,
magmaFloatComplex alpha,
magmaFloatComplex ** dA_array,
magma_int_t ldda,
magmaFloatComplex ** dB_array,
magma_int_t lddb,
magmaFloatComplex ** dX_array,
magma_int_t lddx,
magmaFloatComplex ** dinvA_array,
magma_int_t dinvA_length,
magmaFloatComplex ** dA_displ,
magmaFloatComplex ** dB_displ,
magmaFloatComplex ** dX_displ,
magmaFloatComplex ** dinvA_displ,
magma_int_t resetozero,
magma_int_t batchCount,
magma_queue_t queue )

ctrsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The solution matrix X is written to the corresponding array in dX_array.

This is an asynchronous version of magmablas_ctrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in] flag BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in dinvA_array) are already inverted.
[in]mINTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]nINTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]alphaCOMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in,out] dX_array Array of pointers, dimension (batchCount). Each is a COMPLEX array X of dimension ( lddx, n ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ).
dinvA_array Array of pointers, dimension (batchCount). Each is a COMPLEX array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(n/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB. Here CTRTRI_BATCHED_NB = 128.
[in]dinvA_lengthINTEGER The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by CTRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magmablas_ctrsm_inv_work_batched()

void magmablas_ctrsm_inv_work_batched ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t flag,
magma_int_t m,
magma_int_t n,
magmaFloatComplex alpha,
magmaFloatComplex ** dA_array,
magma_int_t ldda,
magmaFloatComplex ** dB_array,
magma_int_t lddb,
magmaFloatComplex ** dX_array,
magma_int_t lddx,
magmaFloatComplex ** dinvA_array,
magma_int_t dinvA_length,
magmaFloatComplex ** dA_displ,
magmaFloatComplex ** dB_displ,
magmaFloatComplex ** dX_displ,
magmaFloatComplex ** dinvA_displ,
magma_int_t resetozero,
magma_int_t batchCount,
magma_queue_t queue )

ctrsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ctrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in] flag BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in dinvA_array) are already inverted.
[in]mINTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]nINTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]alphaCOMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in,out] dX_array Array of pointers, dimension (batchCount). Each is a COMPLEX array X of dimension ( lddx, n ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB, If side == MagmaRight, dinvA must be of size >= ceil(n/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB, where CTRTRI_BATCHED_NB = 128.
[in]dinvA_lengthINTEGER The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by CTRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magmablas_ctrsm_inv_batched()

void magmablas_ctrsm_inv_batched ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t m,
magma_int_t n,
magmaFloatComplex alpha,
magmaFloatComplex ** dA_array,
magma_int_t ldda,
magmaFloatComplex ** dB_array,
magma_int_t lddb,
magma_int_t batchCount,
magma_queue_t queue )

ctrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous, batched version of magmablas_ctrsm. Unlike magmablas_ctrsm_inv_work_batched, it does not take the flag, dinvA and dX workspace arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]mINTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]nINTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]alphaCOMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magmablas_ctrsm_inv_vbatched()

void magmablas_ctrsm_inv_vbatched ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t * m,
magma_int_t * n,
magmaFloatComplex alpha,
magmaFloatComplex ** dA_array,
magma_int_t * ldda,
magmaFloatComplex ** dB_array,
magma_int_t * lddb,
magma_int_t batchCount,
magma_queue_t queue )

ctrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous, variable-size batched version of magmablas_ctrsm. Unlike magmablas_ctrsm_inv_work_vbatched, it does not take the flag, dinvA and dX workspace arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]mINTEGER array, dimension(batchCount + 1). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]nINTEGER array, dimension(batchCount + 1). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]alphaCOMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount + 1). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in] lddb INTEGER array, dimension(batchCount + 1). On entry, each element LDDB specifies the first dimension of the corresponding array B. LDDB >= max( 1, M ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magmablas_ctrsm_inv_outofplace_vbatched()

void magmablas_ctrsm_inv_outofplace_vbatched ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t flag,
magma_int_t * m,
magma_int_t * n,
magmaFloatComplex alpha,
magmaFloatComplex ** dA_array,
magma_int_t * ldda,
magmaFloatComplex ** dB_array,
magma_int_t * lddb,
magmaFloatComplex ** dX_array,
magma_int_t * lddx,
magmaFloatComplex ** dinvA_array,
magma_int_t * dinvA_length,
magmaFloatComplex ** dA_displ,
magmaFloatComplex ** dB_displ,
magmaFloatComplex ** dX_displ,
magmaFloatComplex ** dinvA_displ,
magma_int_t resetozero,
magma_int_t batchCount,
magma_int_t max_m,
magma_int_t max_n,
magma_queue_t queue )

ctrsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The solution matrix X is returned in dX.

This is an asynchronous version of magmablas_ctrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]nINTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]alphaCOMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
[in]lddbINTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array X of dimension ( LDDX, N ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. LDDX >= max( 1, M ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(N/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB.
[in]dinvA_lengthINTEGER array, dimension(batchCount). The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by CTRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]max_mINTEGER The maximum value in m.
[in]max_nINTEGER The maximum value in n.
[in]queuemagma_queue_t Queue to execute in.

◆ magmablas_ctrsm_inv_work_vbatched()

void magmablas_ctrsm_inv_work_vbatched ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t flag,
magma_int_t * m,
magma_int_t * n,
magmaFloatComplex alpha,
magmaFloatComplex ** dA_array,
magma_int_t * ldda,
magmaFloatComplex ** dB_array,
magma_int_t * lddb,
magmaFloatComplex ** dX_array,
magma_int_t * lddx,
magmaFloatComplex ** dinvA_array,
magma_int_t * dinvA_length,
magmaFloatComplex ** dA_displ,
magmaFloatComplex ** dB_displ,
magmaFloatComplex ** dX_displ,
magmaFloatComplex ** dinvA_displ,
magma_int_t resetozero,
magma_int_t batchCount,
magma_int_t max_m,
magma_int_t max_n,
magma_queue_t queue )

ctrsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ctrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of each B. M >= 0.
[in]nINTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of each B. N >= 0.
[in]alphaCOMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array X of dimension ( LDDX, N ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. LDDX >= max( 1, M ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB, If side == MagmaRight, dinvA must be of size >= ceil(N/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB.
[in]dinvA_lengthINTEGER array, dimension(batchCount). The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by CTRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]max_mINTEGER The maximum value in m.
[in]max_nINTEGER The maximum value in n.
[in]queuemagma_queue_t Queue to execute in.

◆ magmablas_ctrsm_inv_vbatched_max_nocheck()

void magmablas_ctrsm_inv_vbatched_max_nocheck ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t * m,
magma_int_t * n,
magmaFloatComplex alpha,
magmaFloatComplex ** dA_array,
magma_int_t * ldda,
magmaFloatComplex ** dB_array,
magma_int_t * lddb,
magma_int_t batchCount,
magma_int_t max_m,
magma_int_t max_n,
magma_queue_t queue )

◆ magmablas_ctrsm_vbatched()

void magmablas_ctrsm_vbatched ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t * m,
magma_int_t * n,
magmaFloatComplex alpha,
magmaFloatComplex ** dA_array,
magma_int_t * ldda,
magmaFloatComplex ** dB_array,
magma_int_t * lddb,
magma_int_t batchCount,
magma_queue_t queue )

ctrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ctrsm.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]mINTEGER array, dimension(batchCount + 1). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]nINTEGER array, dimension(batchCount + 1). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]alphaCOMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount + 1). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER array, dimension(batchCount + 1). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magmablas_dtrsm_inv_outofplace_batched()

void magmablas_dtrsm_inv_outofplace_batched ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t flag,
magma_int_t m,
magma_int_t n,
double alpha,
double ** dA_array,
magma_int_t ldda,
double ** dB_array,
magma_int_t lddb,
double ** dX_array,
magma_int_t lddx,
double ** dinvA_array,
magma_int_t dinvA_length,
double ** dA_displ,
double ** dB_displ,
double ** dX_displ,
double ** dinvA_displ,
magma_int_t resetozero,
magma_int_t batchCount,
magma_queue_t queue )

dtrsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The solution matrix X is returned in dX.

This is an asynchronous version of magmablas_dtrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]nINTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]alphaDOUBLE PRECISION. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array X of dimension ( lddx, n ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(n/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB.
[in]dinvA_lengthINTEGER The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by DTRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magmablas_dtrsm_inv_work_batched()

void magmablas_dtrsm_inv_work_batched ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t flag,
magma_int_t m,
magma_int_t n,
double alpha,
double ** dA_array,
magma_int_t ldda,
double ** dB_array,
magma_int_t lddb,
double ** dX_array,
magma_int_t lddx,
double ** dinvA_array,
magma_int_t dinvA_length,
double ** dA_displ,
double ** dB_displ,
double ** dX_displ,
double ** dinvA_displ,
magma_int_t resetozero,
magma_int_t batchCount,
magma_queue_t queue )

dtrsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_dtrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]nINTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]alphaDOUBLE PRECISION. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array X of dimension ( lddx, n ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB, If side == MagmaRight, dinvA must be of size >= ceil(n/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB, where DTRTRI_BATCHED_NB = 128.
[in]dinvA_lengthINTEGER The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by DTRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magmablas_dtrsm_inv_batched()

void magmablas_dtrsm_inv_batched ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t m,
magma_int_t n,
double alpha,
double ** dA_array,
magma_int_t ldda,
double ** dB_array,
magma_int_t lddb,
magma_int_t batchCount,
magma_queue_t queue )

dtrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_dtrsm.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]mINTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]nINTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]alphaDOUBLE PRECISION. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magmablas_dtrsm_inv_vbatched()

void magmablas_dtrsm_inv_vbatched ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t * m,
magma_int_t * n,
double alpha,
double ** dA_array,
magma_int_t * ldda,
double ** dB_array,
magma_int_t * lddb,
magma_int_t batchCount,
magma_queue_t queue )

dtrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_dtrsm.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]mINTEGER array, dimension(batchCount + 1). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]nINTEGER array, dimension(batchCount + 1). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]alphaDOUBLE PRECISION. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount + 1). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER array, dimension(batchCount + 1). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magmablas_dtrsm_inv_outofplace_vbatched()

void magmablas_dtrsm_inv_outofplace_vbatched ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t flag,
magma_int_t * m,
magma_int_t * n,
double alpha,
double ** dA_array,
magma_int_t * ldda,
double ** dB_array,
magma_int_t * lddb,
double ** dX_array,
magma_int_t * lddx,
double ** dinvA_array,
magma_int_t * dinvA_length,
double ** dA_displ,
double ** dB_displ,
double ** dX_displ,
double ** dinvA_displ,
magma_int_t resetozero,
magma_int_t batchCount,
magma_int_t max_m,
magma_int_t max_n,
magma_queue_t queue )

dtrsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The solution matrix X is returned in dX.

This is an asynchronous version of magmablas_dtrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]nINTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]alphaDOUBLE PRECISION. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
[in]lddbINTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array X of dimension ( LDDX, N ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. LDDX >= max( 1, M ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(N/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB.
[in]dinvA_lengthINTEGER array, dimension(batchCount). The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by DTRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]max_mINTEGER The maximum value in m.
[in]max_nINTEGER The maximum value in n.
[in]queuemagma_queue_t Queue to execute in.

◆ magmablas_dtrsm_inv_work_vbatched()

void magmablas_dtrsm_inv_work_vbatched ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t flag,
magma_int_t * m,
magma_int_t * n,
double alpha,
double ** dA_array,
magma_int_t * ldda,
double ** dB_array,
magma_int_t * lddb,
double ** dX_array,
magma_int_t * lddx,
double ** dinvA_array,
magma_int_t * dinvA_length,
double ** dA_displ,
double ** dB_displ,
double ** dX_displ,
double ** dinvA_displ,
magma_int_t resetozero,
magma_int_t batchCount,
magma_int_t max_m,
magma_int_t max_n,
magma_queue_t queue )

dtrsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_dtrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of each B. M >= 0.
[in]nINTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of each B. N >= 0.
[in]alphaDOUBLE PRECISION. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. lddb >= max( 1, M ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array X of dimension ( LDDX, N ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. lddx >= max( 1, M ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB, If side == MagmaRight, dinvA must be of size >= ceil(N/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB.
[in]dinvA_lengthINTEGER array, dimension(batchCount). The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by DTRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]max_mINTEGER The maximum value in m.
[in]max_nINTEGER The maximum value in n.
[in]queuemagma_queue_t Queue to execute in.

◆ magmablas_dtrsm_inv_vbatched_max_nocheck()

void magmablas_dtrsm_inv_vbatched_max_nocheck ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t * m,
magma_int_t * n,
double alpha,
double ** dA_array,
magma_int_t * ldda,
double ** dB_array,
magma_int_t * lddb,
magma_int_t batchCount,
magma_int_t max_m,
magma_int_t max_n,
magma_queue_t queue )

◆ magmablas_dtrsm_vbatched()

void magmablas_dtrsm_vbatched ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t * m,
magma_int_t * n,
double alpha,
double ** dA_array,
magma_int_t * ldda,
double ** dB_array,
magma_int_t * lddb,
magma_int_t batchCount,
magma_queue_t queue )

dtrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_dtrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]mINTEGER array, dimension(batchCount + 1). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]nINTEGER array, dimension(batchCount + 1). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]alphaDOUBLE PRECISION. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount + 1). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER array, dimension(batchCount + 1). On entry, each element LDDB specifies the first dimension of the corresponding array B. LDDB >= max( 1, M ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magmablas_strsm_inv_outofplace_batched()

void magmablas_strsm_inv_outofplace_batched ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t flag,
magma_int_t m,
magma_int_t n,
float alpha,
float ** dA_array,
magma_int_t ldda,
float ** dB_array,
magma_int_t lddb,
float ** dX_array,
magma_int_t lddx,
float ** dinvA_array,
magma_int_t dinvA_length,
float ** dA_displ,
float ** dB_displ,
float ** dX_displ,
float ** dinvA_displ,
magma_int_t resetozero,
magma_int_t batchCount,
magma_queue_t queue )

strsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The solution matrix X is returned in dX_array.

This is an asynchronous version of magmablas_strsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]nINTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]alphaREAL. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a REAL array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a REAL array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a REAL array X of dimension ( lddx, n ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a REAL array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(n/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB.
[in]dinvA_lengthINTEGER The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by STRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magmablas_strsm_inv_work_batched()

void magmablas_strsm_inv_work_batched ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t flag,
magma_int_t m,
magma_int_t n,
float alpha,
float ** dA_array,
magma_int_t ldda,
float ** dB_array,
magma_int_t lddb,
float ** dX_array,
magma_int_t lddx,
float ** dinvA_array,
magma_int_t dinvA_length,
float ** dA_displ,
float ** dB_displ,
float ** dX_displ,
float ** dinvA_displ,
magma_int_t resetozero,
magma_int_t batchCount,
magma_queue_t queue )

strsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_strsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]nINTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]alphaREAL. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a REAL array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a REAL array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a REAL array X of dimension ( lddx, n ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a REAL array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB, If side == MagmaRight, dinvA must be of size >= ceil(n/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB, where STRTRI_BATCHED_NB = 128.
[in]dinvA_lengthINTEGER The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by STRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magmablas_strsm_inv_batched()

void magmablas_strsm_inv_batched ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t m,
magma_int_t n,
float alpha,
float ** dA_array,
magma_int_t ldda,
float ** dB_array,
magma_int_t lddb,
magma_int_t batchCount,
magma_queue_t queue )

strsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_strsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]mINTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]nINTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]alphaREAL. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a REAL array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a REAL array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magmablas_strsm_inv_vbatched()

void magmablas_strsm_inv_vbatched ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t * m,
magma_int_t * n,
float alpha,
float ** dA_array,
magma_int_t * ldda,
float ** dB_array,
magma_int_t * lddb,
magma_int_t batchCount,
magma_queue_t queue )

strsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_strsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]mINTEGER array, dimension(batchCount + 1). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]nINTEGER array, dimension(batchCount + 1). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]alphaREAL. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a REAL array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount + 1). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a REAL array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER array, dimension(batchCount + 1). On entry, each element LDDB specifies the first dimension of the corresponding array B. LDDB >= max( 1, M ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magmablas_strsm_inv_outofplace_vbatched()

void magmablas_strsm_inv_outofplace_vbatched ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t flag,
magma_int_t * m,
magma_int_t * n,
float alpha,
float ** dA_array,
magma_int_t * ldda,
float ** dB_array,
magma_int_t * lddb,
float ** dX_array,
magma_int_t * lddx,
float ** dinvA_array,
magma_int_t * dinvA_length,
float ** dA_displ,
float ** dB_displ,
float ** dX_displ,
float ** dinvA_displ,
magma_int_t resetozero,
magma_int_t batchCount,
magma_int_t max_m,
magma_int_t max_n,
magma_queue_t queue )

strsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The solution matrix X is returned in dX_array.

This is an asynchronous version of magmablas_strsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]nINTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]alphaREAL. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a REAL array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a REAL array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
[in]lddbINTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a REAL array X of dimension ( LDDX, N ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. LDDX >= max( 1, M ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a REAL array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(N/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB.
[in]dinvA_lengthINTEGER array, dimension(batchCount). The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by STRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]max_mINTEGER The maximum value in m.
[in]max_nINTEGER The maximum value in n.
[in]queuemagma_queue_t Queue to execute in.

◆ magmablas_strsm_inv_work_vbatched()

void magmablas_strsm_inv_work_vbatched ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t flag,
magma_int_t * m,
magma_int_t * n,
float alpha,
float ** dA_array,
magma_int_t * ldda,
float ** dB_array,
magma_int_t * lddb,
float ** dX_array,
magma_int_t * lddx,
float ** dinvA_array,
magma_int_t * dinvA_length,
float ** dA_displ,
float ** dB_displ,
float ** dX_displ,
float ** dinvA_displ,
magma_int_t resetozero,
magma_int_t batchCount,
magma_int_t max_m,
magma_int_t max_n,
magma_queue_t queue )

strsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_strsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of each B. M >= 0.
[in]nINTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of each B. N >= 0.
[in]alphaREAL. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a REAL array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a REAL array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. lddb >= max( 1, M ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a REAL array X of dimension ( LDDX, N ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. lddx >= max( 1, M ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a REAL array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB, If side == MagmaRight, dinvA must be of size >= ceil(N/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB.
[in]dinvA_lengthINTEGER array, dimension(batchCount). The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by STRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]max_mINTEGER The maximum value in m.
[in]max_nINTEGER The maximum value in n.
[in]queuemagma_queue_t Queue to execute in.

◆ magmablas_strsm_inv_vbatched_max_nocheck()

void magmablas_strsm_inv_vbatched_max_nocheck ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t * m,
magma_int_t * n,
float alpha,
float ** dA_array,
magma_int_t * ldda,
float ** dB_array,
magma_int_t * lddb,
magma_int_t batchCount,
magma_int_t max_m,
magma_int_t max_n,
magma_queue_t queue )

◆ magmablas_strsm_vbatched()

void magmablas_strsm_vbatched ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t * m,
magma_int_t * n,
float alpha,
float ** dA_array,
magma_int_t * ldda,
float ** dB_array,
magma_int_t * lddb,
magma_int_t batchCount,
magma_queue_t queue )

strsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous, variable-size batched version of magmablas_strsm. Unlike the _work variants, it does not take the flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]mINTEGER array, dimension(batchCount + 1). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]nINTEGER array, dimension(batchCount + 1). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]alphaREAL. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a REAL array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount + 1). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a REAL array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER array, dimension(batchCount + 1). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magmablas_ztrsm_inv_outofplace_batched()

void magmablas_ztrsm_inv_outofplace_batched ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t flag,
magma_int_t m,
magma_int_t n,
magmaDoubleComplex alpha,
magmaDoubleComplex ** dA_array,
magma_int_t ldda,
magmaDoubleComplex ** dB_array,
magma_int_t lddb,
magmaDoubleComplex ** dX_array,
magma_int_t lddx,
magmaDoubleComplex ** dinvA_array,
magma_int_t dinvA_length,
magmaDoubleComplex ** dA_displ,
magmaDoubleComplex ** dB_displ,
magmaDoubleComplex ** dX_displ,
magmaDoubleComplex ** dinvA_displ,
magma_int_t resetozero,
magma_int_t batchCount,
magma_queue_t queue )

ztrsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The solution matrix X is returned in dX_array (see the dX_array parameter below).

This is an asynchronous version of magmablas_ztrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]nINTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]alphaCOMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array X of dimension ( lddx, n ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(n/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB.
[in]dinvA_lengthINTEGER The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by ZTRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magmablas_ztrsm_inv_work_batched()

void magmablas_ztrsm_inv_work_batched ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t flag,
magma_int_t m,
magma_int_t n,
magmaDoubleComplex alpha,
magmaDoubleComplex ** dA_array,
magma_int_t ldda,
magmaDoubleComplex ** dB_array,
magma_int_t lddb,
magmaDoubleComplex ** dX_array,
magma_int_t lddx,
magmaDoubleComplex ** dinvA_array,
magma_int_t dinvA_length,
magmaDoubleComplex ** dA_displ,
magmaDoubleComplex ** dB_displ,
magmaDoubleComplex ** dX_displ,
magmaDoubleComplex ** dinvA_displ,
magma_int_t resetozero,
magma_int_t batchCount,
magma_queue_t queue )

ztrsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ztrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]nINTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]alphaCOMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array X of dimension ( lddx, n ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB, If side == MagmaRight, dinvA must be of size >= ceil(n/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB, where ZTRTRI_BATCHED_NB = 128.
[in]dinvA_lengthINTEGER The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by ZTRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magmablas_ztrsm_inv_batched()

void magmablas_ztrsm_inv_batched ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t m,
magma_int_t n,
magmaDoubleComplex alpha,
magmaDoubleComplex ** dA_array,
magma_int_t ldda,
magmaDoubleComplex ** dB_array,
magma_int_t lddb,
magma_int_t batchCount,
magma_queue_t queue )

ztrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous batched version of magmablas_ztrsm. Unlike magmablas_ztrsm_inv_work_batched, it does not take the flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]mINTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]nINTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]alphaCOMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magmablas_ztrsm_inv_vbatched()

void magmablas_ztrsm_inv_vbatched ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t * m,
magma_int_t * n,
magmaDoubleComplex alpha,
magmaDoubleComplex ** dA_array,
magma_int_t * ldda,
magmaDoubleComplex ** dB_array,
magma_int_t * lddb,
magma_int_t batchCount,
magma_queue_t queue )

ztrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous, variable-size batched version of magmablas_ztrsm. Unlike magmablas_ztrsm_inv_work_vbatched, it does not take the flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]mINTEGER array, dimension(batchCount + 1). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]nINTEGER array, dimension(batchCount + 1). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]alphaCOMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount + 1). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER array, dimension(batchCount + 1). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.

◆ magmablas_ztrsm_inv_outofplace_vbatched()

void magmablas_ztrsm_inv_outofplace_vbatched ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t flag,
magma_int_t * m,
magma_int_t * n,
magmaDoubleComplex alpha,
magmaDoubleComplex ** dA_array,
magma_int_t * ldda,
magmaDoubleComplex ** dB_array,
magma_int_t * lddb,
magmaDoubleComplex ** dX_array,
magma_int_t * lddx,
magmaDoubleComplex ** dinvA_array,
magma_int_t * dinvA_length,
magmaDoubleComplex ** dA_displ,
magmaDoubleComplex ** dB_displ,
magmaDoubleComplex ** dX_displ,
magmaDoubleComplex ** dinvA_displ,
magma_int_t resetozero,
magma_int_t batchCount,
magma_int_t max_m,
magma_int_t max_n,
magma_queue_t queue )

ztrsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The solution matrix X is returned in dX_array (see the dX_array parameter below).

This is an asynchronous version of magmablas_ztrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]nINTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]alphaCOMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
[in]lddbINTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array X of dimension ( LDDX, N ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. LDDX >= max( 1, M ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(N/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB.
[in]dinvA_lengthINTEGER array, dimension(batchCount). The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by ZTRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]max_mINTEGER The maximum value in m.
[in]max_nINTEGER The maximum value in n.
[in]queuemagma_queue_t Queue to execute in.

◆ magmablas_ztrsm_inv_work_vbatched()

void magmablas_ztrsm_inv_work_vbatched ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t flag,
magma_int_t * m,
magma_int_t * n,
magmaDoubleComplex alpha,
magmaDoubleComplex ** dA_array,
magma_int_t * ldda,
magmaDoubleComplex ** dB_array,
magma_int_t * lddb,
magmaDoubleComplex ** dX_array,
magma_int_t * lddx,
magmaDoubleComplex ** dinvA_array,
magma_int_t * dinvA_length,
magmaDoubleComplex ** dA_displ,
magmaDoubleComplex ** dB_displ,
magmaDoubleComplex ** dX_displ,
magmaDoubleComplex ** dinvA_displ,
magma_int_t resetozero,
magma_int_t batchCount,
magma_int_t max_m,
magma_int_t max_n,
magma_queue_t queue )

ztrsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ztrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of each B. M >= 0.
[in]nINTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of each B. N >= 0.
[in]alphaCOMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. lddb >= max( 1, M ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array X of dimension ( LDDX, N ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. lddx >= max( 1, M ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB, If side == MagmaRight, dinvA must be of size >= ceil(N/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB.
[in]dinvA_lengthINTEGER array, dimension(batchCount). The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by ZTRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]max_mINTEGER The maximum value in m.
[in]max_nINTEGER The maximum value in n.
[in]queuemagma_queue_t Queue to execute in.

◆ magmablas_ztrsm_inv_vbatched_max_nocheck()

void magmablas_ztrsm_inv_vbatched_max_nocheck ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t * m,
magma_int_t * n,
magmaDoubleComplex alpha,
magmaDoubleComplex ** dA_array,
magma_int_t * ldda,
magmaDoubleComplex ** dB_array,
magma_int_t * lddb,
magma_int_t batchCount,
magma_int_t max_m,
magma_int_t max_n,
magma_queue_t queue )

◆ magmablas_ztrsm_vbatched()

void magmablas_ztrsm_vbatched ( magma_side_t side,
magma_uplo_t uplo,
magma_trans_t transA,
magma_diag_t diag,
magma_int_t * m,
magma_int_t * n,
magmaDoubleComplex alpha,
magmaDoubleComplex ** dA_array,
magma_int_t * ldda,
magmaDoubleComplex ** dB_array,
magma_int_t * lddb,
magma_int_t batchCount,
magma_queue_t queue )

ztrsm solves one of the following matrix equations on the GPU:

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

On exit, the solution matrix X overwrites B.

This is an asynchronous, variable-size batched version of magmablas_ztrsm. Unlike the _inv_work and _inv_outofplace variants, it does not take the flag, dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]mINTEGER array, dimension(batchCount + 1). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]nINTEGER array, dimension(batchCount + 1). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]alphaCOMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount + 1). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
On exit, B is overwritten by the solution matrix X.
[in]lddbINTEGER array, dimension(batchCount + 1). On entry, each element LDDB specifies the first dimension of the corresponding array B. LDDB >= max( 1, M ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.