MAGMA  2.7.1
Matrix Algebra for GPU and Multicore Architectures
 All Classes Files Functions Friends Groups Pages

\( C = op(A)^{-1} B \) or \( C = B \;op(A)^{-1} \) where \( A \) is triangular More...

Functions

void magmablas_ctrsm_inv_outofplace_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t ldda, magmaFloatComplex **dB_array, magma_int_t lddb, magmaFloatComplex **dX_array, magma_int_t lddx, magmaFloatComplex **dinvA_array, magma_int_t dinvA_length, magmaFloatComplex **dA_displ, magmaFloatComplex **dB_displ, magmaFloatComplex **dX_displ, magmaFloatComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
 ctrsm_outofplace solves one of the matrix equations on gpu More...
 
void magmablas_ctrsm_inv_work_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t ldda, magmaFloatComplex **dB_array, magma_int_t lddb, magmaFloatComplex **dX_array, magma_int_t lddx, magmaFloatComplex **dinvA_array, magma_int_t dinvA_length, magmaFloatComplex **dA_displ, magmaFloatComplex **dB_displ, magmaFloatComplex **dX_displ, magmaFloatComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
 ctrsm_work solves one of the matrix equations on gpu More...
 
void magmablas_ctrsm_inv_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t ldda, magmaFloatComplex **dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue)
 ctrsm solves one of the matrix equations on gpu More...
 
void magmablas_ctrsm_inv_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t *ldda, magmaFloatComplex **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_queue_t queue)
 ctrsm solves one of the matrix equations on gpu More...
 
void magmablas_ctrsm_inv_outofplace_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t *m, magma_int_t *n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t *ldda, magmaFloatComplex **dB_array, magma_int_t *lddb, magmaFloatComplex **dX_array, magma_int_t *lddx, magmaFloatComplex **dinvA_array, magma_int_t *dinvA_length, magmaFloatComplex **dA_displ, magmaFloatComplex **dB_displ, magmaFloatComplex **dX_displ, magmaFloatComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
 ctrsm_outofplace solves one of the matrix equations on gpu More...
 
void magmablas_ctrsm_inv_work_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t *m, magma_int_t *n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t *ldda, magmaFloatComplex **dB_array, magma_int_t *lddb, magmaFloatComplex **dX_array, magma_int_t *lddx, magmaFloatComplex **dinvA_array, magma_int_t *dinvA_length, magmaFloatComplex **dA_displ, magmaFloatComplex **dB_displ, magmaFloatComplex **dX_displ, magmaFloatComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
 ctrsm_work solves one of the matrix equations on gpu More...
 
void magmablas_ctrsm_inv_vbatched_max_nocheck (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t *ldda, magmaFloatComplex **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
 
void magmablas_ctrsm_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t *ldda, magmaFloatComplex **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_queue_t queue)
 ctrsm solves one of the matrix equations on gpu More...
 
void magmablas_dtrsm_inv_outofplace_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, double alpha, double **dA_array, magma_int_t ldda, double **dB_array, magma_int_t lddb, double **dX_array, magma_int_t lddx, double **dinvA_array, magma_int_t dinvA_length, double **dA_displ, double **dB_displ, double **dX_displ, double **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
 dtrsm_outofplace solves one of the matrix equations on gpu More...
 
void magmablas_dtrsm_inv_work_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, double alpha, double **dA_array, magma_int_t ldda, double **dB_array, magma_int_t lddb, double **dX_array, magma_int_t lddx, double **dinvA_array, magma_int_t dinvA_length, double **dA_displ, double **dB_displ, double **dX_displ, double **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
 dtrsm_work solves one of the matrix equations on gpu More...
 
void magmablas_dtrsm_inv_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, double alpha, double **dA_array, magma_int_t ldda, double **dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue)
 dtrsm solves one of the matrix equations on gpu More...
 
void magmablas_dtrsm_inv_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, double alpha, double **dA_array, magma_int_t *ldda, double **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_queue_t queue)
 dtrsm solves one of the matrix equations on gpu More...
 
void magmablas_dtrsm_inv_outofplace_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t *m, magma_int_t *n, double alpha, double **dA_array, magma_int_t *ldda, double **dB_array, magma_int_t *lddb, double **dX_array, magma_int_t *lddx, double **dinvA_array, magma_int_t *dinvA_length, double **dA_displ, double **dB_displ, double **dX_displ, double **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
 dtrsm_outofplace solves one of the matrix equations on gpu More...
 
void magmablas_dtrsm_inv_work_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t *m, magma_int_t *n, double alpha, double **dA_array, magma_int_t *ldda, double **dB_array, magma_int_t *lddb, double **dX_array, magma_int_t *lddx, double **dinvA_array, magma_int_t *dinvA_length, double **dA_displ, double **dB_displ, double **dX_displ, double **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
 dtrsm_work solves one of the matrix equations on gpu More...
 
void magmablas_dtrsm_inv_vbatched_max_nocheck (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, double alpha, double **dA_array, magma_int_t *ldda, double **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
 
void magmablas_dtrsm_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, double alpha, double **dA_array, magma_int_t *ldda, double **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_queue_t queue)
 dtrsm solves one of the matrix equations on gpu More...
 
void magmablas_strsm_inv_outofplace_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, float alpha, float **dA_array, magma_int_t ldda, float **dB_array, magma_int_t lddb, float **dX_array, magma_int_t lddx, float **dinvA_array, magma_int_t dinvA_length, float **dA_displ, float **dB_displ, float **dX_displ, float **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
 strsm_outofplace solves one of the matrix equations on gpu More...
 
void magmablas_strsm_inv_work_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, float alpha, float **dA_array, magma_int_t ldda, float **dB_array, magma_int_t lddb, float **dX_array, magma_int_t lddx, float **dinvA_array, magma_int_t dinvA_length, float **dA_displ, float **dB_displ, float **dX_displ, float **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
 strsm_work solves one of the matrix equations on gpu More...
 
void magmablas_strsm_inv_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, float alpha, float **dA_array, magma_int_t ldda, float **dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue)
 strsm solves one of the matrix equations on gpu More...
 
void magmablas_strsm_inv_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, float alpha, float **dA_array, magma_int_t *ldda, float **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_queue_t queue)
 strsm solves one of the matrix equations on gpu More...
 
void magmablas_strsm_inv_outofplace_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t *m, magma_int_t *n, float alpha, float **dA_array, magma_int_t *ldda, float **dB_array, magma_int_t *lddb, float **dX_array, magma_int_t *lddx, float **dinvA_array, magma_int_t *dinvA_length, float **dA_displ, float **dB_displ, float **dX_displ, float **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
 strsm_outofplace solves one of the matrix equations on gpu More...
 
void magmablas_strsm_inv_work_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t *m, magma_int_t *n, float alpha, float **dA_array, magma_int_t *ldda, float **dB_array, magma_int_t *lddb, float **dX_array, magma_int_t *lddx, float **dinvA_array, magma_int_t *dinvA_length, float **dA_displ, float **dB_displ, float **dX_displ, float **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
 strsm_work solves one of the matrix equations on gpu More...
 
void magmablas_strsm_inv_vbatched_max_nocheck (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, float alpha, float **dA_array, magma_int_t *ldda, float **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
 
void magmablas_strsm_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, float alpha, float **dA_array, magma_int_t *ldda, float **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_queue_t queue)
 strsm solves one of the matrix equations on gpu More...
 
void magmablas_ztrsm_inv_outofplace_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex **dA_array, magma_int_t ldda, magmaDoubleComplex **dB_array, magma_int_t lddb, magmaDoubleComplex **dX_array, magma_int_t lddx, magmaDoubleComplex **dinvA_array, magma_int_t dinvA_length, magmaDoubleComplex **dA_displ, magmaDoubleComplex **dB_displ, magmaDoubleComplex **dX_displ, magmaDoubleComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
 ztrsm_outofplace solves one of the matrix equations on gpu More...
 
void magmablas_ztrsm_inv_work_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex **dA_array, magma_int_t ldda, magmaDoubleComplex **dB_array, magma_int_t lddb, magmaDoubleComplex **dX_array, magma_int_t lddx, magmaDoubleComplex **dinvA_array, magma_int_t dinvA_length, magmaDoubleComplex **dA_displ, magmaDoubleComplex **dB_displ, magmaDoubleComplex **dX_displ, magmaDoubleComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
 ztrsm_work solves one of the matrix equations on gpu More...
 
void magmablas_ztrsm_inv_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex **dA_array, magma_int_t ldda, magmaDoubleComplex **dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue)
 ztrsm solves one of the matrix equations on gpu More...
 
void magmablas_ztrsm_inv_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, magmaDoubleComplex alpha, magmaDoubleComplex **dA_array, magma_int_t *ldda, magmaDoubleComplex **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_queue_t queue)
 ztrsm solves one of the matrix equations on gpu More...
 
void magmablas_ztrsm_inv_outofplace_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t *m, magma_int_t *n, magmaDoubleComplex alpha, magmaDoubleComplex **dA_array, magma_int_t *ldda, magmaDoubleComplex **dB_array, magma_int_t *lddb, magmaDoubleComplex **dX_array, magma_int_t *lddx, magmaDoubleComplex **dinvA_array, magma_int_t *dinvA_length, magmaDoubleComplex **dA_displ, magmaDoubleComplex **dB_displ, magmaDoubleComplex **dX_displ, magmaDoubleComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
 ztrsm_outofplace solves one of the matrix equations on gpu More...
 
void magmablas_ztrsm_inv_work_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t *m, magma_int_t *n, magmaDoubleComplex alpha, magmaDoubleComplex **dA_array, magma_int_t *ldda, magmaDoubleComplex **dB_array, magma_int_t *lddb, magmaDoubleComplex **dX_array, magma_int_t *lddx, magmaDoubleComplex **dinvA_array, magma_int_t *dinvA_length, magmaDoubleComplex **dA_displ, magmaDoubleComplex **dB_displ, magmaDoubleComplex **dX_displ, magmaDoubleComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
 ztrsm_work solves one of the matrix equations on gpu More...
 
void magmablas_ztrsm_inv_vbatched_max_nocheck (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, magmaDoubleComplex alpha, magmaDoubleComplex **dA_array, magma_int_t *ldda, magmaDoubleComplex **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
 
void magmablas_ztrsm_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, magmaDoubleComplex alpha, magmaDoubleComplex **dA_array, magma_int_t *ldda, magmaDoubleComplex **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_queue_t queue)
 ztrsm solves one of the matrix equations on gpu More...
 

Detailed Description

\( C = op(A)^{-1} B \) or \( C = B \;op(A)^{-1} \) where \( A \) is triangular

Function Documentation

void magmablas_ctrsm_inv_outofplace_batched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  flag,
magma_int_t  m,
magma_int_t  n,
magmaFloatComplex  alpha,
magmaFloatComplex **  dA_array,
magma_int_t  ldda,
magmaFloatComplex **  dB_array,
magma_int_t  lddb,
magmaFloatComplex **  dX_array,
magma_int_t  lddx,
magmaFloatComplex **  dinvA_array,
magma_int_t  dinvA_length,
magmaFloatComplex **  dA_displ,
magmaFloatComplex **  dB_displ,
magmaFloatComplex **  dX_displ,
magmaFloatComplex **  dinvA_displ,
magma_int_t  resetozero,
magma_int_t  batchCount,
magma_queue_t  queue 
)

ctrsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The solution matrix X is returned in dX_array; unlike the in-place variants, X is not written over B.

This is an asynchronous version of magmablas_ctrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in] flag: BOOLEAN (passed as magma_int_t). If flag is true, invert diagonal blocks. If flag is false, assume the diagonal blocks (stored in dinvA_array) are already inverted.
[in]mINTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]nINTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]alphaCOMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in,out] dX_array: Array of pointers, dimension (batchCount). Each is a COMPLEX array X of dimension ( lddx, n ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ).
dinvA_array: Array of pointers, dimension (batchCount). Each is a COMPLEX array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB; if side == MagmaRight, dinvA must be of size >= ceil(n/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB, where CTRTRI_BATCHED_NB = 128.
[in] dinvA_length: INTEGER. The size of each workspace matrix dinvA.
dA_displ: (workspace) Array of pointers, dimension (batchCount).
dB_displ: (workspace) Array of pointers, dimension (batchCount).
dX_displ: (workspace) Array of pointers, dimension (batchCount).
dinvA_displ: (workspace) Array of pointers, dimension (batchCount).
[in] resetozero: INTEGER. Used internally by the CTRTRI_DIAG routine.
[in] batchCount: INTEGER. The number of matrices to operate on.
[in] queue: magma_queue_t. Queue to execute in.
void magmablas_ctrsm_inv_work_batched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  flag,
magma_int_t  m,
magma_int_t  n,
magmaFloatComplex  alpha,
magmaFloatComplex **  dA_array,
magma_int_t  ldda,
magmaFloatComplex **  dB_array,
magma_int_t  lddb,
magmaFloatComplex **  dX_array,
magma_int_t  lddx,
magmaFloatComplex **  dinvA_array,
magma_int_t  dinvA_length,
magmaFloatComplex **  dA_displ,
magmaFloatComplex **  dB_displ,
magmaFloatComplex **  dX_displ,
magmaFloatComplex **  dinvA_displ,
magma_int_t  resetozero,
magma_int_t  batchCount,
magma_queue_t  queue 
)

ctrsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ctrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in] flag: BOOLEAN (passed as magma_int_t). If flag is true, invert diagonal blocks. If flag is false, assume the diagonal blocks (stored in dinvA_array) are already inverted.
[in]mINTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]nINTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]alphaCOMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in,out] dB_array: Array of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X.
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in,out] dX_array: Array of pointers, dimension (batchCount). Each is a COMPLEX array X of dimension ( lddx, n ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ).
dinvA_array: Array of pointers, dimension (batchCount). Each is a COMPLEX array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB; if side == MagmaRight, dinvA must be of size >= ceil(n/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB, where CTRTRI_BATCHED_NB = 128.
[in]dinvA_lengthINTEGER The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by CTRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_ctrsm_inv_batched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  m,
magma_int_t  n,
magmaFloatComplex  alpha,
magmaFloatComplex **  dA_array,
magma_int_t  ldda,
magmaFloatComplex **  dB_array,
magma_int_t  lddb,
magma_int_t  batchCount,
magma_queue_t  queue 
)

ctrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ctrsm. Unlike the _work and _outofplace variants, it does not take flag, dinvA, or dX workspace arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]mINTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]nINTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]alphaCOMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_ctrsm_inv_vbatched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t *  m,
magma_int_t *  n,
magmaFloatComplex  alpha,
magmaFloatComplex **  dA_array,
magma_int_t *  ldda,
magmaFloatComplex **  dB_array,
magma_int_t *  lddb,
magma_int_t  batchCount,
magma_queue_t  queue 
)

ctrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ctrsm. Unlike the _work and _outofplace variants, it does not take flag, dinvA, or dX workspace arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]mINTEGER array, dimension(batchCount + 1). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]nINTEGER array, dimension(batchCount + 1). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]alphaCOMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in] ldda: INTEGER array, dimension(batchCount + 1). On entry, each element LDDA specifies the first dimension of the corresponding array A. When side = MagmaLeft, LDDA >= max( 1, M ); when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in] lddb: INTEGER array, dimension(batchCount + 1). On entry, each element LDDB specifies the first dimension of the corresponding array B. LDDB >= max( 1, M ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_ctrsm_inv_outofplace_vbatched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  flag,
magma_int_t *  m,
magma_int_t *  n,
magmaFloatComplex  alpha,
magmaFloatComplex **  dA_array,
magma_int_t *  ldda,
magmaFloatComplex **  dB_array,
magma_int_t *  lddb,
magmaFloatComplex **  dX_array,
magma_int_t *  lddx,
magmaFloatComplex **  dinvA_array,
magma_int_t *  dinvA_length,
magmaFloatComplex **  dA_displ,
magmaFloatComplex **  dB_displ,
magmaFloatComplex **  dX_displ,
magmaFloatComplex **  dinvA_displ,
magma_int_t  resetozero,
magma_int_t  batchCount,
magma_int_t  max_m,
magma_int_t  max_n,
magma_queue_t  queue 
)

ctrsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The solution matrix X is returned in dX_array.

This is an asynchronous version of magmablas_ctrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]nINTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]alphaCOMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
[in]lddbINTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array X of dimension ( LDDX, N ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. LDDX >= max( 1, M ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(N/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB.
[in]dinvA_lengthINTEGER array, dimension(batchCount). The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by CTRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]max_mINTEGER The maximum value in m.
[in]max_nINTEGER The maximum value in n.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_ctrsm_inv_work_vbatched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  flag,
magma_int_t *  m,
magma_int_t *  n,
magmaFloatComplex  alpha,
magmaFloatComplex **  dA_array,
magma_int_t *  ldda,
magmaFloatComplex **  dB_array,
magma_int_t *  lddb,
magmaFloatComplex **  dX_array,
magma_int_t *  lddx,
magmaFloatComplex **  dinvA_array,
magma_int_t *  dinvA_length,
magmaFloatComplex **  dA_displ,
magmaFloatComplex **  dB_displ,
magmaFloatComplex **  dX_displ,
magmaFloatComplex **  dinvA_displ,
magma_int_t  resetozero,
magma_int_t  batchCount,
magma_int_t  max_m,
magma_int_t  max_n,
magma_queue_t  queue 
)

ctrsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ctrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of each B. M >= 0.
[in]nINTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of each B. N >= 0.
[in]alphaCOMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
On exit, B is overwritten by the solution matrix X.
[in]lddbINTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array X of dimension ( LDDX, N ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. LDDX >= max( 1, M ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(N/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB.
[in]dinvA_lengthINTEGER array, dimension(batchCount). The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by CTRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]max_mINTEGER The maximum value in m.
[in]max_nINTEGER The maximum value in n.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_ctrsm_inv_vbatched_max_nocheck ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t *  m,
magma_int_t *  n,
magmaFloatComplex  alpha,
magmaFloatComplex **  dA_array,
magma_int_t *  ldda,
magmaFloatComplex **  dB_array,
magma_int_t *  lddb,
magma_int_t  batchCount,
magma_int_t  max_m,
magma_int_t  max_n,
magma_queue_t  queue 
)
void magmablas_ctrsm_vbatched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t *  m,
magma_int_t *  n,
magmaFloatComplex  alpha,
magmaFloatComplex **  dA_array,
magma_int_t *  ldda,
magmaFloatComplex **  dB_array,
magma_int_t *  lddb,
magma_int_t  batchCount,
magma_queue_t  queue 
)

ctrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ctrsm. Unlike the _work and _outofplace variants, it does not take flag, dinvA, or dX workspace arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]mINTEGER array, dimension(batchCount + 1). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]nINTEGER array, dimension(batchCount + 1). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]alphaCOMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount + 1). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
On exit, B is overwritten by the solution matrix X.
[in]lddbINTEGER array, dimension(batchCount + 1). On entry, each element LDDB specifies the first dimension of the corresponding array B. LDDB >= max( 1, M ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_dtrsm_inv_outofplace_batched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  flag,
magma_int_t  m,
magma_int_t  n,
double  alpha,
double **  dA_array,
magma_int_t  ldda,
double **  dB_array,
magma_int_t  lddb,
double **  dX_array,
magma_int_t  lddx,
double **  dinvA_array,
magma_int_t  dinvA_length,
double **  dA_displ,
double **  dB_displ,
double **  dX_displ,
double **  dinvA_displ,
magma_int_t  resetozero,
magma_int_t  batchCount,
magma_queue_t  queue 
)

dtrsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The solution matrix X is returned in dX_array.

This is an asynchronous version of magmablas_dtrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]nINTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]alphaDOUBLE PRECISION. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array X of dimension ( lddx, n ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(n/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB.
[in]dinvA_lengthINTEGER The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by DTRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_dtrsm_inv_work_batched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  flag,
magma_int_t  m,
magma_int_t  n,
double  alpha,
double **  dA_array,
magma_int_t  ldda,
double **  dB_array,
magma_int_t  lddb,
double **  dX_array,
magma_int_t  lddx,
double **  dinvA_array,
magma_int_t  dinvA_length,
double **  dA_displ,
double **  dB_displ,
double **  dX_displ,
double **  dinvA_displ,
magma_int_t  resetozero,
magma_int_t  batchCount,
magma_queue_t  queue 
)

dtrsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_dtrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]nINTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]alphaDOUBLE PRECISION. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
On exit, B is overwritten by the solution matrix X.
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array X of dimension ( lddx, n ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(n/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB, where DTRTRI_BATCHED_NB = 128.
[in]dinvA_lengthINTEGER The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by DTRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_dtrsm_inv_batched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  m,
magma_int_t  n,
double  alpha,
double **  dA_array,
magma_int_t  ldda,
double **  dB_array,
magma_int_t  lddb,
magma_int_t  batchCount,
magma_queue_t  queue 
)

dtrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_dtrsm. Unlike the _work and _outofplace variants, it does not take flag, dinvA, or dX workspace arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]mINTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]nINTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]alphaDOUBLE PRECISION. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
On exit, B is overwritten by the solution matrix X.
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_dtrsm_inv_vbatched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t *  m,
magma_int_t *  n,
double  alpha,
double **  dA_array,
magma_int_t *  ldda,
double **  dB_array,
magma_int_t *  lddb,
magma_int_t  batchCount,
magma_queue_t  queue 
)

dtrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_dtrsm. Unlike the _work and _outofplace variants, it does not take flag, dinvA, or dX workspace arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]mINTEGER array, dimension(batchCount + 1). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]nINTEGER array, dimension(batchCount + 1). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]alphaDOUBLE PRECISION. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount + 1). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
On exit, B is overwritten by the solution matrix X.
[in]lddbINTEGER array, dimension(batchCount + 1). On entry, each element LDDB specifies the first dimension of the corresponding array B. LDDB >= max( 1, M ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_dtrsm_inv_outofplace_vbatched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  flag,
magma_int_t *  m,
magma_int_t *  n,
double  alpha,
double **  dA_array,
magma_int_t *  ldda,
double **  dB_array,
magma_int_t *  lddb,
double **  dX_array,
magma_int_t *  lddx,
double **  dinvA_array,
magma_int_t *  dinvA_length,
double **  dA_displ,
double **  dB_displ,
double **  dX_displ,
double **  dinvA_displ,
magma_int_t  resetozero,
magma_int_t  batchCount,
magma_int_t  max_m,
magma_int_t  max_n,
magma_queue_t  queue 
)

dtrsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The solution matrix X is returned in dX_array.

This is an asynchronous version of magmablas_dtrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]nINTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]alphaDOUBLE PRECISION. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
[in]lddbINTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array X of dimension ( LDDX, N ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. LDDX >= max( 1, M ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(N/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB.
[in]dinvA_lengthINTEGER array, dimension(batchCount). The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by DTRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]max_mINTEGER The maximum value in m.
[in]max_nINTEGER The maximum value in n.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_dtrsm_inv_work_vbatched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  flag,
magma_int_t *  m,
magma_int_t *  n,
double  alpha,
double **  dA_array,
magma_int_t *  ldda,
double **  dB_array,
magma_int_t *  lddb,
double **  dX_array,
magma_int_t *  lddx,
double **  dinvA_array,
magma_int_t *  dinvA_length,
double **  dA_displ,
double **  dB_displ,
double **  dX_displ,
double **  dinvA_displ,
magma_int_t  resetozero,
magma_int_t  batchCount,
magma_int_t  max_m,
magma_int_t  max_n,
magma_queue_t  queue 
)

dtrsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_dtrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of each B. M >= 0.
[in]nINTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of each B. N >= 0.
[in]alphaDOUBLE PRECISION. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. lddb >= max( 1, M ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array X of dimension ( LDDX, N ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. lddx >= max( 1, M ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB, If side == MagmaRight, dinvA must be of size >= ceil(N/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB.
[in]dinvA_lengthINTEGER array, dimension(batchCount). The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by DTRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]max_mINTEGER The maximum value in m.
[in]max_nINTEGER The maximum value in n.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_dtrsm_inv_vbatched_max_nocheck ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t *  m,
magma_int_t *  n,
double  alpha,
double **  dA_array,
magma_int_t *  ldda,
double **  dB_array,
magma_int_t *  lddb,
magma_int_t  batchCount,
magma_int_t  max_m,
magma_int_t  max_n,
magma_queue_t  queue 
)
void magmablas_dtrsm_vbatched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t *  m,
magma_int_t *  n,
double  alpha,
double **  dA_array,
magma_int_t *  ldda,
double **  dB_array,
magma_int_t *  lddb,
magma_int_t  batchCount,
magma_queue_t  queue 
)

dtrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_dtrsm. (This routine takes no flag, d_dinvA or dX workspace arguments; see the parameter list below.)

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]mINTEGER array, dimension(batchCount + 1). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]nINTEGER array, dimension(batchCount + 1). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]alphaDOUBLE PRECISION. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount + 1). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER array, dimension(batchCount + 1). On entry, each element LDDB specifies the first dimension of the corresponding array B. LDDB >= max( 1, M ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_strsm_inv_outofplace_batched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  flag,
magma_int_t  m,
magma_int_t  n,
float  alpha,
float **  dA_array,
magma_int_t  ldda,
float **  dB_array,
magma_int_t  lddb,
float **  dX_array,
magma_int_t  lddx,
float **  dinvA_array,
magma_int_t  dinvA_length,
float **  dA_displ,
float **  dB_displ,
float **  dX_displ,
float **  dinvA_displ,
magma_int_t  resetozero,
magma_int_t  batchCount,
magma_queue_t  queue 
)

strsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The solution matrix X is returned in dX_array (see the dX_array parameter below).

This is an asynchronous version of magmablas_strsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]nINTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]alphaREAL. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a REAL array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a REAL array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a REAL array X of dimension ( lddx, n ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a REAL array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(n/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB.
[in]dinvA_lengthINTEGER The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by STRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_strsm_inv_work_batched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  flag,
magma_int_t  m,
magma_int_t  n,
float  alpha,
float **  dA_array,
magma_int_t  ldda,
float **  dB_array,
magma_int_t  lddb,
float **  dX_array,
magma_int_t  lddx,
float **  dinvA_array,
magma_int_t  dinvA_length,
float **  dA_displ,
float **  dB_displ,
float **  dX_displ,
float **  dinvA_displ,
magma_int_t  resetozero,
magma_int_t  batchCount,
magma_queue_t  queue 
)

strsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_strsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]nINTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]alphaREAL. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a REAL array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a REAL array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a REAL array X of dimension ( lddx, n ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a REAL array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB, If side == MagmaRight, dinvA must be of size >= ceil(n/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB, where STRTRI_BATCHED_NB = 128.
[in]dinvA_lengthINTEGER The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by STRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_strsm_inv_batched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  m,
magma_int_t  n,
float  alpha,
float **  dA_array,
magma_int_t  ldda,
float **  dB_array,
magma_int_t  lddb,
magma_int_t  batchCount,
magma_queue_t  queue 
)

strsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_strsm. (This routine takes no flag, d_dinvA or dX workspace arguments; see the parameter list below.)

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]mINTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]nINTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]alphaREAL. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a REAL array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a REAL array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_strsm_inv_vbatched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t *  m,
magma_int_t *  n,
float  alpha,
float **  dA_array,
magma_int_t *  ldda,
float **  dB_array,
magma_int_t *  lddb,
magma_int_t  batchCount,
magma_queue_t  queue 
)

strsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_strsm. (This routine takes no flag, d_dinvA or dX workspace arguments; see the parameter list below.)

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]mINTEGER array, dimension(batchCount + 1). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]nINTEGER array, dimension(batchCount + 1). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]alphaREAL. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a REAL array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount + 1). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a REAL array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER array, dimension(batchCount + 1). On entry, each element LDDB specifies the first dimension of the corresponding array B. LDDB >= max( 1, M ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_strsm_inv_outofplace_vbatched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  flag,
magma_int_t *  m,
magma_int_t *  n,
float  alpha,
float **  dA_array,
magma_int_t *  ldda,
float **  dB_array,
magma_int_t *  lddb,
float **  dX_array,
magma_int_t *  lddx,
float **  dinvA_array,
magma_int_t *  dinvA_length,
float **  dA_displ,
float **  dB_displ,
float **  dX_displ,
float **  dinvA_displ,
magma_int_t  resetozero,
magma_int_t  batchCount,
magma_int_t  max_m,
magma_int_t  max_n,
magma_queue_t  queue 
)

strsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The solution matrix X is returned in dX_array (see the dX_array parameter below).

This is an asynchronous version of magmablas_strsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]nINTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]alphaREAL. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a REAL array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a REAL array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
[in]lddbINTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a REAL array X of dimension ( LDDX, N ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. LDDX >= max( 1, M ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a REAL array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(N/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB.
[in]dinvA_lengthINTEGER array, dimension(batchCount). The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by STRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]max_mINTEGER The maximum value in m.
[in]max_nINTEGER The maximum value in n.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_strsm_inv_work_vbatched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  flag,
magma_int_t *  m,
magma_int_t *  n,
float  alpha,
float **  dA_array,
magma_int_t *  ldda,
float **  dB_array,
magma_int_t *  lddb,
float **  dX_array,
magma_int_t *  lddx,
float **  dinvA_array,
magma_int_t *  dinvA_length,
float **  dA_displ,
float **  dB_displ,
float **  dX_displ,
float **  dinvA_displ,
magma_int_t  resetozero,
magma_int_t  batchCount,
magma_int_t  max_m,
magma_int_t  max_n,
magma_queue_t  queue 
)

strsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_strsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of each B. M >= 0.
[in]nINTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of each B. N >= 0.
[in]alphaREAL. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a REAL array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a REAL array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. lddb >= max( 1, M ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a REAL array X of dimension ( LDDX, N ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. lddx >= max( 1, M ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a REAL array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB, If side == MagmaRight, dinvA must be of size >= ceil(N/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB.
[in]dinvA_lengthINTEGER array, dimension(batchCount). The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by STRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]max_mINTEGER The maximum value in m.
[in]max_nINTEGER The maximum value in n.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_strsm_inv_vbatched_max_nocheck ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t *  m,
magma_int_t *  n,
float  alpha,
float **  dA_array,
magma_int_t *  ldda,
float **  dB_array,
magma_int_t *  lddb,
magma_int_t  batchCount,
magma_int_t  max_m,
magma_int_t  max_n,
magma_queue_t  queue 
)
void magmablas_strsm_vbatched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t *  m,
magma_int_t *  n,
float  alpha,
float **  dA_array,
magma_int_t *  ldda,
float **  dB_array,
magma_int_t *  lddb,
magma_int_t  batchCount,
magma_queue_t  queue 
)

strsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_strsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]mINTEGER array, dimension(batchCount + 1). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]nINTEGER array, dimension(batchCount + 1). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]alphaREAL. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a REAL array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount + 1). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a REAL array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER array, dimension(batchCount + 1). On entry, each element LDDB specifies the first dimension of the corresponding array B. LDDB >= max( 1, M ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_ztrsm_inv_outofplace_batched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  flag,
magma_int_t  m,
magma_int_t  n,
magmaDoubleComplex  alpha,
magmaDoubleComplex **  dA_array,
magma_int_t  ldda,
magmaDoubleComplex **  dB_array,
magma_int_t  lddb,
magmaDoubleComplex **  dX_array,
magma_int_t  lddx,
magmaDoubleComplex **  dinvA_array,
magma_int_t  dinvA_length,
magmaDoubleComplex **  dA_displ,
magmaDoubleComplex **  dB_displ,
magmaDoubleComplex **  dX_displ,
magmaDoubleComplex **  dinvA_displ,
magma_int_t  resetozero,
magma_int_t  batchCount,
magma_queue_t  queue 
)

ztrsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The solution matrix X is returned in dX_array; dB_array is input only and is not overwritten.

This is an asynchronous version of magmablas_ztrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]nINTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]alphaCOMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array X of dimension ( lddx, n ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB, If side == MagmaRight, dinvA must be of size >= ceil(n/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB.
[in]dinvA_lengthINTEGER The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by ZTRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_ztrsm_inv_work_batched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  flag,
magma_int_t  m,
magma_int_t  n,
magmaDoubleComplex  alpha,
magmaDoubleComplex **  dA_array,
magma_int_t  ldda,
magmaDoubleComplex **  dB_array,
magma_int_t  lddb,
magmaDoubleComplex **  dX_array,
magma_int_t  lddx,
magmaDoubleComplex **  dinvA_array,
magma_int_t  dinvA_length,
magmaDoubleComplex **  dA_displ,
magmaDoubleComplex **  dB_displ,
magmaDoubleComplex **  dX_displ,
magmaDoubleComplex **  dinvA_displ,
magma_int_t  resetozero,
magma_int_t  batchCount,
magma_queue_t  queue 
)

ztrsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ztrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]nINTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]alphaCOMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array X of dimension ( lddx, n ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB, If side == MagmaRight, dinvA must be of size >= ceil(n/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB, where ZTRTRI_BATCHED_NB = 128.
[in]dinvA_lengthINTEGER The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by ZTRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_ztrsm_inv_batched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  m,
magma_int_t  n,
magmaDoubleComplex  alpha,
magmaDoubleComplex **  dA_array,
magma_int_t  ldda,
magmaDoubleComplex **  dB_array,
magma_int_t  lddb,
magma_int_t  batchCount,
magma_queue_t  queue 
)

ztrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ztrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]mINTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]nINTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]alphaCOMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_ztrsm_inv_vbatched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t *  m,
magma_int_t *  n,
magmaDoubleComplex  alpha,
magmaDoubleComplex **  dA_array,
magma_int_t *  ldda,
magmaDoubleComplex **  dB_array,
magma_int_t *  lddb,
magma_int_t  batchCount,
magma_queue_t  queue 
)

ztrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ztrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]mINTEGER array, dimension(batchCount + 1). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]nINTEGER array, dimension(batchCount + 1). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]alphaCOMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount + 1). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER array, dimension(batchCount + 1). On entry, each element LDDB specifies the first dimension of the corresponding array B. LDDB >= max( 1, M ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_ztrsm_inv_outofplace_vbatched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  flag,
magma_int_t *  m,
magma_int_t *  n,
magmaDoubleComplex  alpha,
magmaDoubleComplex **  dA_array,
magma_int_t *  ldda,
magmaDoubleComplex **  dB_array,
magma_int_t *  lddb,
magmaDoubleComplex **  dX_array,
magma_int_t *  lddx,
magmaDoubleComplex **  dinvA_array,
magma_int_t *  dinvA_length,
magmaDoubleComplex **  dA_displ,
magmaDoubleComplex **  dB_displ,
magmaDoubleComplex **  dX_displ,
magmaDoubleComplex **  dinvA_displ,
magma_int_t  resetozero,
magma_int_t  batchCount,
magma_int_t  max_m,
magma_int_t  max_n,
magma_queue_t  queue 
)

ztrsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The solution matrix X is returned in dX_array; dB_array is input only and is not overwritten.

This is an asynchronous version of magmablas_ztrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]nINTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]alphaCOMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
[in]lddbINTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array X of dimension ( LDDX, N ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. LDDX >= max( 1, M ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB, If side == MagmaRight, dinvA must be of size >= ceil(N/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB.
[in]dinvA_lengthINTEGER array, dimension(batchCount). The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by ZTRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]max_mINTEGER The maximum value in m.
[in]max_nINTEGER The maximum value in n.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_ztrsm_inv_work_vbatched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  flag,
magma_int_t *  m,
magma_int_t *  n,
magmaDoubleComplex  alpha,
magmaDoubleComplex **  dA_array,
magma_int_t *  ldda,
magmaDoubleComplex **  dB_array,
magma_int_t *  lddb,
magmaDoubleComplex **  dX_array,
magma_int_t *  lddx,
magmaDoubleComplex **  dinvA_array,
magma_int_t *  dinvA_length,
magmaDoubleComplex **  dA_displ,
magmaDoubleComplex **  dB_displ,
magmaDoubleComplex **  dX_displ,
magmaDoubleComplex **  dinvA_displ,
magma_int_t  resetozero,
magma_int_t  batchCount,
magma_int_t  max_m,
magma_int_t  max_n,
magma_queue_t  queue 
)

ztrsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ztrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of each B. M >= 0.
[in]nINTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of each B. N >= 0.
[in]alphaCOMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. lddb >= max( 1, M ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array X of dimension ( LDDX, N ). On entry, should be set to 0. On exit, the solution matrix X.
[in]lddxINTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. lddx >= max( 1, M ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB, If side == MagmaRight, dinvA must be of size >= ceil(N/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB.
[in]dinvA_lengthINTEGER array, dimension(batchCount). The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by ZTRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]max_mINTEGER The maximum value in m.
[in]max_nINTEGER The maximum value in n.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_ztrsm_inv_vbatched_max_nocheck ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t *  m,
magma_int_t *  n,
magmaDoubleComplex  alpha,
magmaDoubleComplex **  dA_array,
magma_int_t *  ldda,
magmaDoubleComplex **  dB_array,
magma_int_t *  lddb,
magma_int_t  batchCount,
magma_int_t  max_m,
magma_int_t  max_n,
magma_queue_t  queue 
)
void magmablas_ztrsm_vbatched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t *  m,
magma_int_t *  n,
magmaDoubleComplex  alpha,
magmaDoubleComplex **  dA_array,
magma_int_t *  ldda,
magmaDoubleComplex **  dB_array,
magma_int_t *  lddb,
magma_int_t  batchCount,
magma_queue_t  queue 
)

ztrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous, variable-size batched version of magmablas_ztrsm. Unlike the _inv_work variants, this interface takes no flag, dinvA, or dX workspace arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]mINTEGER array, dimension(batchCount + 1). On entry, each element M specifies the number of rows of the corresponding B. M >= 0.
[in]nINTEGER array, dimension(batchCount + 1). On entry, each element N specifies the number of columns of the corresponding B. N >= 0.
[in]alphaCOMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER array, dimension(batchCount + 1). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B.
On exit, B is overwritten by the solution matrix X.
[in]lddbINTEGER array, dimension(batchCount + 1). On entry, each element LDDB specifies the first dimension of the corresponding array B. LDDB >= max( 1, M ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.