MAGMA  2.0.0
Matrix Algebra for GPU and Multicore Architectures
 All Classes Files Functions Friends Groups Pages
single-complex precision

Functions

magma_int_t magma_ctrsm_m (magma_int_t ngpu, magma_side_t side, magma_uplo_t uplo, magma_trans_t transa, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, const magmaFloatComplex *A, magma_int_t lda, magmaFloatComplex *B, magma_int_t ldb)
 CTRSM solves one of the matrix equations op( A )*X = alpha*B, or X*op( A ) = alpha*B, where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op( A ) is one of. More...
 
void magma_cgemm (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_const_ptr dB, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex_ptr dC, magma_int_t lddc)
 Perform matrix-matrix product, \( C = \alpha op(A) op(B) + \beta C \). More...
 
void magma_csymm (magma_side_t side, magma_uplo_t uplo, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_const_ptr dB, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex_ptr dC, magma_int_t lddc)
 Perform symmetric matrix-matrix product. More...
 
void magma_csyrk (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex beta, magmaFloatComplex_ptr dC, magma_int_t lddc)
 Perform symmetric rank-k update. More...
 
void magma_csyr2k (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_const_ptr dB, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex_ptr dC, magma_int_t lddc)
 Perform symmetric rank-2k update. More...
 
void magma_chemm (magma_side_t side, magma_uplo_t uplo, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_const_ptr dB, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex_ptr dC, magma_int_t lddc)
 Perform Hermitian matrix-matrix product. More...
 
void magma_cherk (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, float alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, float beta, magmaFloatComplex_ptr dC, magma_int_t lddc)
 Perform Hermitian rank-k update. More...
 
void magma_cher2k (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_const_ptr dB, magma_int_t lddb, float beta, magmaFloatComplex_ptr dC, magma_int_t lddc)
 Perform Hermitian rank-2k update. More...
 
void magma_ctrmm (magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_ptr dB, magma_int_t lddb)
 Perform triangular matrix-matrix product. More...
 
void magma_ctrsm (magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_ptr dB, magma_int_t lddb)
 Solve triangular matrix-matrix system (multiple right-hand sides). More...
 
void magma_cgemm_q (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_const_ptr dB, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex_ptr dC, magma_int_t lddc, magma_queue_t queue)
 Perform matrix-matrix product, \( C = \alpha op(A) op(B) + \beta C \). More...
 
void magma_csymm_q (magma_side_t side, magma_uplo_t uplo, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_const_ptr dB, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex_ptr dC, magma_int_t lddc, magma_queue_t queue)
 Perform symmetric matrix-matrix product. More...
 
void magma_csyrk_q (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex beta, magmaFloatComplex_ptr dC, magma_int_t lddc, magma_queue_t queue)
 Perform symmetric rank-k update. More...
 
void magma_csyr2k_q (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_const_ptr dB, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex_ptr dC, magma_int_t lddc, magma_queue_t queue)
 Perform symmetric rank-2k update. More...
 
void magma_chemm_q (magma_side_t side, magma_uplo_t uplo, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_const_ptr dB, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex_ptr dC, magma_int_t lddc, magma_queue_t queue)
 Perform Hermitian matrix-matrix product. More...
 
void magma_cherk_q (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, float alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, float beta, magmaFloatComplex_ptr dC, magma_int_t lddc, magma_queue_t queue)
 Perform Hermitian rank-k update. More...
 
void magma_cher2k_q (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_const_ptr dB, magma_int_t lddb, float beta, magmaFloatComplex_ptr dC, magma_int_t lddc, magma_queue_t queue)
 Perform Hermitian rank-2k update. More...
 
void magma_ctrmm_q (magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_ptr dB, magma_int_t lddb, magma_queue_t queue)
 Perform triangular matrix-matrix product. More...
 
void magma_ctrsm_q (magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_ptr dB, magma_int_t lddb, magma_queue_t queue)
 Solve triangular matrix-matrix system (multiple right-hand sides). More...
 
void magma_cgemm_batched (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex const *const *dA_array, magma_int_t ldda, magmaFloatComplex const *const *dB_array, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex **dC_array, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue)
 CGEMM performs one of the matrix-matrix operations. More...
 
void magmablas_cgemm_batched (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex const *const *dA_array, magma_int_t ldda, magmaFloatComplex const *const *dB_array, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex **dC_array, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue)
 CGEMM performs one of the matrix-matrix operations. More...
 
void magmablas_cgemm_q (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_const_ptr dB, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex_ptr dC, magma_int_t lddc, magma_queue_t queue)
 CGEMM performs one of the matrix-matrix operations. More...
 
void magmablas_cgemm (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_const_ptr dB, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex_ptr dC, magma_int_t lddc)
 
void magmablas_cgemm_reduce_q (magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_const_ptr dB, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex_ptr dC, magma_int_t lddc, magma_queue_t queue)
 CGEMM_REDUCE performs one of the matrix-matrix operations. More...
 
void magmablas_cgemm_reduce (magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_const_ptr dB, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex_ptr dC, magma_int_t lddc)
 
void magmablas_cher2k_mgpu2 (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex_ptr dA[], magma_int_t ldda, magma_int_t a_offset, magmaFloatComplex_ptr dB[], magma_int_t lddb, magma_int_t b_offset, float beta, magmaFloatComplex_ptr dC[], magma_int_t lddc, magma_int_t c_offset, magma_int_t ngpu, magma_int_t nb, magma_queue_t queues[][20], magma_int_t nqueue)
 CHER2K performs one of the Hermitian rank 2k operations. More...
 
void magma_cherk_batched (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, float alpha, magmaFloatComplex const *const *dA_array, magma_int_t ldda, float beta, magmaFloatComplex **dC_array, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue)
 CHERK performs one of the hermitian rank k operations. More...
 
void magmablas_cherk_batched (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, float alpha, magmaFloatComplex const *const *dA_array, magma_int_t ldda, float beta, magmaFloatComplex **dC_array, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue)
 CHERK performs one of the hermitian rank k operations. More...
 
void magmablas_ctrsm_outofplace_q (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_ptr dB, magma_int_t lddb, magmaFloatComplex_ptr dX, magma_int_t lddx, magma_int_t flag, magmaFloatComplex_ptr d_dinvA, magma_int_t dinvA_length, magma_queue_t queue)
 ctrsm_outofplace solves one of the matrix equations on gpu More...
 
void magmablas_ctrsm_work_q (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_ptr dB, magma_int_t lddb, magmaFloatComplex_ptr dX, magma_int_t lddx, magma_int_t flag, magmaFloatComplex_ptr d_dinvA, magma_int_t dinvA_length, magma_queue_t queue)
 Similar to magmablas_ctrsm_outofplace, but copies result dX back to dB, as in classical ctrsm interface. More...
 
void magmablas_ctrsm_q (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_ptr dB, magma_int_t lddb, magma_queue_t queue)
 Similar to magmablas_ctrsm_outofplace, but allocates dX and d_dinvA internally. More...
 
void magmablas_ctrsm_outofplace (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_ptr dB, magma_int_t lddb, magmaFloatComplex_ptr dX, magma_int_t lddx, magma_int_t flag, magmaFloatComplex_ptr d_dinvA, magma_int_t dinvA_length)
 
void magmablas_ctrsm_work (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_ptr dB, magma_int_t lddb, magmaFloatComplex_ptr dX, magma_int_t lddx, magma_int_t flag, magmaFloatComplex_ptr d_dinvA, magma_int_t dinvA_length)
 
void magmablas_ctrsm (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_ptr dB, magma_int_t lddb)
 
void magmablas_ctrsm_outofplace_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t ldda, magmaFloatComplex **dB_array, magma_int_t lddb, magmaFloatComplex **dX_array, magma_int_t lddx, magmaFloatComplex **dinvA_array, magma_int_t dinvA_length, magmaFloatComplex **dA_displ, magmaFloatComplex **dB_displ, magmaFloatComplex **dX_displ, magmaFloatComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
 ctrsm_outofplace solves one of the matrix equations on gpu More...
 
void magmablas_ctrsm_work_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t ldda, magmaFloatComplex **dB_array, magma_int_t lddb, magmaFloatComplex **dX_array, magma_int_t lddx, magmaFloatComplex **dinvA_array, magma_int_t dinvA_length, magmaFloatComplex **dA_displ, magmaFloatComplex **dB_displ, magmaFloatComplex **dX_displ, magmaFloatComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
 ctrsm_work solves one of the matrix equations on gpu More...
 
void magmablas_ctrsm_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t ldda, magmaFloatComplex **dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue)
 ctrsm solves one of the matrix equations on gpu More...
 
void magmablas_ctrtri_diag_q (magma_uplo_t uplo, magma_diag_t diag, magma_int_t n, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_ptr d_dinvA, magma_queue_t queue)
 Inverts the NB x NB diagonal blocks of a triangular matrix. More...
 
void magmablas_ctrtri_diag (magma_uplo_t uplo, magma_diag_t diag, magma_int_t n, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_ptr d_dinvA)
 
void magmablas_ctrtri_diag_batched (magma_uplo_t uplo, magma_diag_t diag, magma_int_t n, magmaFloatComplex const *const *dA_array, magma_int_t ldda, magmaFloatComplex **dinvA_array, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
 CTRTRI_DIAG inverts the NB x NB diagonal blocks of a triangular matrix. More...
 

Detailed Description

Function Documentation

void magma_cgemm ( magma_trans_t  transA,
magma_trans_t  transB,
magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
magmaFloatComplex  alpha,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
magmaFloatComplex_const_ptr  dB,
magma_int_t  lddb,
magmaFloatComplex  beta,
magmaFloatComplex_ptr  dC,
magma_int_t  lddc 
)

Perform matrix-matrix product, \( C = \alpha op(A) op(B) + \beta C \).

Parameters
[in]transAOperation op(A) to perform on matrix A.
[in]transBOperation op(B) to perform on matrix B.
[in]mNumber of rows of C and op(A). m >= 0.
[in]nNumber of columns of C and op(B). n >= 0.
[in]kNumber of columns of op(A) and rows of op(B). k >= 0.
[in]alphaScalar \( \alpha \)
[in]dACOMPLEX array on GPU device. If transA == MagmaNoTrans, the m-by-k matrix A of dimension (ldda,k), ldda >= max(1,m);
otherwise, the k-by-m matrix A of dimension (ldda,m), ldda >= max(1,k).
[in]lddaLeading dimension of dA.
[in]dBCOMPLEX array on GPU device. If transB == MagmaNoTrans, the k-by-n matrix B of dimension (lddb,n), lddb >= max(1,k);
otherwise, the n-by-k matrix B of dimension (lddb,k), lddb >= max(1,n).
[in]lddbLeading dimension of dB.
[in]betaScalar \( \beta \)
[in,out]dCCOMPLEX array on GPU device. The m-by-n matrix C of dimension (lddc,n), lddc >= max(1,m).
[in]lddcLeading dimension of dC.
void magma_cgemm_batched ( magma_trans_t  transA,
magma_trans_t  transB,
magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
magmaFloatComplex  alpha,
magmaFloatComplex const *const *  dA_array,
magma_int_t  ldda,
magmaFloatComplex const *const *  dB_array,
magma_int_t  lddb,
magmaFloatComplex  beta,
magmaFloatComplex **  dC_array,
magma_int_t  lddc,
magma_int_t  batchCount,
magma_queue_t  queue 
)

CGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters
[in]transAmagma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( A ) = A.
  • = MagmaTrans: op( A ) = A**T.
  • = MagmaConjTrans: op( A ) = A**H.
[in]transBmagma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( B ) = B.
  • = MagmaTrans: op( B ) = B**T.
  • = MagmaConjTrans: op( B ) = B**H.
[in]mINTEGER. On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero.
[in]nINTEGER. On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero.
[in]kINTEGER. On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero.
[in]alphaCOMPLEX On entry, ALPHA specifies the scalar alpha.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array A of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ).
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array B of DIMENSION ( lddb, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B.
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ).
[in]betaCOMPLEX. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input.
[in,out]dC_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array C of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n matrix ( alpha*op( A )*op( B ) + beta*C ).
[in]lddcINTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, m ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
[in]myhandlecublasHandle_t Handlde to use cuBLAS routines
void magma_cgemm_q ( magma_trans_t  transA,
magma_trans_t  transB,
magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
magmaFloatComplex  alpha,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
magmaFloatComplex_const_ptr  dB,
magma_int_t  lddb,
magmaFloatComplex  beta,
magmaFloatComplex_ptr  dC,
magma_int_t  lddc,
magma_queue_t  queue 
)

Perform matrix-matrix product, \( C = \alpha op(A) op(B) + \beta C \).

Parameters
[in]transAOperation op(A) to perform on matrix A.
[in]transBOperation op(B) to perform on matrix B.
[in]mNumber of rows of C and op(A). m >= 0.
[in]nNumber of columns of C and op(B). n >= 0.
[in]kNumber of columns of op(A) and rows of op(B). k >= 0.
[in]alphaScalar \( \alpha \)
[in]dACOMPLEX array on GPU device. If transA == MagmaNoTrans, the m-by-k matrix A of dimension (ldda,k), ldda >= max(1,m);
otherwise, the k-by-m matrix A of dimension (ldda,m), ldda >= max(1,k).
[in]lddaLeading dimension of dA.
[in]dBCOMPLEX array on GPU device. If transB == MagmaNoTrans, the k-by-n matrix B of dimension (lddb,n), lddb >= max(1,k);
otherwise, the n-by-k matrix B of dimension (lddb,k), lddb >= max(1,n).
[in]lddbLeading dimension of dB.
[in]betaScalar \( \beta \)
[in,out]dCCOMPLEX array on GPU device. The m-by-n matrix C of dimension (lddc,n), lddc >= max(1,m).
[in]lddcLeading dimension of dC.
void magma_chemm ( magma_side_t  side,
magma_uplo_t  uplo,
magma_int_t  m,
magma_int_t  n,
magmaFloatComplex  alpha,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
magmaFloatComplex_const_ptr  dB,
magma_int_t  lddb,
magmaFloatComplex  beta,
magmaFloatComplex_ptr  dC,
magma_int_t  lddc 
)

Perform Hermitian matrix-matrix product.

\( C = \alpha A B + \beta C \) (side == MagmaLeft), or
\( C = \alpha B A + \beta C \) (side == MagmaRight),
where \( A \) is Hermitian.

Parameters
[in]sideWhether A is on the left or right.
[in]uploWhether the upper or lower triangle of A is referenced.
[in]mNumber of rows of C. m >= 0.
[in]nNumber of columns of C. n >= 0.
[in]alphaScalar \( \alpha \)
[in]dACOMPLEX array on GPU device. If side == MagmaLeft, the m-by-m Hermitian matrix A of dimension (ldda,m), ldda >= max(1,m);
otherwise, the n-by-n Hermitian matrix A of dimension (ldda,n), ldda >= max(1,n).
[in]lddaLeading dimension of dA.
[in]dBCOMPLEX array on GPU device. The m-by-n matrix B of dimension (lddb,n), lddb >= max(1,m).
[in]lddbLeading dimension of dB.
[in]betaScalar \( \beta \)
[in,out]dCCOMPLEX array on GPU device. The m-by-n matrix C of dimension (lddc,n), lddc >= max(1,m).
[in]lddcLeading dimension of dC.
void magma_chemm_q ( magma_side_t  side,
magma_uplo_t  uplo,
magma_int_t  m,
magma_int_t  n,
magmaFloatComplex  alpha,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
magmaFloatComplex_const_ptr  dB,
magma_int_t  lddb,
magmaFloatComplex  beta,
magmaFloatComplex_ptr  dC,
magma_int_t  lddc,
magma_queue_t  queue 
)

Perform Hermitian matrix-matrix product.

\( C = \alpha A B + \beta C \) (side == MagmaLeft), or
\( C = \alpha B A + \beta C \) (side == MagmaRight),
where \( A \) is Hermitian.

Parameters
[in]sideWhether A is on the left or right.
[in]uploWhether the upper or lower triangle of A is referenced.
[in]mNumber of rows of C. m >= 0.
[in]nNumber of columns of C. n >= 0.
[in]alphaScalar \( \alpha \)
[in]dACOMPLEX array on GPU device. If side == MagmaLeft, the m-by-m Hermitian matrix A of dimension (ldda,m), ldda >= max(1,m);
otherwise, the n-by-n Hermitian matrix A of dimension (ldda,n), ldda >= max(1,n).
[in]lddaLeading dimension of dA.
[in]dBCOMPLEX array on GPU device. The m-by-n matrix B of dimension (lddb,n), lddb >= max(1,m).
[in]lddbLeading dimension of dB.
[in]betaScalar \( \beta \)
[in,out]dCCOMPLEX array on GPU device. The m-by-n matrix C of dimension (lddc,n), lddc >= max(1,m).
[in]lddcLeading dimension of dC.
void magma_cher2k ( magma_uplo_t  uplo,
magma_trans_t  trans,
magma_int_t  n,
magma_int_t  k,
magmaFloatComplex  alpha,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
magmaFloatComplex_const_ptr  dB,
magma_int_t  lddb,
float  beta,
magmaFloatComplex_ptr  dC,
magma_int_t  lddc 
)

Perform Hermitian rank-2k update.

\( C = \alpha A B^T + \alpha B A^T \beta C \) (trans == MagmaNoTrans), or
\( C = \alpha A^T B + \alpha B^T A \beta C \) (trans == MagmaTrans),
where \( C \) is Hermitian.

Parameters
[in]uploWhether the upper or lower triangle of C is referenced.
[in]transOperation to perform on A and B.
[in]nNumber of rows and columns of C. n >= 0.
[in]kNumber of columns of A and B (for MagmaNoTrans) or rows of A and B (for MagmaTrans). k >= 0.
[in]alphaScalar \( \alpha \)
[in]dACOMPLEX array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix A of dimension (ldda,k), ldda >= max(1,n);
otherwise, the k-by-n matrix A of dimension (ldda,n), ldda >= max(1,k).
[in]lddaLeading dimension of dA.
[in]dBCOMPLEX array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix B of dimension (lddb,k), lddb >= max(1,n);
otherwise, the k-by-n matrix B of dimension (lddb,n), lddb >= max(1,k).
[in]lddbLeading dimension of dB.
[in]betaScalar \( \beta \)
[in,out]dCCOMPLEX array on GPU device. The n-by-n Hermitian matrix C of dimension (lddc,n), lddc >= max(1,n).
[in]lddcLeading dimension of dC.
void magma_cher2k_q ( magma_uplo_t  uplo,
magma_trans_t  trans,
magma_int_t  n,
magma_int_t  k,
magmaFloatComplex  alpha,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
magmaFloatComplex_const_ptr  dB,
magma_int_t  lddb,
float  beta,
magmaFloatComplex_ptr  dC,
magma_int_t  lddc,
magma_queue_t  queue 
)

Perform Hermitian rank-2k update.

\( C = \alpha A B^T + \alpha B A^T \beta C \) (trans == MagmaNoTrans), or
\( C = \alpha A^T B + \alpha B^T A \beta C \) (trans == MagmaTrans),
where \( C \) is Hermitian.

Parameters
[in]uploWhether the upper or lower triangle of C is referenced.
[in]transOperation to perform on A and B.
[in]nNumber of rows and columns of C. n >= 0.
[in]kNumber of columns of A and B (for MagmaNoTrans) or rows of A and B (for MagmaTrans). k >= 0.
[in]alphaScalar \( \alpha \)
[in]dACOMPLEX array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix A of dimension (ldda,k), ldda >= max(1,n);
otherwise, the k-by-n matrix A of dimension (ldda,n), ldda >= max(1,k).
[in]lddaLeading dimension of dA.
[in]dBCOMPLEX array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix B of dimension (lddb,k), lddb >= max(1,n);
otherwise, the k-by-n matrix B of dimension (lddb,n), lddb >= max(1,k).
[in]lddbLeading dimension of dB.
[in]betaScalar \( \beta \)
[in,out]dCCOMPLEX array on GPU device. The n-by-n Hermitian matrix C of dimension (lddc,n), lddc >= max(1,n).
[in]lddcLeading dimension of dC.
void magma_cherk ( magma_uplo_t  uplo,
magma_trans_t  trans,
magma_int_t  n,
magma_int_t  k,
float  alpha,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
float  beta,
magmaFloatComplex_ptr  dC,
magma_int_t  lddc 
)

Perform Hermitian rank-k update.

\( C = \alpha A A^T + \beta C \) (trans == MagmaNoTrans), or
\( C = \alpha A^T A + \beta C \) (trans == MagmaTrans),
where \( C \) is Hermitian.

Parameters
[in]uploWhether the upper or lower triangle of C is referenced.
[in]transOperation to perform on A.
[in]nNumber of rows and columns of C. n >= 0.
[in]kNumber of columns of A (for MagmaNoTrans) or rows of A (for MagmaTrans). k >= 0.
[in]alphaScalar \( \alpha \)
[in]dACOMPLEX array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix A of dimension (ldda,k), ldda >= max(1,n);
otherwise, the k-by-n matrix A of dimension (ldda,n), ldda >= max(1,k).
[in]lddaLeading dimension of dA.
[in]betaScalar \( \beta \)
[in,out]dCCOMPLEX array on GPU device. The n-by-n Hermitian matrix C of dimension (lddc,n), lddc >= max(1,n).
[in]lddcLeading dimension of dC.
void magma_cherk_batched ( magma_uplo_t  uplo,
magma_trans_t  trans,
magma_int_t  n,
magma_int_t  k,
float  alpha,
magmaFloatComplex const *const *  dA_array,
magma_int_t  ldda,
float  beta,
magmaFloatComplex **  dC_array,
magma_int_t  lddc,
magma_int_t  batchCount,
magma_queue_t  queue 
)

CHERK performs one of the hermitian rank k operations.

C := alpha*A*A**H + beta*C,

or

C := alpha*A**H*A + beta*C,

where alpha and beta are real scalars, C is an n by n hermitian matrix and A is an n by k matrix in the first case and a k by n matrix in the second case.

Parameters

Parameters
[in]uplomagma_uplo_t. On entry, uplo specifies whether the upper or lower triangular part of the array C is to be referenced as follows:

uplo = MagmaUpper Only the upper triangular part of C is to be referenced.

uplo = MagmaLower Only the lower triangular part of C is to be referenced.

Parameters
[in]transmagma_trans_t. On entry, trans specifies the operation to be performed as follows:

trans = MagmaNoTrans C := alpha*A*A**H + beta*C.

trans = MagmaConjTrans C := alpha*A**H*A + beta*C.

Parameters
[in]nINTEGER. On entry, specifies the order of the matrix C. N must be at least zero.
[in]kINTEGER. On entry with trans = MagmaNoTrans, k specifies the number of columns of the matrix A, and on entry with trans = MagmaConjTrans, k specifies the number of rows of the matrix A. K must be at least zero.
[in]alphaREAL On entry, ALPHA specifies the scalar alpha.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX A array of DIMENSION ( ldda, ka ), where ka is k when trans = MagmaNoTrans, and is n otherwise. Before entry with trans = MagmaNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When trans = MagmaNoTrans then ldda must be at least max( 1, n ), otherwise ldda must be at least max( 1, k ).
[in]betaREAL. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input.
[in,out]dC_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array C of DIMENSION ( lddc, n ). Before entry with uplo = MagmaUpper, the leading n by n upper triangular part of the array C must contain the upper triangular part of the hermitian matrix and the strictly lower triangular part of C is not referenced. On exit, the upper triangular part of the array C is overwritten by the upper triangular part of the updated matrix. Before entry with uplo = MagmaLower, the leading n by n lower triangular part of the array C must contain the lower triangular part of the hermitian matrix and the strictly upper triangular part of C is not referenced. On exit, the lower triangular part of the array C is overwritten by the lower triangular part of the updated matrix. Note that the imaginary parts of the diagonal elements need not be set, they are assumed to be zero, and on exit they are set to zero.
[in]lddcINTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, m ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magma_cherk_q ( magma_uplo_t  uplo,
magma_trans_t  trans,
magma_int_t  n,
magma_int_t  k,
float  alpha,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
float  beta,
magmaFloatComplex_ptr  dC,
magma_int_t  lddc,
magma_queue_t  queue 
)

Perform Hermitian rank-k update.

\( C = \alpha A A^T + \beta C \) (trans == MagmaNoTrans), or
\( C = \alpha A^T A + \beta C \) (trans == MagmaTrans),
where \( C \) is Hermitian.

Parameters
[in]uploWhether the upper or lower triangle of C is referenced.
[in]transOperation to perform on A.
[in]nNumber of rows and columns of C. n >= 0.
[in]kNumber of columns of A (for MagmaNoTrans) or rows of A (for MagmaTrans). k >= 0.
[in]alphaScalar \( \alpha \)
[in]dACOMPLEX array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix A of dimension (ldda,k), ldda >= max(1,n);
otherwise, the k-by-n matrix A of dimension (ldda,n), ldda >= max(1,k).
[in]lddaLeading dimension of dA.
[in]betaScalar \( \beta \)
[in,out]dCCOMPLEX array on GPU device. The n-by-n Hermitian matrix C of dimension (lddc,n), lddc >= max(1,n).
[in]lddcLeading dimension of dC.
void magma_csymm ( magma_side_t  side,
magma_uplo_t  uplo,
magma_int_t  m,
magma_int_t  n,
magmaFloatComplex  alpha,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
magmaFloatComplex_const_ptr  dB,
magma_int_t  lddb,
magmaFloatComplex  beta,
magmaFloatComplex_ptr  dC,
magma_int_t  lddc 
)

Perform symmetric matrix-matrix product.

\( C = \alpha A B + \beta C \) (side == MagmaLeft), or
\( C = \alpha B A + \beta C \) (side == MagmaRight),
where \( A \) is symmetric.

Parameters
[in]sideWhether A is on the left or right.
[in]uploWhether the upper or lower triangle of A is referenced.
[in]mNumber of rows of C. m >= 0.
[in]nNumber of columns of C. n >= 0.
[in]alphaScalar \( \alpha \)
[in]dACOMPLEX array on GPU device. If side == MagmaLeft, the m-by-m symmetric matrix A of dimension (ldda,m), ldda >= max(1,m);
otherwise, the n-by-n symmetric matrix A of dimension (ldda,n), ldda >= max(1,n).
[in]lddaLeading dimension of dA.
[in]dBCOMPLEX array on GPU device. The m-by-n matrix B of dimension (lddb,n), lddb >= max(1,m).
[in]lddbLeading dimension of dB.
[in]betaScalar \( \beta \)
[in,out]dCCOMPLEX array on GPU device. The m-by-n matrix C of dimension (lddc,n), lddc >= max(1,m).
[in]lddcLeading dimension of dC.
void magma_csymm_q ( magma_side_t  side,
magma_uplo_t  uplo,
magma_int_t  m,
magma_int_t  n,
magmaFloatComplex  alpha,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
magmaFloatComplex_const_ptr  dB,
magma_int_t  lddb,
magmaFloatComplex  beta,
magmaFloatComplex_ptr  dC,
magma_int_t  lddc,
magma_queue_t  queue 
)

Perform symmetric matrix-matrix product.

\( C = \alpha A B + \beta C \) (side == MagmaLeft), or
\( C = \alpha B A + \beta C \) (side == MagmaRight),
where \( A \) is symmetric.

Parameters
[in]sideWhether A is on the left or right.
[in]uploWhether the upper or lower triangle of A is referenced.
[in]mNumber of rows of C. m >= 0.
[in]nNumber of columns of C. n >= 0.
[in]alphaScalar \( \alpha \)
[in]dACOMPLEX array on GPU device. If side == MagmaLeft, the m-by-m symmetric matrix A of dimension (ldda,m), ldda >= max(1,m);
otherwise, the n-by-n symmetric matrix A of dimension (ldda,n), ldda >= max(1,n).
[in]lddaLeading dimension of dA.
[in]dBCOMPLEX array on GPU device. The m-by-n matrix B of dimension (lddb,n), lddb >= max(1,m).
[in]lddbLeading dimension of dB.
[in]betaScalar \( \beta \)
[in,out]dCCOMPLEX array on GPU device. The m-by-n matrix C of dimension (lddc,n), lddc >= max(1,m).
[in]lddcLeading dimension of dC.
void magma_csyr2k ( magma_uplo_t  uplo,
magma_trans_t  trans,
magma_int_t  n,
magma_int_t  k,
magmaFloatComplex  alpha,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
magmaFloatComplex_const_ptr  dB,
magma_int_t  lddb,
magmaFloatComplex  beta,
magmaFloatComplex_ptr  dC,
magma_int_t  lddc 
)

Perform symmetric rank-2k update.

\( C = \alpha A B^T + \alpha B A^T \beta C \) (trans == MagmaNoTrans), or
\( C = \alpha A^T B + \alpha B^T A \beta C \) (trans == MagmaTrans),
where \( C \) is symmetric.

Parameters
[in]uploWhether the upper or lower triangle of C is referenced.
[in]transOperation to perform on A and B.
[in]nNumber of rows and columns of C. n >= 0.
[in]kNumber of columns of A and B (for MagmaNoTrans) or rows of A and B (for MagmaTrans). k >= 0.
[in]alphaScalar \( \alpha \)
[in]dACOMPLEX array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix A of dimension (ldda,k), ldda >= max(1,n);
otherwise, the k-by-n matrix A of dimension (ldda,n), ldda >= max(1,k).
[in]lddaLeading dimension of dA.
[in]dBCOMPLEX array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix B of dimension (lddb,k), lddb >= max(1,n);
otherwise, the k-by-n matrix B of dimension (lddb,n), lddb >= max(1,k).
[in]lddbLeading dimension of dB.
[in]betaScalar \( \beta \)
[in,out]dCCOMPLEX array on GPU device. The n-by-n symmetric matrix C of dimension (lddc,n), lddc >= max(1,n).
[in]lddcLeading dimension of dC.
void magma_csyr2k_q ( magma_uplo_t  uplo,
magma_trans_t  trans,
magma_int_t  n,
magma_int_t  k,
magmaFloatComplex  alpha,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
magmaFloatComplex_const_ptr  dB,
magma_int_t  lddb,
magmaFloatComplex  beta,
magmaFloatComplex_ptr  dC,
magma_int_t  lddc,
magma_queue_t  queue 
)

Perform symmetric rank-2k update.

\( C = \alpha A B^T + \alpha B A^T \beta C \) (trans == MagmaNoTrans), or
\( C = \alpha A^T B + \alpha B^T A \beta C \) (trans == MagmaTrans),
where \( C \) is symmetric.

Parameters
[in]uploWhether the upper or lower triangle of C is referenced.
[in]transOperation to perform on A and B.
[in]nNumber of rows and columns of C. n >= 0.
[in]kNumber of columns of A and B (for MagmaNoTrans) or rows of A and B (for MagmaTrans). k >= 0.
[in]alphaScalar \( \alpha \)
[in]dACOMPLEX array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix A of dimension (ldda,k), ldda >= max(1,n);
otherwise, the k-by-n matrix A of dimension (ldda,n), ldda >= max(1,k).
[in]lddaLeading dimension of dA.
[in]dBCOMPLEX array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix B of dimension (lddb,k), lddb >= max(1,n);
otherwise, the k-by-n matrix B of dimension (lddb,n), lddb >= max(1,k).
[in]lddbLeading dimension of dB.
[in]betaScalar \( \beta \)
[in,out]dCCOMPLEX array on GPU device. The n-by-n symmetric matrix C of dimension (lddc,n), lddc >= max(1,n).
[in]lddcLeading dimension of dC.
void magma_csyrk ( magma_uplo_t  uplo,
magma_trans_t  trans,
magma_int_t  n,
magma_int_t  k,
magmaFloatComplex  alpha,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
magmaFloatComplex  beta,
magmaFloatComplex_ptr  dC,
magma_int_t  lddc 
)

Perform symmetric rank-k update.

\( C = \alpha A A^T + \beta C \) (trans == MagmaNoTrans), or
\( C = \alpha A^T A + \beta C \) (trans == MagmaTrans),
where \( C \) is symmetric.

Parameters
[in]uploWhether the upper or lower triangle of C is referenced.
[in]transOperation to perform on A.
[in]nNumber of rows and columns of C. n >= 0.
[in]kNumber of columns of A (for MagmaNoTrans) or rows of A (for MagmaTrans). k >= 0.
[in]alphaScalar \( \alpha \)
[in]dACOMPLEX array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix A of dimension (ldda,k), ldda >= max(1,n);
otherwise, the k-by-n matrix A of dimension (ldda,n), ldda >= max(1,k).
[in]lddaLeading dimension of dA.
[in]betaScalar \( \beta \)
[in,out]dCCOMPLEX array on GPU device. The n-by-n symmetric matrix C of dimension (lddc,n), lddc >= max(1,n).
[in]lddcLeading dimension of dC.
void magma_csyrk_q ( magma_uplo_t  uplo,
magma_trans_t  trans,
magma_int_t  n,
magma_int_t  k,
magmaFloatComplex  alpha,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
magmaFloatComplex  beta,
magmaFloatComplex_ptr  dC,
magma_int_t  lddc,
magma_queue_t  queue 
)

Perform symmetric rank-k update.

\( C = \alpha A A^T + \beta C \) (trans == MagmaNoTrans), or
\( C = \alpha A^T A + \beta C \) (trans == MagmaTrans),
where \( C \) is symmetric.

Parameters
[in]uploWhether the upper or lower triangle of C is referenced.
[in]transOperation to perform on A.
[in]nNumber of rows and columns of C. n >= 0.
[in]kNumber of columns of A (for MagmaNoTrans) or rows of A (for MagmaTrans). k >= 0.
[in]alphaScalar \( \alpha \)
[in]dACOMPLEX array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix A of dimension (ldda,k), ldda >= max(1,n);
otherwise, the k-by-n matrix A of dimension (ldda,n), ldda >= max(1,k).
[in]lddaLeading dimension of dA.
[in]betaScalar \( \beta \)
[in,out]dCCOMPLEX array on GPU device. The n-by-n symmetric matrix C of dimension (lddc,n), lddc >= max(1,n).
[in]lddcLeading dimension of dC.
void magma_ctrmm ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  trans,
magma_diag_t  diag,
magma_int_t  m,
magma_int_t  n,
magmaFloatComplex  alpha,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
magmaFloatComplex_ptr  dB,
magma_int_t  lddb 
)

Perform triangular matrix-matrix product.

\( B = \alpha op(A) B \) (side == MagmaLeft), or
\( B = \alpha B op(A) \) (side == MagmaRight),
where \( A \) is triangular.

Parameters
[in]sideWhether A is on the left or right.
[in]uploWhether A is upper or lower triangular.
[in]transOperation to perform on A.
[in]diagWhether the diagonal of A is assumed to be unit or non-unit.
[in]mNumber of rows of B. m >= 0.
[in]nNumber of columns of B. n >= 0.
[in]alphaScalar \( \alpha \)
[in]dACOMPLEX array on GPU device. If side == MagmaLeft, the n-by-n triangular matrix A of dimension (ldda,n), ldda >= max(1,n);
otherwise, the m-by-m triangular matrix A of dimension (ldda,m), ldda >= max(1,m).
[in]lddaLeading dimension of dA.
[in]dBCOMPLEX array on GPU device. The m-by-n matrix B of dimension (lddb,n), lddb >= max(1,m).
[in]lddbLeading dimension of dB.
void magma_ctrmm_q ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  trans,
magma_diag_t  diag,
magma_int_t  m,
magma_int_t  n,
magmaFloatComplex  alpha,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
magmaFloatComplex_ptr  dB,
magma_int_t  lddb,
magma_queue_t  queue 
)

Perform triangular matrix-matrix product.

\( B = \alpha op(A) B \) (side == MagmaLeft), or
\( B = \alpha B op(A) \) (side == MagmaRight),
where \( A \) is triangular.

Parameters
[in]sideWhether A is on the left or right.
[in]uploWhether A is upper or lower triangular.
[in]transOperation to perform on A.
[in]diagWhether the diagonal of A is assumed to be unit or non-unit.
[in]mNumber of rows of B. m >= 0.
[in]nNumber of columns of B. n >= 0.
[in]alphaScalar \( \alpha \)
[in]dACOMPLEX array on GPU device. If side == MagmaLeft, the n-by-n triangular matrix A of dimension (ldda,n), ldda >= max(1,n);
otherwise, the m-by-m triangular matrix A of dimension (ldda,m), ldda >= max(1,m).
[in]lddaLeading dimension of dA.
[in]dBCOMPLEX array on GPU device. The m-by-n matrix B of dimension (lddb,n), lddb >= max(1,m).
[in]lddbLeading dimension of dB.
void magma_ctrsm ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  trans,
magma_diag_t  diag,
magma_int_t  m,
magma_int_t  n,
magmaFloatComplex  alpha,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
magmaFloatComplex_ptr  dB,
magma_int_t  lddb 
)

Solve triangular matrix-matrix system (multiple right-hand sides).

\( op(A) X = \alpha B \) (side == MagmaLeft), or
\( X op(A) = \alpha B \) (side == MagmaRight),
where \( A \) is triangular.

Parameters
[in]sideWhether A is on the left or right.
[in]uploWhether A is upper or lower triangular.
[in]transOperation to perform on A.
[in]diagWhether the diagonal of A is assumed to be unit or non-unit.
[in]mNumber of rows of B. m >= 0.
[in]nNumber of columns of B. n >= 0.
[in]alphaScalar \( \alpha \)
[in]dACOMPLEX array on GPU device. If side == MagmaLeft, the m-by-m triangular matrix A of dimension (ldda,m), ldda >= max(1,m);
otherwise, the n-by-n triangular matrix A of dimension (ldda,n), ldda >= max(1,n).
[in]lddaLeading dimension of dA.
[in,out]dBCOMPLEX array on GPU device. On entry, m-by-n matrix B of dimension (lddb,n), lddb >= max(1,m). On exit, overwritten with the solution matrix X.
[in]lddbLeading dimension of dB.
magma_int_t magma_ctrsm_m ( magma_int_t  ngpu,
magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transa,
magma_diag_t  diag,
magma_int_t  m,
magma_int_t  n,
magmaFloatComplex  alpha,
const magmaFloatComplex *  A,
magma_int_t  lda,
magmaFloatComplex *  B,
magma_int_t  ldb 
)

CTRSM solves one of the matrix equations op( A )*X = alpha*B, or X*op( A ) = alpha*B, where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op( A ) is one of.

op( A ) = A or op( A ) = A**T or op( A ) = A**H.

The matrix X is overwritten on B.

Parameters
[in]ngpuINTEGER Number of GPUs to use. ngpu > 0.
[in]sidemagma_side_t. On entry, SIDE specifies whether op( A ) appears on the left or right of X as follows:
  • = MagmaLeft: op( A )*X = alpha*B.
  • = MagmaRight: X*op( A ) = alpha*B.
[in]uplomagma_uplo_t. On entry, UPLO specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transamagma_trans_t. On entry, TRANSA specifies the form of op( A ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( A ) = A.
  • = MagmaTrans: op( A ) = A**T.
  • = MagmaConjTrans: op( A ) = A**H.
[in]diagmagma_diag_t. On entry, DIAG specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]mINTEGER. On entry, M specifies the number of rows of B. M must be at least zero.
[in]nINTEGER. On entry, N specifies the number of columns of B. N must be at least zero.
[in]alphaCOMPLEX. On entry, ALPHA specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]ACOMPLEX array of DIMENSION ( LDA, k ), where k is m when SIDE = MagmaLeft and is n when SIDE = MagmaRight. Before entry with UPLO = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with UPLO = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when DIAG = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]ldaINTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. When SIDE = MagmaLeft then LDA >= max( 1, m ), when SIDE = MagmaRight then LDA >= max( 1, n ).
[in,out]BCOMPLEX array of DIMENSION ( LDB, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B, and on exit is overwritten by the solution matrix X.
[in]ldbINTEGER. On entry, LDB specifies the first dimension of B as declared in the calling (sub) program. LDB must be at least max( 1, m ).
void magma_ctrsm_q ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  trans,
magma_diag_t  diag,
magma_int_t  m,
magma_int_t  n,
magmaFloatComplex  alpha,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
magmaFloatComplex_ptr  dB,
magma_int_t  lddb,
magma_queue_t  queue 
)

Solve triangular matrix-matrix system (multiple right-hand sides).

\( op(A) X = \alpha B \) (side == MagmaLeft), or
\( X op(A) = \alpha B \) (side == MagmaRight),
where \( A \) is triangular.

Parameters
[in]sideWhether A is on the left or right.
[in]uploWhether A is upper or lower triangular.
[in]transOperation to perform on A.
[in]diagWhether the diagonal of A is assumed to be unit or non-unit.
[in]mNumber of rows of B. m >= 0.
[in]nNumber of columns of B. n >= 0.
[in]alphaScalar \( \alpha \)
[in]dACOMPLEX array on GPU device. If side == MagmaLeft, the m-by-m triangular matrix A of dimension (ldda,m), ldda >= max(1,m);
otherwise, the n-by-n triangular matrix A of dimension (ldda,n), ldda >= max(1,n).
[in]lddaLeading dimension of dA.
[in,out]dBCOMPLEX array on GPU device. On entry, m-by-n matrix B of dimension (lddb,n), lddb >= max(1,m). On exit, overwritten with the solution matrix X.
[in]lddbLeading dimension of dB.
void magmablas_cgemm ( magma_trans_t  transA,
magma_trans_t  transB,
magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
magmaFloatComplex  alpha,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
magmaFloatComplex_const_ptr  dB,
magma_int_t  lddb,
magmaFloatComplex  beta,
magmaFloatComplex_ptr  dC,
magma_int_t  lddc 
)
void magmablas_cgemm_batched ( magma_trans_t  transA,
magma_trans_t  transB,
magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
magmaFloatComplex  alpha,
magmaFloatComplex const *const *  dA_array,
magma_int_t  ldda,
magmaFloatComplex const *const *  dB_array,
magma_int_t  lddb,
magmaFloatComplex  beta,
magmaFloatComplex **  dC_array,
magma_int_t  lddc,
magma_int_t  batchCount,
magma_queue_t  queue 
)

CGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters
[in]transAmagma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( A ) = A.
  • = MagmaTrans: op( A ) = A**T.
  • = MagmaConjTrans: op( A ) = A**H.
[in]transBmagma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( B ) = B.
  • = MagmaTrans: op( B ) = B**T.
  • = MagmaConjTrans: op( B ) = B**H.
[in]mINTEGER. On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero.
[in]nINTEGER. On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero.
[in]kINTEGER. On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero.
[in]alphaCOMPLEX On entry, ALPHA specifies the scalar alpha.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array A of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ).
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array B of DIMENSION ( lddb, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B.
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ).
[in]betaCOMPLEX. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input.
[in,out]dC_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array C of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, each array C is overwritten by the m by n matrix ( alpha*op( A )*op( B ) + beta*C ).
[in]lddcINTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, m ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_cgemm_q ( magma_trans_t  transA,
magma_trans_t  transB,
magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
magmaFloatComplex  alpha,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
magmaFloatComplex_const_ptr  dB,
magma_int_t  lddb,
magmaFloatComplex  beta,
magmaFloatComplex_ptr  dC,
magma_int_t  lddc,
magma_queue_t  queue 
)

CGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters
[in]transAmagma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( A ) = A.
  • = MagmaTrans: op( A ) = A**T.
  • = MagmaConjTrans: op( A ) = A**H.
[in]transBmagma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( B ) = B.
  • = MagmaTrans: op( B ) = B**T.
  • = MagmaConjTrans: op( B ) = B**H.
[in]mINTEGER. On entry, M specifies the number of rows of the matrix op( dA ) and of the matrix dC. M must be at least zero.
[in]nINTEGER. On entry, N specifies the number of columns of the matrix op( dB ) and the number of columns of the matrix dC. N must be at least zero.
[in]kINTEGER. On entry, K specifies the number of columns of the matrix op( dA ) and the number of rows of the matrix op( dB ). K must be at least zero.
[in]alphaCOMPLEX On entry, ALPHA specifies the scalar alpha.
[in]dACOMPLEX array of DIMENSION ( LDA, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array dA must contain the matrix dA, otherwise the leading k by m part of the array dA must contain the matrix dA.
[in]lddaINTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. When transA = MagmaNoTrans then LDA must be at least max( 1, m ), otherwise LDA must be at least max( 1, k ).
[in]dBCOMPLEX array of DIMENSION ( LDB, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array dB must contain the matrix dB, otherwise the leading n by k part of the array dB must contain the matrix dB.
[in]lddbINTEGER. On entry, LDB specifies the first dimension of dB as declared in the calling (sub) program. When transB = MagmaNoTrans then LDB must be at least max( 1, k ), otherwise LDB must be at least max( 1, n ).
[in]betaCOMPLEX. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then dC need not be set on input.
[in,out]dCCOMPLEX array of DIMENSION ( LDC, n ). Before entry, the leading m by n part of the array dC must contain the matrix dC, except when beta is zero, in which case dC need not be set on entry. On exit, the array dC is overwritten by the m by n matrix ( alpha*op( dA )*op( dB ) + beta*dC ).
[in]lddcINTEGER. On entry, LDC specifies the first dimension of dC as declared in the calling (sub) program. LDC must be at least max( 1, m ).
void magmablas_cgemm_reduce ( magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
magmaFloatComplex  alpha,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
magmaFloatComplex_const_ptr  dB,
magma_int_t  lddb,
magmaFloatComplex  beta,
magmaFloatComplex_ptr  dC,
magma_int_t  lddc 
)
void magmablas_cgemm_reduce_q ( magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
magmaFloatComplex  alpha,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
magmaFloatComplex_const_ptr  dB,
magma_int_t  lddb,
magmaFloatComplex  beta,
magmaFloatComplex_ptr  dC,
magma_int_t  lddc,
magma_queue_t  queue 
)

CGEMM_REDUCE performs one of the matrix-matrix operations.

C := alpha*A^T*B + beta*C,

where alpha and beta are scalars, and A, B and C are matrices, with A a k-by-m matrix, B a k-by-n matrix, and C an m-by-n matrix.

This routine is tuned for m, n << k. Typically, m and n are expected to be less than 128.

void magmablas_cher2k_mgpu2 ( magma_uplo_t  uplo,
magma_trans_t  trans,
magma_int_t  n,
magma_int_t  k,
magmaFloatComplex  alpha,
magmaFloatComplex_ptr  dA[],
magma_int_t  ldda,
magma_int_t  a_offset,
magmaFloatComplex_ptr  dB[],
magma_int_t  lddb,
magma_int_t  b_offset,
float  beta,
magmaFloatComplex_ptr  dC[],
magma_int_t  lddc,
magma_int_t  c_offset,
magma_int_t  ngpu,
magma_int_t  nb,
magma_queue_t  queues[][20],
magma_int_t  nqueue 
)

CHER2K performs one of the Hermitian rank 2k operations.

C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C,

or

C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C,

where alpha and beta are scalars with beta real, C is an n by n Hermitian matrix and A and B are n by k matrices in the first case and k by n matrices in the second case.

Parameters
[in]uplomagma_uplo_t. On entry, UPLO specifies whether the upper or lower triangular part of the array C is to be referenced as follows:
  • = MagmaUpper: Only the upper triangular part of C is to be referenced.
  • = MagmaLower: Only the lower triangular part of C is to be referenced.
      current only Lower case is implemented.
    
[in]transmagma_trans_t. On entry, TRANS specifies the operation to be performed as follows:
  • = MagmaNoTrans: C := alpha*A*B**H + conj( alpha )*B*A**H + beta*C.
  • = Magma_ConjTrans: C := alpha*A**H*B + conj( alpha )*B**H*A + beta*C.
      current only NoTrans case is implemented.
    
[in]nINTEGER. On entry, N specifies the order of the matrix C. N must be at least zero.
[in]kINTEGER. On entry with TRANS = MagmaNoTrans, K specifies the number of columns of the matrices A and B, and on entry with TRANS = Magma_ConjTrans, K specifies the number of rows of the matrices A and B. K must be at least zero.
[in]alphaCOMPLEX. On entry, ALPHA specifies the scalar alpha.
[in]dACOMPLEX array of DIMENSION ( LDA, ka ), where ka is k when TRANS = MagmaNoTrans, and is n otherwise. Before entry with TRANS = MagmaNoTrans, the leading n by k part of the array A must contain the matrix A, otherwise the leading k by n part of the array A must contain the matrix A.

[TODO: describe distribution: duplicated on all GPUs.]

Parameters
[in]lddaINTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. When TRANS = MagmaNoTrans then LDA must be at least max( 1, n ), otherwise LDA must be at least max( 1, k ).
[in]a_offsetINTEGER Row offset to start sub-matrix of dA. Uses dA(a_offset:a_offset+n, :). 0 <= a_offset < ldda.
[in]dBCOMPLEX array of DIMENSION ( LDB, kb ), where kb is k when TRANS = MagmaNoTrans, and is n otherwise. Before entry with TRANS = MagmaNoTrans, the leading n by k part of the array B must contain the matrix B, otherwise the leading k by n part of the array B must contain the matrix B.

[TODO: describe distribution: duplicated on all GPUs.]

Parameters
[in]lddbINTEGER. On entry, LDB specifies the first dimension of B as declared in the calling (sub) program. When TRANS = MagmaNoTrans then LDB must be at least max( 1, n ), otherwise LDB must be at least max( 1, k ).
[in]b_offsetINTEGER Row offset to start sub-matrix of dB. Uses dB(b_offset:b_offset+n, :). 0 <= b_offset < lddb.
[in]betaREAL. On entry, BETA specifies the scalar beta.
[in,out]dCCOMPLEX array of DIMENSION ( LDC, n ). Before entry with UPLO = MagmaUpper, the leading n by n upper triangular part of the array C must contain the upper triangular part of the Hermitian matrix and the strictly lower triangular part of C is not referenced. On exit, the upper triangular part of the array C is overwritten by the upper triangular part of the updated matrix.
Before entry with UPLO = MagmaLower, the leading n by n lower triangular part of the array C must contain the lower triangular part of the Hermitian matrix and the strictly upper triangular part of C is not referenced. On exit, the lower triangular part of the array C is overwritten by the lower triangular part of the updated matrix.
Note that the imaginary parts of the diagonal elements need not be set, they are assumed to be zero, and on exit they are set to zero. [TODO: verify]

[TODO: describe distribution: 1D column block-cyclic across GPUs.]

Parameters
[in]lddcINTEGER. On entry, LDC specifies the first dimension of C as declared in the calling (sub) program. LDC must be at least max( 1, n ).
[in]c_offsetINTEGER. Row and column offset to start sub-matrix of dC. Uses dC(c_offset:c_offset+n, c_offset:c_offset+n). 0 <= c_offset < lddc.
[in]ngpuINTEGER. Number of GPUs over which matrix C is distributed.
[in]nbINTEGER. Block size used for distribution of C.
[in]queuesarray of CUDA queues, of dimension NGPU by 20. Streams to use for running multiple GEMMs in parallel. Only up to NSTREAM queues are used on each GPU.
[in]nqueueINTEGER. Number of queues to use on each device
void magmablas_cherk_batched ( magma_uplo_t  uplo,
magma_trans_t  trans,
magma_int_t  n,
magma_int_t  k,
float  alpha,
magmaFloatComplex const *const *  dA_array,
magma_int_t  ldda,
float  beta,
magmaFloatComplex **  dC_array,
magma_int_t  lddc,
magma_int_t  batchCount,
magma_queue_t  queue 
)

CHERK performs one of the hermitian rank k operations.

C := alpha*A*A**H + beta*C,

or

C := alpha*A**H*A + beta*C,

where alpha and beta are real scalars, C is an n by n hermitian matrix and A is an n by k matrix in the first case and a k by n matrix in the second case.

Parameters

Parameters
[in]uplomagma_uplo_t. On entry, uplo specifies whether the upper or lower triangular part of the array C is to be referenced as follows:

uplo = MagmaUpper Only the upper triangular part of C is to be referenced.

uplo = MagmaLower Only the lower triangular part of C is to be referenced.

Parameters
[in]transmagma_trans_t. On entry, trans specifies the operation to be performed as follows:

trans = MagmaNoTrans, C := alpha*A*A**H + beta*C.

trans = MagmaConjTrans, C := alpha*A**H*A + beta*C.

Parameters
[in]nINTEGER. On entry, specifies the order of the matrix C. N must be at least zero.
[in]kINTEGER. On entry with trans = MagmaNoTrans, k specifies the number of columns of the matrix A, and on entry with trans = MagmaConjTrans, k specifies the number of rows of the matrix A. K must be at least zero.
[in]alphaREAL On entry, ALPHA specifies the scalar alpha.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array A of DIMENSION ( ldda, ka ), where ka is k when trans = MagmaNoTrans, and is n otherwise. Before entry with trans = MagmaNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When trans = MagmaNoTrans then ldda must be at least max( 1, n ), otherwise ldda must be at least max( 1, k ).
[in]betaREAL. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input.
[in,out]dC_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array C of DIMENSION ( lddc, n ). Before entry with uplo = MagmaUpper, the leading n by n upper triangular part of the array C must contain the upper triangular part of the hermitian matrix and the strictly lower triangular part of C is not referenced. On exit, the upper triangular part of the array C is overwritten by the upper triangular part of the updated matrix. Before entry with uplo = MagmaLower, the leading n by n lower triangular part of the array C must contain the lower triangular part of the hermitian matrix and the strictly upper triangular part of C is not referenced. On exit, the lower triangular part of the array C is overwritten by the lower triangular part of the updated matrix. Note that the imaginary parts of the diagonal elements need not be set, they are assumed to be zero, and on exit they are set to zero.
[in]lddcINTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, m ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_ctrsm ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  m,
magma_int_t  n,
magmaFloatComplex  alpha,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
magmaFloatComplex_ptr  dB,
magma_int_t  lddb 
)
void magmablas_ctrsm_batched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  m,
magma_int_t  n,
magmaFloatComplex  alpha,
magmaFloatComplex **  dA_array,
magma_int_t  ldda,
magmaFloatComplex **  dB_array,
magma_int_t  lddb,
magma_int_t  batchCount,
magma_queue_t  queue 
)

ctrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B, or X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ctrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]mINTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]nINTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]alphaCOMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
[in]myhandlecublasHandle_t Handlde to use cuBLAS routines
void magmablas_ctrsm_outofplace ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  m,
magma_int_t  n,
magmaFloatComplex  alpha,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
magmaFloatComplex_ptr  dB,
magma_int_t  lddb,
magmaFloatComplex_ptr  dX,
magma_int_t  lddx,
magma_int_t  flag,
magmaFloatComplex_ptr  d_dinvA,
magma_int_t  dinvA_length 
)
void magmablas_ctrsm_outofplace_batched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  flag,
magma_int_t  m,
magma_int_t  n,
magmaFloatComplex  alpha,
magmaFloatComplex **  dA_array,
magma_int_t  ldda,
magmaFloatComplex **  dB_array,
magma_int_t  lddb,
magmaFloatComplex **  dX_array,
magma_int_t  lddx,
magmaFloatComplex **  dinvA_array,
magma_int_t  dinvA_length,
magmaFloatComplex **  dA_displ,
magmaFloatComplex **  dB_displ,
magmaFloatComplex **  dX_displ,
magmaFloatComplex **  dinvA_displ,
magma_int_t  resetozero,
magma_int_t  batchCount,
magma_queue_t  queue 
)

ctrsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B, or X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ctrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]nINTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]alphaCOMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array X of dimension ( lddx, n ). On entry, should be set to 0 On exit, the solution matrix X
[in]lddxINTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/TRI_NB)*TRI_NB*TRI_NB, If side == MagmaRight, dinvA must be of size >= ceil(n/TRI_NB)*TRI_NB*TRI_NB, where TRI_NB = 128.
[in]dinvA_lengthINTEGER The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by CTRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
[in]myhandlecublasHandle_t Handlde to use cuBLAS routines
void magmablas_ctrsm_outofplace_q ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  m,
magma_int_t  n,
magmaFloatComplex  alpha,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
magmaFloatComplex_ptr  dB,
magma_int_t  lddb,
magmaFloatComplex_ptr  dX,
magma_int_t  lddx,
magma_int_t  flag,
magmaFloatComplex_ptr  d_dinvA,
magma_int_t  dinvA_length,
magma_queue_t  queue 
)

ctrsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B, or X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is output.

This is an asynchronous version of magmablas_ctrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]mINTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]nINTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]alphaCOMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dACOMPLEX array of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in]dBCOMPLEX array of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B. On exit, contents in the leading m by n part are destroyed.
[in]lddbINTEGER. On entry, lddb specifies the first dimension of B. lddb >= max( 1, m ).
[out]dXCOMPLEX array of dimension ( lddx, n ). On exit, it contains the m by n solution matrix X.
[in]lddxINTEGER. On entry, lddx specifies the first dimension of X. lddx >= max( 1, m ).
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
d_dinvA(workspace) on device. If side == MagmaLeft, d_dinvA must be of size dinvA_length >= ceil(m/NB)*NB*NB, If side == MagmaRight, d_dinvA must be of size dinvA_length >= ceil(n/NB)*NB*NB, where NB = 128.
[in]dinvA_lengthINTEGER. On entry, dinvA_length specifies the size of d_dinvA.
void magmablas_ctrsm_q ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  m,
magma_int_t  n,
magmaFloatComplex  alpha,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
magmaFloatComplex_ptr  dB,
magma_int_t  lddb,
magma_queue_t  queue 
)

Similar to magmablas_ctrsm_outofplace, but allocates dX and d_dinvA internally.

This makes it a synchronous call, whereas magmablas_ctrsm_outofplace and magmablas_ctrsm_work are asynchronous.

See also
magmablas_ctrsm_work
void magmablas_ctrsm_work ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  m,
magma_int_t  n,
magmaFloatComplex  alpha,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
magmaFloatComplex_ptr  dB,
magma_int_t  lddb,
magmaFloatComplex_ptr  dX,
magma_int_t  lddx,
magma_int_t  flag,
magmaFloatComplex_ptr  d_dinvA,
magma_int_t  dinvA_length 
)
void magmablas_ctrsm_work_batched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  flag,
magma_int_t  m,
magma_int_t  n,
magmaFloatComplex  alpha,
magmaFloatComplex **  dA_array,
magma_int_t  ldda,
magmaFloatComplex **  dB_array,
magma_int_t  lddb,
magmaFloatComplex **  dX_array,
magma_int_t  lddx,
magmaFloatComplex **  dinvA_array,
magma_int_t  dinvA_length,
magmaFloatComplex **  dA_displ,
magmaFloatComplex **  dB_displ,
magmaFloatComplex **  dX_displ,
magmaFloatComplex **  dinvA_displ,
magma_int_t  resetozero,
magma_int_t  batchCount,
magma_queue_t  queue 
)

ctrsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B, or X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ctrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
[in]sidemagma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]transAmagma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]flagBOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
[in]mINTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in]nINTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in]alphaCOMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in,out]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
On exit, the solution matrix X
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ).
[in,out]dX_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array X of dimension ( lddx, n ). On entry, should be set to 0 On exit, the solution matrix X
[in]lddxINTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ).
dinvA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/TRI_NB)*TRI_NB*TRI_NB, If side == MagmaRight, dinvA must be of size >= ceil(n/TRI_NB)*TRI_NB*TRI_NB, where TRI_NB = 128.
[in]dinvA_lengthINTEGER The size of each workspace matrix dinvA
dA_displ(workspace) Array of pointers, dimension (batchCount).
dB_displ(workspace) Array of pointers, dimension (batchCount).
dX_displ(workspace) Array of pointers, dimension (batchCount).
dinvA_displ(workspace) Array of pointers, dimension (batchCount).
[in]resetozeroINTEGER Used internally by CTRTRI_DIAG routine
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
[in]myhandlecublasHandle_t Handlde to use cuBLAS routines
void magmablas_ctrsm_work_q ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  m,
magma_int_t  n,
magmaFloatComplex  alpha,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
magmaFloatComplex_ptr  dB,
magma_int_t  lddb,
magmaFloatComplex_ptr  dX,
magma_int_t  lddx,
magma_int_t  flag,
magmaFloatComplex_ptr  d_dinvA,
magma_int_t  dinvA_length,
magma_queue_t  queue 
)

Similar to magmablas_ctrsm_outofplace, but copies result dX back to dB, as in classical ctrsm interface.

See also
magmablas_ctrsm_outofplace
void magmablas_ctrtri_diag ( magma_uplo_t  uplo,
magma_diag_t  diag,
magma_int_t  n,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
magmaFloatComplex_ptr  d_dinvA 
)
void magmablas_ctrtri_diag_batched ( magma_uplo_t  uplo,
magma_diag_t  diag,
magma_int_t  n,
magmaFloatComplex const *const *  dA_array,
magma_int_t  ldda,
magmaFloatComplex **  dinvA_array,
magma_int_t  resetozero,
magma_int_t  batchCount,
magma_queue_t  queue 
)

CTRTRI_DIAG inverts the NB x NB diagonal blocks of a triangular matrix.

This routine is used in ctrsm.

Parameters
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]nINTEGER. On entry, n specifies the order of the matrix A. N >= 0.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( ldda, n ) The triangular matrix A.
If UPLO = MagmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced.
If UPLO = MagmaLower, the leading N-by-N lower triangular part of A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced.
If DIAG = MagmaUnit, the diagonal elements of A are also not referenced and are assumed to be 1.
[in]lddaINTEGER. The leading dimension of each array A. LDDA >= max(1,N).
[out]dinvA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array dinvA of dimension (NB, ceil(n/NB)*NB), where NB = 128. On exit, contains inverses of the NB-by-NB diagonal blocks of A.
[in]resetozeroINTEGER If not zero, each array dinvA will be reset to all zeros
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_ctrtri_diag_q ( magma_uplo_t  uplo,
magma_diag_t  diag,
magma_int_t  n,
magmaFloatComplex_const_ptr  dA,
magma_int_t  ldda,
magmaFloatComplex_ptr  d_dinvA,
magma_queue_t  queue 
)

Inverts the NB x NB diagonal blocks of a triangular matrix.

This routine is used in ctrsm.

Same as ctrtri_diag, but adds queue argument.

ctrtri_diag inverts the NB x NB diagonal blocks of A.

Parameters
[in]uplomagma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in]diagmagma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in]nINTEGER. On entry, n specifies the order of the matrix A. N >= 0.
[in]dACOMPLEX array of dimension ( ldda, n ) The triangular matrix A.
If UPLO = MagmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced.
If UPLO = MagmaLower, the leading N-by-N lower triangular part of A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced.
If DIAG = MagmaUnit, the diagonal elements of A are also not referenced and are assumed to be 1.
[in]lddaINTEGER. The leading dimension of the array A. LDDA >= max(1,N).
[out]d_dinvACOMPLEX array of dimension (NB, ceil(n/NB)*NB), where NB = 128. On exit, contains inverses of the NB-by-NB diagonal blocks of A.
[in]queuemagma_queue_t Queue to execute in.