double-complex precision

Functions

magma_int_t magma_ztrsm_m (magma_int_t ngpu, magma_side_t side, magma_uplo_t uplo, magma_trans_t transa, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *B, magma_int_t ldb)
 ZTRSM solves one of the matrix equations op( A )*X = alpha*B, or X*op( A ) = alpha*B, where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op( A ) is one of.
void magma_zgemm (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex_ptr dC, magma_int_t lddc)
 Perform matrix-matrix product, $ C = \alpha op(A) op(B) + \beta C $.
void magma_zsymm (magma_side_t side, magma_uplo_t uplo, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex_ptr dC, magma_int_t lddc)
 Perform symmetric matrix-matrix product.
void magma_zsyrk (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex beta, magmaDoubleComplex_ptr dC, magma_int_t lddc)
 Perform symmetric rank-k update.
void magma_zsyr2k (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex_ptr dC, magma_int_t lddc)
 Perform symmetric rank-2k update.
void magma_zhemm (magma_side_t side, magma_uplo_t uplo, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex_ptr dC, magma_int_t lddc)
 Perform Hermitian matrix-matrix product.
void magma_zherk (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, double alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, double beta, magmaDoubleComplex_ptr dC, magma_int_t lddc)
 Perform Hermitian rank-k update.
void magma_zher2k (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, double beta, magmaDoubleComplex_ptr dC, magma_int_t lddc)
 Perform Hermitian rank-2k update.
void magma_ztrmm (magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_ptr dB, magma_int_t lddb)
 Perform triangular matrix-matrix product.
void magma_ztrsm (magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_ptr dB, magma_int_t lddb)
 Solve triangular matrix-matrix system (multiple right-hand sides).
void magmablas_zgemm_batched (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex const *const *dA_array, magma_int_t ldda, magmaDoubleComplex const *const *dB_array, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex **dC_array, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue)
 ZGEMM performs one of the matrix-matrix operations.
void magmablas_zgemm (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex_ptr dC, magma_int_t lddc)
 ZGEMM performs one of the matrix-matrix operations.
void magmablas_zgemm_batched_lg (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex const *const *dA_array, magma_int_t ldda, magmaDoubleComplex const *const *dB_array, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex **dC_array, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue)
 ZGEMM performs one of the matrix-matrix operations.
void magmablas_zgemm_batched_k32 (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex const *const *dA_array, magma_int_t ldda, magmaDoubleComplex const *const *dB_array, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex **dC_array, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue)
 ZGEMM performs one of the matrix-matrix operations.
void magmablas_zgemm_reduce (magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex_ptr dC, magma_int_t lddc)
 ZGEMM_REDUCE performs one of the matrix-matrix operations.
void magmablas_zher2k_mgpu2 (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex_ptr dA[], magma_int_t ldda, magma_int_t a_offset, magmaDoubleComplex_ptr dB[], magma_int_t lddb, magma_int_t b_offset, double beta, magmaDoubleComplex_ptr dC[], magma_int_t lddc, magma_int_t c_offset, magma_int_t ngpu, magma_int_t nb, magma_queue_t queues[][20], magma_int_t nqueue)
 ZHER2K performs one of the Hermitian rank 2k operations.
void magmablas_zher2k_mgpu_spec (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex_ptr dA[], magma_int_t ldda, magma_int_t a_offset, magmaDoubleComplex_ptr dB[], magma_int_t lddb, magma_int_t b_offset, double beta, magmaDoubleComplex_ptr dC[], magma_int_t lddc, magma_int_t c_offset, magma_int_t ngpu, magma_int_t nb, magma_queue_t queues[][20], magma_int_t nqueue)
 ZHER2K performs one of the Hermitian rank 2k operations.
void magmablas_zherk_batched (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, double alpha, magmaDoubleComplex const *const *dA_array, magma_int_t ldda, double beta, magmaDoubleComplex **dC_array, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue)
 ZHERK performs one of the hermitian rank k operations.
void magmablas_zherk_batched_lg (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, double alpha, magmaDoubleComplex const *const *dA_array, magma_int_t ldda, double beta, magmaDoubleComplex **dC_array, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue)
 ZHERK performs one of the hermitian rank k operations.
void magmablas_zherk_batched_k32 (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, double alpha, magmaDoubleComplex const *const *dA_array, magma_int_t ldda, double beta, magmaDoubleComplex **dC_array, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue)
 ZHERK performs one of the hermitian rank k operations.
void magmablas_ztrsm_outofplace (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_ptr dB, magma_int_t lddb, magma_int_t flag, magmaDoubleComplex_ptr d_dinvA, magmaDoubleComplex_ptr dX)
 ztrsm_outofplace solves one of the matrix equations on gpu
void magmablas_ztrsm_work (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_ptr dB, magma_int_t lddb, magma_int_t flag, magmaDoubleComplex_ptr d_dinvA, magmaDoubleComplex_ptr dX)
void magmablas_ztrsm (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_ptr dB, magma_int_t lddb)
void magmablas_ztrsm_outofplace_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex **dA_array, magma_int_t ldda, magmaDoubleComplex **dB_array, magma_int_t lddb, magmaDoubleComplex **dX_array, magma_int_t lddx, magmaDoubleComplex **dinvA_array, magma_int_t dinvA_length, magmaDoubleComplex **dA_displ, magmaDoubleComplex **dB_displ, magmaDoubleComplex **dX_displ, magmaDoubleComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
 ztrsm_work solves one of the matrix equations on gpu
void magmablas_ztrsm_work_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex **dA_array, magma_int_t ldda, magmaDoubleComplex **dB_array, magma_int_t lddb, magmaDoubleComplex **dX_array, magma_int_t lddx, magmaDoubleComplex **dinvA_array, magma_int_t dinvA_length, magmaDoubleComplex **dA_displ, magmaDoubleComplex **dB_displ, magmaDoubleComplex **dX_displ, magmaDoubleComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
void magmablas_ztrsm_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex **dA_array, magma_int_t ldda, magmaDoubleComplex **dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue)
void magmablas_ztrtri_diag_q (magma_uplo_t uplo, magma_diag_t diag, magma_int_t n, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_ptr d_dinvA, magma_queue_t queue)
 Inverts the NB x NB diagonal blocks of a triangular matrix.
void magmablas_ztrtri_diag (magma_uplo_t uplo, magma_diag_t diag, magma_int_t n, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_ptr d_dinvA)
void magmablas_ztrtri_diag_batched (magma_uplo_t uplo, magma_diag_t diag, magma_int_t n, magmaDoubleComplex const *const *dA_array, magma_int_t ldda, magmaDoubleComplex **dinvA_array, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
 Inverts the NB x NB diagonal blocks of a triangular matrix.

Function Documentation

void magma_zgemm ( magma_trans_t  transA,
magma_trans_t  transB,
magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
magmaDoubleComplex  alpha,
magmaDoubleComplex_const_ptr  dA,
magma_int_t  ldda,
magmaDoubleComplex_const_ptr  dB,
magma_int_t  lddb,
magmaDoubleComplex  beta,
magmaDoubleComplex_ptr  dC,
magma_int_t  lddc 
)

Perform matrix-matrix product, $ C = \alpha op(A) op(B) + \beta C $.

Parameters:
[in] transA Operation op(A) to perform on matrix A.
[in] transB Operation op(B) to perform on matrix B.
[in] m Number of rows of C and op(A). m >= 0.
[in] n Number of columns of C and op(B). n >= 0.
[in] k Number of columns of op(A) and rows of op(B). k >= 0.
[in] alpha Scalar $ \alpha $
[in] dA COMPLEX_16 array on GPU device. If transA == MagmaNoTrans, the m-by-k matrix A of dimension (ldda,k), ldda >= max(1,m);
otherwise, the k-by-m matrix A of dimension (ldda,m), ldda >= max(1,k).
[in] ldda Leading dimension of dA.
[in] dB COMPLEX_16 array on GPU device. If transB == MagmaNoTrans, the k-by-n matrix B of dimension (lddb,n), lddb >= max(1,k);
otherwise, the n-by-k matrix B of dimension (lddb,k), lddb >= max(1,n).
[in] lddb Leading dimension of dB.
[in] beta Scalar $ \beta $
[in,out] dC COMPLEX_16 array on GPU device. The m-by-n matrix C of dimension (lddc,n), lddc >= max(1,m).
[in] lddc Leading dimension of dC.
void magma_zhemm ( magma_side_t  side,
magma_uplo_t  uplo,
magma_int_t  m,
magma_int_t  n,
magmaDoubleComplex  alpha,
magmaDoubleComplex_const_ptr  dA,
magma_int_t  ldda,
magmaDoubleComplex_const_ptr  dB,
magma_int_t  lddb,
magmaDoubleComplex  beta,
magmaDoubleComplex_ptr  dC,
magma_int_t  lddc 
)

Perform Hermitian matrix-matrix product.

$ C = \alpha A B + \beta C $ (side == MagmaLeft), or
$ C = \alpha B A + \beta C $ (side == MagmaRight),
where $ A $ is Hermitian.

Parameters:
[in] side Whether A is on the left or right.
[in] uplo Whether the upper or lower triangle of A is referenced.
[in] m Number of rows of C. m >= 0.
[in] n Number of columns of C. n >= 0.
[in] alpha Scalar $ \alpha $
[in] dA COMPLEX_16 array on GPU device. If side == MagmaLeft, the m-by-m Hermitian matrix A of dimension (ldda,m), ldda >= max(1,m);
otherwise, the n-by-n Hermitian matrix A of dimension (ldda,n), ldda >= max(1,n).
[in] ldda Leading dimension of dA.
[in] dB COMPLEX_16 array on GPU device. The m-by-n matrix B of dimension (lddb,n), lddb >= max(1,m).
[in] lddb Leading dimension of dB.
[in] beta Scalar $ \beta $
[in,out] dC COMPLEX_16 array on GPU device. The m-by-n matrix C of dimension (lddc,n), lddc >= max(1,m).
[in] lddc Leading dimension of dC.
void magma_zher2k ( magma_uplo_t  uplo,
magma_trans_t  trans,
magma_int_t  n,
magma_int_t  k,
magmaDoubleComplex  alpha,
magmaDoubleComplex_const_ptr  dA,
magma_int_t  ldda,
magmaDoubleComplex_const_ptr  dB,
magma_int_t  lddb,
double  beta,
magmaDoubleComplex_ptr  dC,
magma_int_t  lddc 
)

Perform Hermitian rank-2k update.

$ C = \alpha A B^T + \alpha B A^T \beta C $ (trans == MagmaNoTrans), or
$ C = \alpha A^T B + \alpha B^T A \beta C $ (trans == MagmaTrans),
where $ C $ is Hermitian.

Parameters:
[in] uplo Whether the upper or lower triangle of C is referenced.
[in] trans Operation to perform on A and B.
[in] n Number of rows and columns of C. n >= 0.
[in] k Number of columns of A and B (for MagmaNoTrans) or rows of A and B (for MagmaTrans). k >= 0.
[in] alpha Scalar $ \alpha $
[in] dA COMPLEX_16 array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix A of dimension (ldda,k), ldda >= max(1,n);
otherwise, the k-by-n matrix A of dimension (ldda,n), ldda >= max(1,k).
[in] ldda Leading dimension of dA.
[in] dB COMPLEX_16 array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix B of dimension (lddb,k), lddb >= max(1,n);
otherwise, the k-by-n matrix B of dimension (lddb,n), lddb >= max(1,k).
[in] lddb Leading dimension of dB.
[in] beta Scalar $ \beta $
[in,out] dC COMPLEX_16 array on GPU device. The n-by-n Hermitian matrix C of dimension (lddc,n), lddc >= max(1,n).
[in] lddc Leading dimension of dC.
void magma_zherk ( magma_uplo_t  uplo,
magma_trans_t  trans,
magma_int_t  n,
magma_int_t  k,
double  alpha,
magmaDoubleComplex_const_ptr  dA,
magma_int_t  ldda,
double  beta,
magmaDoubleComplex_ptr  dC,
magma_int_t  lddc 
)

Perform Hermitian rank-k update.

$ C = \alpha A A^T + \beta C $ (trans == MagmaNoTrans), or
$ C = \alpha A^T A + \beta C $ (trans == MagmaTrans),
where $ C $ is Hermitian.

Parameters:
[in] uplo Whether the upper or lower triangle of C is referenced.
[in] trans Operation to perform on A.
[in] n Number of rows and columns of C. n >= 0.
[in] k Number of columns of A (for MagmaNoTrans) or rows of A (for MagmaTrans). k >= 0.
[in] alpha Scalar $ \alpha $
[in] dA COMPLEX_16 array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix A of dimension (ldda,k), ldda >= max(1,n);
otherwise, the k-by-n matrix A of dimension (ldda,n), ldda >= max(1,k).
[in] ldda Leading dimension of dA.
[in] beta Scalar $ \beta $
[in,out] dC COMPLEX_16 array on GPU device. The n-by-n Hermitian matrix C of dimension (lddc,n), lddc >= max(1,n).
[in] lddc Leading dimension of dC.
void magma_zsymm ( magma_side_t  side,
magma_uplo_t  uplo,
magma_int_t  m,
magma_int_t  n,
magmaDoubleComplex  alpha,
magmaDoubleComplex_const_ptr  dA,
magma_int_t  ldda,
magmaDoubleComplex_const_ptr  dB,
magma_int_t  lddb,
magmaDoubleComplex  beta,
magmaDoubleComplex_ptr  dC,
magma_int_t  lddc 
)

Perform symmetric matrix-matrix product.

$ C = \alpha A B + \beta C $ (side == MagmaLeft), or
$ C = \alpha B A + \beta C $ (side == MagmaRight),
where $ A $ is symmetric.

Parameters:
[in] side Whether A is on the left or right.
[in] uplo Whether the upper or lower triangle of A is referenced.
[in] m Number of rows of C. m >= 0.
[in] n Number of columns of C. n >= 0.
[in] alpha Scalar $ \alpha $
[in] dA COMPLEX_16 array on GPU device. If side == MagmaLeft, the m-by-m symmetric matrix A of dimension (ldda,m), ldda >= max(1,m);
otherwise, the n-by-n symmetric matrix A of dimension (ldda,n), ldda >= max(1,n).
[in] ldda Leading dimension of dA.
[in] dB COMPLEX_16 array on GPU device. The m-by-n matrix B of dimension (lddb,n), lddb >= max(1,m).
[in] lddb Leading dimension of dB.
[in] beta Scalar $ \beta $
[in,out] dC COMPLEX_16 array on GPU device. The m-by-n matrix C of dimension (lddc,n), lddc >= max(1,m).
[in] lddc Leading dimension of dC.
void magma_zsyr2k ( magma_uplo_t  uplo,
magma_trans_t  trans,
magma_int_t  n,
magma_int_t  k,
magmaDoubleComplex  alpha,
magmaDoubleComplex_const_ptr  dA,
magma_int_t  ldda,
magmaDoubleComplex_const_ptr  dB,
magma_int_t  lddb,
magmaDoubleComplex  beta,
magmaDoubleComplex_ptr  dC,
magma_int_t  lddc 
)

Perform symmetric rank-2k update.

$ C = \alpha A B^T + \alpha B A^T \beta C $ (trans == MagmaNoTrans), or
$ C = \alpha A^T B + \alpha B^T A \beta C $ (trans == MagmaTrans),
where $ C $ is symmetric.

Parameters:
[in] uplo Whether the upper or lower triangle of C is referenced.
[in] trans Operation to perform on A and B.
[in] n Number of rows and columns of C. n >= 0.
[in] k Number of columns of A and B (for MagmaNoTrans) or rows of A and B (for MagmaTrans). k >= 0.
[in] alpha Scalar $ \alpha $
[in] dA COMPLEX_16 array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix A of dimension (ldda,k), ldda >= max(1,n);
otherwise, the k-by-n matrix A of dimension (ldda,n), ldda >= max(1,k).
[in] ldda Leading dimension of dA.
[in] dB COMPLEX_16 array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix B of dimension (lddb,k), lddb >= max(1,n);
otherwise, the k-by-n matrix B of dimension (lddb,n), lddb >= max(1,k).
[in] lddb Leading dimension of dB.
[in] beta Scalar $ \beta $
[in,out] dC COMPLEX_16 array on GPU device. The n-by-n symmetric matrix C of dimension (lddc,n), lddc >= max(1,n).
[in] lddc Leading dimension of dC.
void magma_zsyrk ( magma_uplo_t  uplo,
magma_trans_t  trans,
magma_int_t  n,
magma_int_t  k,
magmaDoubleComplex  alpha,
magmaDoubleComplex_const_ptr  dA,
magma_int_t  ldda,
magmaDoubleComplex  beta,
magmaDoubleComplex_ptr  dC,
magma_int_t  lddc 
)

Perform symmetric rank-k update.

$ C = \alpha A A^T + \beta C $ (trans == MagmaNoTrans), or
$ C = \alpha A^T A + \beta C $ (trans == MagmaTrans),
where $ C $ is symmetric.

Parameters:
[in] uplo Whether the upper or lower triangle of C is referenced.
[in] trans Operation to perform on A.
[in] n Number of rows and columns of C. n >= 0.
[in] k Number of columns of A (for MagmaNoTrans) or rows of A (for MagmaTrans). k >= 0.
[in] alpha Scalar $ \alpha $
[in] dA COMPLEX_16 array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix A of dimension (ldda,k), ldda >= max(1,n);
otherwise, the k-by-n matrix A of dimension (ldda,n), ldda >= max(1,k).
[in] ldda Leading dimension of dA.
[in] beta Scalar $ \beta $
[in,out] dC COMPLEX_16 array on GPU device. The n-by-n symmetric matrix C of dimension (lddc,n), lddc >= max(1,n).
[in] lddc Leading dimension of dC.
void magma_ztrmm ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  trans,
magma_diag_t  diag,
magma_int_t  m,
magma_int_t  n,
magmaDoubleComplex  alpha,
magmaDoubleComplex_const_ptr  dA,
magma_int_t  ldda,
magmaDoubleComplex_ptr  dB,
magma_int_t  lddb 
)

Perform triangular matrix-matrix product.

$ B = \alpha op(A) B $ (side == MagmaLeft), or
$ B = \alpha B op(A) $ (side == MagmaRight),
where $ A $ is triangular.

Parameters:
[in] side Whether A is on the left or right.
[in] uplo Whether A is upper or lower triangular.
[in] trans Operation to perform on A.
[in] diag Whether the diagonal of A is assumed to be unit or non-unit.
[in] m Number of rows of B. m >= 0.
[in] n Number of columns of B. n >= 0.
[in] alpha Scalar $ \alpha $
[in] dA COMPLEX_16 array on GPU device. If side == MagmaLeft, the n-by-n triangular matrix A of dimension (ldda,n), ldda >= max(1,n);
otherwise, the m-by-m triangular matrix A of dimension (ldda,m), ldda >= max(1,m).
[in] ldda Leading dimension of dA.
[in] dB COMPLEX_16 array on GPU device. The m-by-n matrix B of dimension (lddb,n), lddb >= max(1,m).
[in] lddb Leading dimension of dB.
void magma_ztrsm ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  trans,
magma_diag_t  diag,
magma_int_t  m,
magma_int_t  n,
magmaDoubleComplex  alpha,
magmaDoubleComplex_const_ptr  dA,
magma_int_t  ldda,
magmaDoubleComplex_ptr  dB,
magma_int_t  lddb 
)

Solve triangular matrix-matrix system (multiple right-hand sides).

$ op(A) X = \alpha B $ (side == MagmaLeft), or
$ X op(A) = \alpha B $ (side == MagmaRight),
where $ A $ is triangular.

Parameters:
[in] side Whether A is on the left or right.
[in] uplo Whether A is upper or lower triangular.
[in] trans Operation to perform on A.
[in] diag Whether the diagonal of A is assumed to be unit or non-unit.
[in] m Number of rows of B. m >= 0.
[in] n Number of columns of B. n >= 0.
[in] alpha Scalar $ \alpha $
[in] dA COMPLEX_16 array on GPU device. If side == MagmaLeft, the m-by-m triangular matrix A of dimension (ldda,m), ldda >= max(1,m);
otherwise, the n-by-n triangular matrix A of dimension (ldda,n), ldda >= max(1,n).
[in] ldda Leading dimension of dA.
[in,out] dB COMPLEX_16 array on GPU device. On entry, m-by-n matrix B of dimension (lddb,n), lddb >= max(1,m). On exit, overwritten with the solution matrix X.
[in] lddb Leading dimension of dB.
magma_int_t magma_ztrsm_m ( magma_int_t  ngpu,
magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transa,
magma_diag_t  diag,
magma_int_t  m,
magma_int_t  n,
magmaDoubleComplex  alpha,
magmaDoubleComplex *  A,
magma_int_t  lda,
magmaDoubleComplex *  B,
magma_int_t  ldb 
)

ZTRSM solves one of the matrix equations op( A )*X = alpha*B, or X*op( A ) = alpha*B, where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op( A ) is one of.

op( A ) = A or op( A ) = A**T or op( A ) = A**H.

The matrix X is overwritten on B.

Parameters:
[in] ngpu INTEGER Number of GPUs to use. ngpu > 0.
[in] side magma_side_t. On entry, SIDE specifies whether op( A ) appears on the left or right of X as follows:

  • = MagmaLeft: op( A )*X = alpha*B.
  • = MagmaRight: X*op( A ) = alpha*B.
[in] uplo magma_uplo_t. On entry, UPLO specifies whether the matrix A is an upper or lower triangular matrix as follows:

  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in] transa magma_trans_t. On entry, TRANSA specifies the form of op( A ) to be used in the matrix multiplication as follows:

  • = MagmaNoTrans: op( A ) = A.
  • = MagmaTrans: op( A ) = A**T.
  • = MagmaConjTrans: op( A ) = A**H.
[in] diag magma_diag_t. On entry, DIAG specifies whether or not A is unit triangular as follows:

  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in] m INTEGER. On entry, M specifies the number of rows of B. M must be at least zero.
[in] n INTEGER. On entry, N specifies the number of columns of B. N must be at least zero.
[in] alpha COMPLEX_16. On entry, ALPHA specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in] A COMPLEX_16 array of DIMENSION ( LDA, k ), where k is m when SIDE = MagmaLeft and is n when SIDE = MagmaRight. Before entry with UPLO = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with UPLO = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when DIAG = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in] lda INTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. When SIDE = MagmaLeft then LDA >= max( 1, m ), when SIDE = MagmaRight then LDA >= max( 1, n ).
[in,out] B COMPLEX_16 array of DIMENSION ( LDB, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B, and on exit is overwritten by the solution matrix X.
[in] ldb INTEGER. On entry, LDB specifies the first dimension of B as declared in the calling (sub) program. LDB must be at least max( 1, m ).
void magmablas_zgemm ( magma_trans_t  transA,
magma_trans_t  transB,
magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
magmaDoubleComplex  alpha,
magmaDoubleComplex_const_ptr  dA,
magma_int_t  ldda,
magmaDoubleComplex_const_ptr  dB,
magma_int_t  lddb,
magmaDoubleComplex  beta,
magmaDoubleComplex_ptr  dC,
magma_int_t  lddc 
)

ZGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X or op( X ) = X**T or op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters ----------

Parameters:
[in] transA CHARACTER*1. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:

  • = 'N': op( A ) = A.
  • = 'T': op( A ) = A**T.
  • = 'C': op( A ) = A**H.
[in] transB CHARACTER*1. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:

  • = 'N': op( B ) = B.
  • = 'T': op( B ) = B**T.
  • = 'C': op( B ) = B**H.
[in] m INTEGER. On entry, M specifies the number of rows of the matrix op( dA ) and of the matrix dC. M must be at least zero.
[in] n INTEGER. On entry, N specifies the number of columns of the matrix op( dB ) and the number of columns of the matrix dC. N must be at least zero.
[in] k INTEGER. On entry, K specifies the number of columns of the matrix op( dA ) and the number of rows of the matrix op( dB ). K must be at least zero.
[in] alpha COMPLEX_16 On entry, ALPHA specifies the scalar alpha.
[in] dA COMPLEX_16 array of DIMENSION ( LDA, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array dA must contain the matrix dA, otherwise the leading k by m part of the array dA must contain the matrix dA.
[in] ldda INTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. When transA = MagmaNoTrans then LDA must be at least max( 1, m ), otherwise LDA must be at least max( 1, k ).
[in] dB COMPLEX_16 array of DIMENSION ( LDB, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array dB must contain the matrix dB, otherwise the leading n by k part of the array dB must contain the matrix dB.
[in] lddb INTEGER. On entry, LDB specifies the first dimension of dB as declared in the calling (sub) program. When transB = MagmaNoTrans then LDB must be at least max( 1, k ), otherwise LDB must be at least max( 1, n ).
[in] beta COMPLEX_16. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then dC need not be set on input.
[in,out] dC COMPLEX_16 array of DIMENSION ( LDC, n ). Before entry, the leading m by n part of the array dC must contain the matrix dC, except when beta is zero, in which case dC need not be set on entry. On exit, the array dC is overwritten by the m by n matrix ( alpha*op( dA )*op( dB ) + beta*dC ).
[in] lddc INTEGER. On entry, LDC specifies the first dimension of dC as declared in the calling (sub) program. LDC must be at least max( 1, m ).
void magmablas_zgemm_batched ( magma_trans_t  transA,
magma_trans_t  transB,
magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
magmaDoubleComplex  alpha,
magmaDoubleComplex const *const *  dA_array,
magma_int_t  ldda,
magmaDoubleComplex const *const *  dB_array,
magma_int_t  lddb,
magmaDoubleComplex  beta,
magmaDoubleComplex **  dC_array,
magma_int_t  lddc,
magma_int_t  batchCount,
magma_queue_t  queue 
)

ZGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X or op( X ) = X**T or op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters ----------

Parameters:
[in] transA CHARACTER*1. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:

  • = 'N': op( A ) = A.
  • = 'T': op( A ) = A**T.
  • = 'C': op( A ) = A**H.
[in] transB CHARACTER*1. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:

  • = 'N': op( B ) = B.
  • = 'T': op( B ) = B**T.
  • = 'C': op( B ) = B**H.
[in] m INTEGER. On entry, M specifies the number of rows of the matrix op( dA ) and of the matrix dC. M must be at least zero.
[in] n INTEGER. On entry, N specifies the number of columns of the matrix op( dB ) and the number of columns of the matrix dC. N must be at least zero.
[in] k INTEGER. On entry, K specifies the number of columns of the matrix op( dA ) and the number of rows of the matrix op( dB ). K must be at least zero.
[in] alpha COMPLEX_16 On entry, ALPHA specifies the scalar alpha.
[in] dA COMPLEX_16 array of DIMENSION ( LDA, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array dA must contain the matrix dA, otherwise the leading k by m part of the array dA must contain the matrix dA.
[in] ldda INTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. When transA = MagmaNoTrans then LDA must be at least max( 1, m ), otherwise LDA must be at least max( 1, k ).
[in] dB COMPLEX_16 array of DIMENSION ( LDB, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array dB must contain the matrix dB, otherwise the leading n by k part of the array dB must contain the matrix dB.
[in] lddb INTEGER. On entry, LDB specifies the first dimension of dB as declared in the calling (sub) program. When transB = MagmaNoTrans then LDB must be at least max( 1, k ), otherwise LDB must be at least max( 1, n ).
[in] beta COMPLEX_16. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then dC need not be set on input.
[in,out] dC COMPLEX_16 array of DIMENSION ( LDC, n ). Before entry, the leading m by n part of the array dC must contain the matrix dC, except when beta is zero, in which case dC need not be set on entry. On exit, the array dC is overwritten by the m by n matrix ( alpha*op( dA )*op( dB ) + beta*dC ).
[in] lddc INTEGER. On entry, LDC specifies the first dimension of dC as declared in the calling (sub) program. LDC must be at least max( 1, m ).
void magmablas_zgemm_batched_k32 ( magma_trans_t  transA,
magma_trans_t  transB,
magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
magmaDoubleComplex  alpha,
magmaDoubleComplex const *const *  dA_array,
magma_int_t  ldda,
magmaDoubleComplex const *const *  dB_array,
magma_int_t  lddb,
magmaDoubleComplex  beta,
magmaDoubleComplex **  dC_array,
magma_int_t  lddc,
magma_int_t  batchCount,
magma_queue_t  queue 
)

ZGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X or op( X ) = X**T or op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters ----------

Parameters:
[in] transA CHARACTER*1. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:

  • = 'N': op( A ) = A.
  • = 'T': op( A ) = A**T.
  • = 'C': op( A ) = A**H.
[in] transB CHARACTER*1. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:

  • = 'N': op( B ) = B.
  • = 'T': op( B ) = B**T.
  • = 'C': op( B ) = B**H.
[in] m INTEGER. On entry, M specifies the number of rows of the matrix op( dA ) and of the matrix dC. M must be at least zero.
[in] n INTEGER. On entry, N specifies the number of columns of the matrix op( dB ) and the number of columns of the matrix dC. N must be at least zero.
[in] k INTEGER. On entry, K specifies the number of columns of the matrix op( dA ) and the number of rows of the matrix op( dB ). K must be at least zero.
[in] alpha COMPLEX_16 On entry, ALPHA specifies the scalar alpha.
[in] dA COMPLEX_16 array of DIMENSION ( LDA, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array dA must contain the matrix dA, otherwise the leading k by m part of the array dA must contain the matrix dA.
[in] ldda INTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. When transA = MagmaNoTrans then LDA must be at least max( 1, m ), otherwise LDA must be at least max( 1, k ).
[in] dB COMPLEX_16 array of DIMENSION ( LDB, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array dB must contain the matrix dB, otherwise the leading n by k part of the array dB must contain the matrix dB.
[in] lddb INTEGER. On entry, LDB specifies the first dimension of dB as declared in the calling (sub) program. When transB = MagmaNoTrans then LDB must be at least max( 1, k ), otherwise LDB must be at least max( 1, n ).
[in] beta COMPLEX_16. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then dC need not be set on input.
[in,out] dC COMPLEX_16 array of DIMENSION ( LDC, n ). Before entry, the leading m by n part of the array dC must contain the matrix dC, except when beta is zero, in which case dC need not be set on entry. On exit, the array dC is overwritten by the m by n matrix ( alpha*op( dA )*op( dB ) + beta*dC ).
[in] lddc INTEGER. On entry, LDC specifies the first dimension of dC as declared in the calling (sub) program. LDC must be at least max( 1, m ).
void magmablas_zgemm_batched_lg ( magma_trans_t  transA,
magma_trans_t  transB,
magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
magmaDoubleComplex  alpha,
magmaDoubleComplex const *const *  dA_array,
magma_int_t  ldda,
magmaDoubleComplex const *const *  dB_array,
magma_int_t  lddb,
magmaDoubleComplex  beta,
magmaDoubleComplex **  dC_array,
magma_int_t  lddc,
magma_int_t  batchCount,
magma_queue_t  queue 
)

ZGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X or op( X ) = X**T or op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters ----------

Parameters:
[in] transA CHARACTER*1. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:

  • = 'N': op( A ) = A.
  • = 'T': op( A ) = A**T.
  • = 'C': op( A ) = A**H.
[in] transB CHARACTER*1. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:

  • = 'N': op( B ) = B.
  • = 'T': op( B ) = B**T.
  • = 'C': op( B ) = B**H.
[in] m INTEGER. On entry, M specifies the number of rows of the matrix op( dA ) and of the matrix dC. M must be at least zero.
[in] n INTEGER. On entry, N specifies the number of columns of the matrix op( dB ) and the number of columns of the matrix dC. N must be at least zero.
[in] k INTEGER. On entry, K specifies the number of columns of the matrix op( dA ) and the number of rows of the matrix op( dB ). K must be at least zero.
[in] alpha COMPLEX_16 On entry, ALPHA specifies the scalar alpha.
[in] dA COMPLEX_16 array of DIMENSION ( LDA, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array dA must contain the matrix dA, otherwise the leading k by m part of the array dA must contain the matrix dA.
[in] ldda INTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. When transA = MagmaNoTrans then LDA must be at least max( 1, m ), otherwise LDA must be at least max( 1, k ).
[in] dB COMPLEX_16 array of DIMENSION ( LDB, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array dB must contain the matrix dB, otherwise the leading n by k part of the array dB must contain the matrix dB.
[in] lddb INTEGER. On entry, LDB specifies the first dimension of dB as declared in the calling (sub) program. When transB = MagmaNoTrans then LDB must be at least max( 1, k ), otherwise LDB must be at least max( 1, n ).
[in] beta COMPLEX_16. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then dC need not be set on input.
[in,out] dC COMPLEX_16 array of DIMENSION ( LDC, n ). Before entry, the leading m by n part of the array dC must contain the matrix dC, except when beta is zero, in which case dC need not be set on entry. On exit, the array dC is overwritten by the m by n matrix ( alpha*op( dA )*op( dB ) + beta*dC ).
[in] lddc INTEGER. On entry, LDC specifies the first dimension of dC as declared in the calling (sub) program. LDC must be at least max( 1, m ).
void magmablas_zgemm_reduce ( magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
magmaDoubleComplex  alpha,
magmaDoubleComplex_const_ptr  dA,
magma_int_t  ldda,
magmaDoubleComplex_const_ptr  dB,
magma_int_t  lddb,
magmaDoubleComplex  beta,
magmaDoubleComplex_ptr  dC,
magma_int_t  lddc 
)

ZGEMM_REDUCE performs one of the matrix-matrix operations.

C := alpha*A^T*B + beta*C,

where alpha and beta are scalars, and A, B and C are matrices, with A a k-by-m matrix, B a k-by-n matrix, and C an m-by-n matrix.

This routine is tuned for m, n << k. Typically, m and n are expected to be less than 128.

void magmablas_zher2k_mgpu2 ( magma_uplo_t  uplo,
magma_trans_t  trans,
magma_int_t  n,
magma_int_t  k,
magmaDoubleComplex  alpha,
magmaDoubleComplex_ptr  dA[],
magma_int_t  ldda,
magma_int_t  a_offset,
magmaDoubleComplex_ptr  dB[],
magma_int_t  lddb,
magma_int_t  b_offset,
double  beta,
magmaDoubleComplex_ptr  dC[],
magma_int_t  lddc,
magma_int_t  c_offset,
magma_int_t  ngpu,
magma_int_t  nb,
magma_queue_t  queues[][20],
magma_int_t  nqueue 
)

ZHER2K performs one of the Hermitian rank 2k operations.

C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C,

or

C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C,

where alpha and beta are scalars with beta real, C is an n by n Hermitian matrix and A and B are n by k matrices in the first case and k by n matrices in the second case.

Parameters:
[in] uplo magma_uplo_t. On entry, UPLO specifies whether the upper or lower triangular part of the array C is to be referenced as follows:

  • = MagmaUpper: Only the upper triangular part of C is to be referenced.
  • = MagmaLower: Only the lower triangular part of C is to be referenced.

current only Lower case is implemented.

Parameters:
[in] trans magma_trans_t. On entry, TRANS specifies the operation to be performed as follows:

  • = MagmaNoTrans: C := alpha*A*B**H + conj( alpha )*B*A**H + beta*C.
  • = Magma_ConjTrans: C := alpha*A**H*B + conj( alpha )*B**H*A + beta*C.

current only NoTrans case is implemented.

Parameters:
[in] n INTEGER. On entry, N specifies the order of the matrix C. N must be at least zero.
[in] k INTEGER. On entry with TRANS = MagmaNoTrans, K specifies the number of columns of the matrices A and B, and on entry with TRANS = Magma_ConjTrans, K specifies the number of rows of the matrices A and B. K must be at least zero.
[in] alpha COMPLEX*16. On entry, ALPHA specifies the scalar alpha.
[in] dA COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is k when TRANS = MagmaNoTrans, and is n otherwise. Before entry with TRANS = MagmaNoTrans, the leading n by k part of the array A must contain the matrix A, otherwise the leading k by n part of the array A must contain the matrix A.

[TODO: describe distribution: duplicated on all GPUs.]

Parameters:
[in] ldda INTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. When TRANS = MagmaNoTrans then LDA must be at least max( 1, n ), otherwise LDA must be at least max( 1, k ).
[in] a_offset INTEGER Row offset to start sub-matrix of dA. Uses dA(a_offset:a_offset+n, :). 0 <= a_offset < ldda.
[in] dB COMPLEX*16 array of DIMENSION ( LDB, kb ), where kb is k when TRANS = MagmaNoTrans, and is n otherwise. Before entry with TRANS = MagmaNoTrans, the leading n by k part of the array B must contain the matrix B, otherwise the leading k by n part of the array B must contain the matrix B.

[TODO: describe distribution: duplicated on all GPUs.]

Parameters:
[in] lddb INTEGER. On entry, LDB specifies the first dimension of B as declared in the calling (sub) program. When TRANS = MagmaNoTrans then LDB must be at least max( 1, n ), otherwise LDB must be at least max( 1, k ).
[in] b_offset INTEGER Row offset to start sub-matrix of dB. Uses dB(b_offset:b_offset+n, :). 0 <= b_offset < lddb.
[in] beta DOUBLE PRECISION. On entry, BETA specifies the scalar beta.
[in,out] dC COMPLEX*16 array of DIMENSION ( LDC, n ). Before entry with UPLO = MagmaUpper, the leading n by n upper triangular part of the array C must contain the upper triangular part of the Hermitian matrix and the strictly lower triangular part of C is not referenced. On exit, the upper triangular part of the array C is overwritten by the upper triangular part of the updated matrix.
Before entry with UPLO = MagmaLower, the leading n by n lower triangular part of the array C must contain the lower triangular part of the Hermitian matrix and the strictly upper triangular part of C is not referenced. On exit, the lower triangular part of the array C is overwritten by the lower triangular part of the updated matrix.
Note that the imaginary parts of the diagonal elements need not be set, they are assumed to be zero, and on exit they are set to zero. [TODO: verify]

[TODO: describe distribution: 1D column block-cyclic across GPUs.]

Parameters:
[in] lddc INTEGER. On entry, LDC specifies the first dimension of C as declared in the calling (sub) program. LDC must be at least max( 1, n ).
[in] c_offset INTEGER. Row and column offset to start sub-matrix of dC. Uses dC(c_offset:c_offset+n, c_offset:c_offset+n). 0 <= c_offset < lddc.
[in] ngpu INTEGER. Number of GPUs over which matrix C is distributed.
[in] nb INTEGER. Block size used for distribution of C.
[in] queues array of CUDA queues, of dimension NGPU by 20. Streams to use for running multiple GEMMs in parallel. Only up to NSTREAM queues are used on each GPU.
[in] nqueue INTEGER. Number of queues to use on each device
void magmablas_zher2k_mgpu_spec ( magma_uplo_t  uplo,
magma_trans_t  trans,
magma_int_t  n,
magma_int_t  k,
magmaDoubleComplex  alpha,
magmaDoubleComplex_ptr  dA[],
magma_int_t  ldda,
magma_int_t  a_offset,
magmaDoubleComplex_ptr  dB[],
magma_int_t  lddb,
magma_int_t  b_offset,
double  beta,
magmaDoubleComplex_ptr  dC[],
magma_int_t  lddc,
magma_int_t  c_offset,
magma_int_t  ngpu,
magma_int_t  nb,
magma_queue_t  queues[][20],
magma_int_t  nqueue 
)

ZHER2K performs one of the Hermitian rank 2k operations.

C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C,

or

C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C,

where alpha and beta are scalars with beta real, C is an n by n Hermitian matrix and A and B are n by k matrices in the first case and k by n matrices in the second case.

This version assumes C has been symmetrized, so both upper and lower are stored, and it maintains the symmetry, doing twice the operations.

Parameters:
[in] uplo magma_uplo_t. On entry, UPLO specifies whether the upper or lower triangular part of the array C is to be referenced as follows:

  • = MagmaUpper: Only the upper triangular part of C is to be referenced.
  • = MagmaLower: Only the lower triangular part of C is to be referenced.

current only Lower case is implemented.

Parameters:
[in] trans magma_trans_t. On entry, TRANS specifies the operation to be performed as follows:

  • = MagmaNoTrans: C := alpha*A*B**H + conj( alpha )*B*A**H + beta*C.
  • = Magma_ConjTrans: C := alpha*A**H*B + conj( alpha )*B**H*A + beta*C.

current only NoTrans case is implemented.

Parameters:
[in] n INTEGER. On entry, N specifies the order of the matrix C. N must be at least zero.
[in] k INTEGER. On entry with TRANS = MagmaNoTrans, K specifies the number of columns of the matrices A and B, and on entry with TRANS = Magma_ConjTrans, K specifies the number of rows of the matrices A and B. K must be at least zero.
[in] alpha COMPLEX*16. On entry, ALPHA specifies the scalar alpha.
[in] dA COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is k when TRANS = MagmaNoTrans, and is n otherwise. Before entry with TRANS = MagmaNoTrans, the leading n by k part of the array A must contain the matrix A, otherwise the leading k by n part of the array A must contain the matrix A.

[TODO: describe distribution: duplicated on all GPUs.]

Parameters:
[in] ldda INTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. When TRANS = MagmaNoTrans then LDA must be at least max( 1, n ), otherwise LDA must be at least max( 1, k ).
[in] a_offset INTEGER Row offset to start sub-matrix of dA. Uses dA(a_offset:a_offset+n, :). 0 <= a_offset < ldda.
[in] dB COMPLEX*16 array of DIMENSION ( LDB, kb ), where kb is k when TRANS = MagmaNoTrans, and is n otherwise. Before entry with TRANS = MagmaNoTrans, the leading n by k part of the array B must contain the matrix B, otherwise the leading k by n part of the array B must contain the matrix B.

[TODO: describe distribution: duplicated on all GPUs.]

Parameters:
[in] lddb INTEGER. On entry, LDB specifies the first dimension of B as declared in the calling (sub) program. When TRANS = MagmaNoTrans then LDB must be at least max( 1, n ), otherwise LDB must be at least max( 1, k ).
[in] b_offset INTEGER Row offset to start sub-matrix of dB. Uses dB(b_offset:b_offset+n, :). 0 <= b_offset < lddb.
[in] beta DOUBLE PRECISION. On entry, BETA specifies the scalar beta.
[in,out] dC COMPLEX*16 array of DIMENSION ( LDC, n ). Before entry with UPLO = MagmaUpper, the leading n by n upper triangular part of the array C must contain the upper triangular part of the Hermitian matrix and the strictly lower triangular part of C is not referenced. On exit, the upper triangular part of the array C is overwritten by the upper triangular part of the updated matrix.
Before entry with UPLO = MagmaLower, the leading n by n lower triangular part of the array C must contain the lower triangular part of the Hermitian matrix and the strictly upper triangular part of C is not referenced. On exit, the lower triangular part of the array C is overwritten by the lower triangular part of the updated matrix.
Note that the imaginary parts of the diagonal elements need not be set, they are assumed to be zero, and on exit they are set to zero. [TODO: verify]

[TODO: describe distribution: 1D column block-cyclic across GPUs.]

Parameters:
[in] lddc INTEGER. On entry, LDC specifies the first dimension of C as declared in the calling (sub) program. LDC must be at least max( 1, n ).
[in] c_offset INTEGER. Row and column offset to start sub-matrix of dC. Uses dC(c_offset:c_offset+n, c_offset:c_offset+n). 0 <= c_offset < lddc.
[in] ngpu INTEGER. Number of GPUs over which matrix C is distributed.
[in] nb INTEGER. Block size used for distribution of C.
[in] queues array of CUDA queues, of dimension NGPU by 20. Streams to use for running multiple GEMMs in parallel. Only up to NSTREAM queues are used on each GPU.
[in] nqueue INTEGER. Number of queues to use on each device
void magmablas_zherk_batched ( magma_uplo_t  uplo,
magma_trans_t  trans,
magma_int_t  n,
magma_int_t  k,
double  alpha,
magmaDoubleComplex const *const *  dA_array,
magma_int_t  ldda,
double  beta,
magmaDoubleComplex **  dC_array,
magma_int_t  lddc,
magma_int_t  batchCount,
magma_queue_t  queue 
)

ZHERK performs one of the hermitian rank k operations.

C := alpha*A*A**H + beta*C,

or

C := alpha*A**H*A + beta*C,

where alpha and beta are real scalars, C is an n by n hermitian matrix and A is an n by k matrix in the first case and a k by n matrix in the second case.

Parameters ----------

Parameters:
[in] uplo CHARACTER*1. On entry, uplo specifies whether the upper or lower triangular part of the array C is to be referenced as follows:

uplo = 'U' or 'u' Only the upper triangular part of C is to be referenced.

uplo = 'L' or 'l' Only the lower triangular part of C is to be referenced.

Parameters:
[in] trans CHARACTER*1. On entry, trans specifies the operation to be performed as follows:

trans = 'N' or 'n' C := alpha*A*A**H + beta*C.

trans = 'C' or 'c' C := alpha*A**H*A + beta*C.

Parameters:
[in] n INTEGER. On entry, specifies the order of the matrix C. N must be at least zero.
[in] k INTEGER. On entry with trans = 'N' or 'n', k specifies the number of columns of the matrix A, and on entry with trans = 'C' or 'c', k specifies the number of rows of the matrix A. K must be at least zero.
[in] alpha DOUBLE PRECISION On entry, ALPHA specifies the scalar alpha.
[in] dA COMPLEX_16 array of DIMENSION ( ldda, ka ), where ka is k when trans = MagmaNoTrans, and is n otherwise. Before entry with trans = MagmaNoTrans, the leading m by k part of the array dA must contain the matrix dA, otherwise the leading k by m part of the array dA must contain the matrix dA.
[in] ldda INTEGER. On entry, ldda specifies the first dimension of A as declared in the calling (sub) program. When trans = MagmaNoTrans then ldda must be at least max( 1, n ), otherwise ldda must be at least max( 1, k ).
[in] beta DOUBLE PRECISION. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then dC need not be set on input.
[in,out] dC COMPLEX_16 array of DIMENSION ( lddc, n ). Before entry with uplo = 'U' or 'u', the leading n by n upper triangular part of the array C must contain the upper triangular part of the hermitian matrix and the strictly lower triangular part of C is not referenced. On exit, the upper triangular part of the array C is overwritten by the upper triangular part of the updated matrix. Before entry with uplo = 'L' or 'l', the leading n by n lower triangular part of the array C must contain the lower triangular part of the hermitian matrix and the strictly upper triangular part of C is not referenced. On exit, the lower triangular part of the array C is overwritten by the lower triangular part of the updated matrix. Note that the imaginary parts of the diagonal elements need not be set, they are assumed to be zero, and on exit they are set to zero.
[in] lddc INTEGER. On entry, lddc specifies the first dimension of dC as declared in the calling (sub) program. lddc must be at least max( 1, m ).
void magmablas_zherk_batched_k32 ( magma_uplo_t  uplo,
magma_trans_t  trans,
magma_int_t  n,
magma_int_t  k,
double  alpha,
magmaDoubleComplex const *const *  dA_array,
magma_int_t  ldda,
double  beta,
magmaDoubleComplex **  dC_array,
magma_int_t  lddc,
magma_int_t  batchCount,
magma_queue_t  queue 
)

ZHERK performs one of the hermitian rank k operations.

C := alpha*A*A**H + beta*C,

or

C := alpha*A**H*A + beta*C,

where alpha and beta are real scalars, C is an n by n hermitian matrix and A is an n by k matrix in the first case and a k by n matrix in the second case.

Parameters ----------

Parameters:
[in] uplo CHARACTER*1. On entry, uplo specifies whether the upper or lower triangular part of the array C is to be referenced as follows:

uplo = 'U' or 'u' Only the upper triangular part of C is to be referenced.

uplo = 'L' or 'l' Only the lower triangular part of C is to be referenced.

Parameters:
[in] trans CHARACTER*1. On entry, trans specifies the operation to be performed as follows:

trans = 'N' or 'n' C := alpha*A*A**H + beta*C.

trans = 'C' or 'c' C := alpha*A**H*A + beta*C.

Parameters:
[in] n INTEGER. On entry, specifies the order of the matrix C. N must be at least zero.
[in] k INTEGER. On entry with trans = 'N' or 'n', k specifies the number of columns of the matrix A, and on entry with trans = 'C' or 'c', k specifies the number of rows of the matrix A. K must be at least zero.
[in] alpha DOUBLE PRECISION On entry, ALPHA specifies the scalar alpha.
[in] dA COMPLEX_16 array of DIMENSION ( ldda, ka ), where ka is k when trans = MagmaNoTrans, and is n otherwise. Before entry with trans = MagmaNoTrans, the leading m by k part of the array dA must contain the matrix dA, otherwise the leading k by m part of the array dA must contain the matrix dA.
[in] ldda INTEGER. On entry, ldda specifies the first dimension of A as declared in the calling (sub) program. When trans = MagmaNoTrans then ldda must be at least max( 1, n ), otherwise ldda must be at least max( 1, k ).
[in] beta DOUBLE PRECISION. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then dC need not be set on input.
[in,out] dC COMPLEX_16 array of DIMENSION ( lddc, n ). Before entry with uplo = 'U' or 'u', the leading n by n upper triangular part of the array C must contain the upper triangular part of the hermitian matrix and the strictly lower triangular part of C is not referenced. On exit, the upper triangular part of the array C is overwritten by the upper triangular part of the updated matrix. Before entry with uplo = 'L' or 'l', the leading n by n lower triangular part of the array C must contain the lower triangular part of the hermitian matrix and the strictly upper triangular part of C is not referenced. On exit, the lower triangular part of the array C is overwritten by the lower triangular part of the updated matrix. Note that the imaginary parts of the diagonal elements need not be set, they are assumed to be zero, and on exit they are set to zero.
[in] lddc INTEGER. On entry, lddc specifies the first dimension of dC as declared in the calling (sub) program. lddc must be at least max( 1, m ).
void magmablas_zherk_batched_lg ( magma_uplo_t  uplo,
magma_trans_t  trans,
magma_int_t  n,
magma_int_t  k,
double  alpha,
magmaDoubleComplex const *const *  dA_array,
magma_int_t  ldda,
double  beta,
magmaDoubleComplex **  dC_array,
magma_int_t  lddc,
magma_int_t  batchCount,
magma_queue_t  queue 
)

ZHERK performs one of the hermitian rank k operations.

C := alpha*A*A**H + beta*C,

or

C := alpha*A**H*A + beta*C,

where alpha and beta are real scalars, C is an n by n hermitian matrix and A is an n by k matrix in the first case and a k by n matrix in the second case.

Parameters ----------

Parameters:
[in] uplo CHARACTER*1. On entry, uplo specifies whether the upper or lower triangular part of the array C is to be referenced as follows:

uplo = 'U' or 'u' Only the upper triangular part of C is to be referenced.

uplo = 'L' or 'l' Only the lower triangular part of C is to be referenced.

Parameters:
[in] trans CHARACTER*1. On entry, trans specifies the operation to be performed as follows:

trans = 'N' or 'n' C := alpha*A*A**H + beta*C.

trans = 'C' or 'c' C := alpha*A**H*A + beta*C.

Parameters:
[in] n INTEGER. On entry, specifies the order of the matrix C. N must be at least zero.
[in] k INTEGER. On entry with trans = 'N' or 'n', k specifies the number of columns of the matrix A, and on entry with trans = 'C' or 'c', k specifies the number of rows of the matrix A. K must be at least zero.
[in] alpha DOUBLE PRECISION On entry, ALPHA specifies the scalar alpha.
[in] dA COMPLEX_16 array of DIMENSION ( ldda, ka ), where ka is k when trans = MagmaNoTrans, and is n otherwise. Before entry with trans = MagmaNoTrans, the leading m by k part of the array dA must contain the matrix dA, otherwise the leading k by m part of the array dA must contain the matrix dA.
[in] ldda INTEGER. On entry, ldda specifies the first dimension of A as declared in the calling (sub) program. When trans = MagmaNoTrans then ldda must be at least max( 1, n ), otherwise ldda must be at least max( 1, k ).
[in] beta DOUBLE PRECISION. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then dC need not be set on input.
[in,out] dC COMPLEX_16 array of DIMENSION ( lddc, n ). Before entry with uplo = 'U' or 'u', the leading n by n upper triangular part of the array C must contain the upper triangular part of the hermitian matrix and the strictly lower triangular part of C is not referenced. On exit, the upper triangular part of the array C is overwritten by the upper triangular part of the updated matrix. Before entry with uplo = 'L' or 'l', the leading n by n lower triangular part of the array C must contain the lower triangular part of the hermitian matrix and the strictly upper triangular part of C is not referenced. On exit, the lower triangular part of the array C is overwritten by the lower triangular part of the updated matrix. Note that the imaginary parts of the diagonal elements need not be set, they are assumed to be zero, and on exit they are set to zero.
[in] lddc INTEGER. On entry, lddc specifies the first dimension of dC as declared in the calling (sub) program. lddc must be at least max( 1, m ).
void magmablas_ztrsm ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  m,
magma_int_t  n,
magmaDoubleComplex  alpha,
magmaDoubleComplex_const_ptr  dA,
magma_int_t  ldda,
magmaDoubleComplex_ptr  dB,
magma_int_t  lddb 
)
void magmablas_ztrsm_batched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  m,
magma_int_t  n,
magmaDoubleComplex  alpha,
magmaDoubleComplex **  dA_array,
magma_int_t  ldda,
magmaDoubleComplex **  dB_array,
magma_int_t  lddb,
magma_int_t  batchCount,
magma_queue_t  queue 
)
void magmablas_ztrsm_outofplace ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  m,
magma_int_t  n,
magmaDoubleComplex  alpha,
magmaDoubleComplex_const_ptr  dA,
magma_int_t  ldda,
magmaDoubleComplex_ptr  dB,
magma_int_t  lddb,
magma_int_t  flag,
magmaDoubleComplex_ptr  d_dinvA,
magmaDoubleComplex_ptr  dX 
)

ztrsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B, or X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A, or op(A) = A^T, or op(A) = A^H.

The matrix X is output.

This is an asynchronous version of magmablas_ztrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters:
[in] side magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:

  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in] uplo magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:

  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in] transA magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:

  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in] diag magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:

  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in] m INTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in] n INTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in] alpha COMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in] dA COMPLEX_16 array of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in] ldda INTEGER. On entry, ldda specifies the first dimension of A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in] dB COMPLEX_16 array of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
[in] lddb INTEGER. On entry, lddb specifies the first dimension of B. lddb >= max( 1, m ).
[in] flag BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
d_dinvA (workspace) on device. If side == MagmaLeft, d_dinvA must be of size >= ceil(m/NB)*NB*NB, If side == MagmaRight, d_dinvA must be of size >= ceil(n/NB)*NB*NB, where NB = 128.
[out] dX COMPLEX_16 array of dimension ( m, n ). On exit it contain the solution matrix X.
void magmablas_ztrsm_outofplace_batched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  flag,
magma_int_t  m,
magma_int_t  n,
magmaDoubleComplex  alpha,
magmaDoubleComplex **  dA_array,
magma_int_t  ldda,
magmaDoubleComplex **  dB_array,
magma_int_t  lddb,
magmaDoubleComplex **  dX_array,
magma_int_t  lddx,
magmaDoubleComplex **  dinvA_array,
magma_int_t  dinvA_length,
magmaDoubleComplex **  dA_displ,
magmaDoubleComplex **  dB_displ,
magmaDoubleComplex **  dX_displ,
magmaDoubleComplex **  dinvA_displ,
magma_int_t  resetozero,
magma_int_t  batchCount,
magma_queue_t  queue 
)

ztrsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B, or X*op(A) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A, or op(A) = A^T, or op(A) = A^H.

The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ztrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters:
[in] side magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:

  • = MagmaLeft: op(A)*X = alpha*B.
  • = MagmaRight: X*op(A) = alpha*B.
[in] uplo magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:

  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in] transA magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:

  • = MagmaNoTrans: op(A) = A.
  • = MagmaTrans: op(A) = A^T.
  • = MagmaConjTrans: op(A) = A^H.
[in] diag magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:

  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in] m INTEGER. On entry, m specifies the number of rows of B. m >= 0.
[in] n INTEGER. On entry, n specifies the number of columns of B. n >= 0.
[in] alpha COMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry.
[in] dA COMPLEX_16 array of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity.
[in] ldda INTEGER. On entry, ldda specifies the first dimension of A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ).
[in] dB COMPLEX_16 array of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B.
[in] lddb INTEGER. On entry, lddb specifies the first dimension of B. lddb >= max( 1, m ).
[in] flag BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted.
d_dinvA (workspace) on device. If side == MagmaLeft, d_dinvA must be of size >= ceil(m/TRI_NB)*TRI_NB*TRI_NB, If side == MagmaRight, d_dinvA must be of size >= ceil(n/TRI_NB)*TRI_NB*TRI_NB, where TRI_NB = 128.
[out] dX COMPLEX_16 array of dimension ( lddx, n ). On exit it contain the solution matrix X.
void magmablas_ztrsm_work ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  m,
magma_int_t  n,
magmaDoubleComplex  alpha,
magmaDoubleComplex_const_ptr  dA,
magma_int_t  ldda,
magmaDoubleComplex_ptr  dB,
magma_int_t  lddb,
magma_int_t  flag,
magmaDoubleComplex_ptr  d_dinvA,
magmaDoubleComplex_ptr  dX 
)
void magmablas_ztrsm_work_batched ( magma_side_t  side,
magma_uplo_t  uplo,
magma_trans_t  transA,
magma_diag_t  diag,
magma_int_t  flag,
magma_int_t  m,
magma_int_t  n,
magmaDoubleComplex  alpha,
magmaDoubleComplex **  dA_array,
magma_int_t  ldda,
magmaDoubleComplex **  dB_array,
magma_int_t  lddb,
magmaDoubleComplex **  dX_array,
magma_int_t  lddx,
magmaDoubleComplex **  dinvA_array,
magma_int_t  dinvA_length,
magmaDoubleComplex **  dA_displ,
magmaDoubleComplex **  dB_displ,
magmaDoubleComplex **  dX_displ,
magmaDoubleComplex **  dinvA_displ,
magma_int_t  resetozero,
magma_int_t  batchCount,
magma_queue_t  queue 
)
void magmablas_ztrtri_diag ( magma_uplo_t  uplo,
magma_diag_t  diag,
magma_int_t  n,
magmaDoubleComplex_const_ptr  dA,
magma_int_t  ldda,
magmaDoubleComplex_ptr  d_dinvA 
)
void magmablas_ztrtri_diag_batched ( magma_uplo_t  uplo,
magma_diag_t  diag,
magma_int_t  n,
magmaDoubleComplex const *const *  dA_array,
magma_int_t  ldda,
magmaDoubleComplex **  dinvA_array,
magma_int_t  resetozero,
magma_int_t  batchCount,
magma_queue_t  queue 
)

Inverts the NB x NB diagonal blocks of a triangular matrix.

This routine is used in ztrsm.

Same as ztrtri_diag, but adds queue argument. ztrtri_diag inverts the NB x NB diagonal blocks of A.

Parameters:
[in] uplo magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:

  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in] diag magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:

  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in] n INTEGER. On entry, n specifies the order of the matrix A. N >= 0.
[in] dA_array COMPLEX_16 array of dimension ( ldda, n ) The triangular matrix A.
If UPLO = 'U', the leading N-by-N upper triangular part of A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced.
If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced.
If DIAG = 'U', the diagonal elements of A are also not referenced and are assumed to be 1.
[in] ldda INTEGER. The leading dimension of the array A. LDDA >= max(1,N).
[out] dinvA_array COMPLEX_16 array of dimension (NB, ceil(n/NB)*NB), where NB = 128. On exit, contains inverses of the NB-by-NB diagonal blocks of A.
[in] queue magma_queue_t Queue to execute in.
void magmablas_ztrtri_diag_q ( magma_uplo_t  uplo,
magma_diag_t  diag,
magma_int_t  n,
magmaDoubleComplex_const_ptr  dA,
magma_int_t  ldda,
magmaDoubleComplex_ptr  d_dinvA,
magma_queue_t  queue 
)

Inverts the NB x NB diagonal blocks of a triangular matrix.

This routine is used in ztrsm.

Same as ztrtri_diag, but adds queue argument. ztrtri_diag inverts the NB x NB diagonal blocks of A.

Parameters:
[in] uplo magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:

  • = MagmaUpper: A is an upper triangular matrix.
  • = MagmaLower: A is a lower triangular matrix.
[in] diag magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:

  • = MagmaUnit: A is assumed to be unit triangular.
  • = MagmaNonUnit: A is not assumed to be unit triangular.
[in] n INTEGER. On entry, n specifies the order of the matrix A. N >= 0.
[in] dA COMPLEX_16 array of dimension ( ldda, n ) The triangular matrix A.
If UPLO = 'U', the leading N-by-N upper triangular part of A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced.
If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced.
If DIAG = 'U', the diagonal elements of A are also not referenced and are assumed to be 1.
[in] ldda INTEGER. The leading dimension of the array A. LDDA >= max(1,N).
[out] d_dinvA COMPLEX_16 array of dimension (NB, ceil(n/NB)*NB), where NB = 128. On exit, contains inverses of the NB-by-NB diagonal blocks of A.
[in] queue magma_queue_t Queue to execute in.

Generated on 3 May 2015 for MAGMA by  doxygen 1.6.1