![]() |
MAGMA
2.0.2
Matrix Algebra for GPU and Multicore Architectures
|
Functions | |
magma_int_t | magma_ztrsm_m (magma_int_t ngpu, magma_side_t side, magma_uplo_t uplo, magma_trans_t transa, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, const magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *B, magma_int_t ldb) |
ZTRSM solves one of the matrix equations op( A )*X = alpha*B, or X*op( A ) = alpha*B, where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op( A ) is one of op( A ) = A, op( A ) = A**T, or op( A ) = A**H. More... | |
void | magma_zgemm (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex_ptr dC, magma_int_t lddc) |
Perform matrix-matrix product, \( C = \alpha op(A) op(B) + \beta C \). More... | |
void | magma_zsymm (magma_side_t side, magma_uplo_t uplo, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex_ptr dC, magma_int_t lddc) |
Perform symmetric matrix-matrix product. More... | |
void | magma_zsyrk (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex beta, magmaDoubleComplex_ptr dC, magma_int_t lddc) |
Perform symmetric rank-k update. More... | |
void | magma_zsyr2k (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex_ptr dC, magma_int_t lddc) |
Perform symmetric rank-2k update. More... | |
void | magma_zhemm (magma_side_t side, magma_uplo_t uplo, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex_ptr dC, magma_int_t lddc) |
Perform Hermitian matrix-matrix product. More... | |
void | magma_zherk (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, double alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, double beta, magmaDoubleComplex_ptr dC, magma_int_t lddc) |
Perform Hermitian rank-k update. More... | |
void | magma_zher2k (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, double beta, magmaDoubleComplex_ptr dC, magma_int_t lddc) |
Perform Hermitian rank-2k update. More... | |
void | magma_ztrmm (magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_ptr dB, magma_int_t lddb) |
Perform triangular matrix-matrix product. More... | |
void | magma_ztrsm (magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_ptr dB, magma_int_t lddb) |
Solve triangular matrix-matrix system (multiple right-hand sides). More... | |
void | magma_zgemm_q (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex_ptr dC, magma_int_t lddc, magma_queue_t queue) |
Perform matrix-matrix product, \( C = \alpha op(A) op(B) + \beta C \). More... | |
void | magma_zsymm_q (magma_side_t side, magma_uplo_t uplo, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex_ptr dC, magma_int_t lddc, magma_queue_t queue) |
Perform symmetric matrix-matrix product. More... | |
void | magma_zsyrk_q (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex beta, magmaDoubleComplex_ptr dC, magma_int_t lddc, magma_queue_t queue) |
Perform symmetric rank-k update. More... | |
void | magma_zsyr2k_q (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex_ptr dC, magma_int_t lddc, magma_queue_t queue) |
Perform symmetric rank-2k update. More... | |
void | magma_zhemm_q (magma_side_t side, magma_uplo_t uplo, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex_ptr dC, magma_int_t lddc, magma_queue_t queue) |
Perform Hermitian matrix-matrix product. More... | |
void | magma_zherk_q (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, double alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, double beta, magmaDoubleComplex_ptr dC, magma_int_t lddc, magma_queue_t queue) |
Perform Hermitian rank-k update. More... | |
void | magma_zher2k_q (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, double beta, magmaDoubleComplex_ptr dC, magma_int_t lddc, magma_queue_t queue) |
Perform Hermitian rank-2k update. More... | |
void | magma_ztrmm_q (magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_ptr dB, magma_int_t lddb, magma_queue_t queue) |
Perform triangular matrix-matrix product. More... | |
void | magma_ztrsm_q (magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_ptr dB, magma_int_t lddb, magma_queue_t queue) |
Solve triangular matrix-matrix system (multiple right-hand sides). More... | |
void | magmablas_zgemm (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex_ptr dC, magma_int_t lddc) |
void | magmablas_zgemm_reduce (magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex_ptr dC, magma_int_t lddc) |
void | magmablas_ztrsm (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_ptr dB, magma_int_t lddb) |
void | magmablas_ztrsm_outofplace (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_ptr dB, magma_int_t lddb, magmaDoubleComplex_ptr dX, magma_int_t lddx, magma_int_t flag, magmaDoubleComplex_ptr d_dinvA, magma_int_t dinvA_length) |
void | magmablas_ztrsm_work (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_ptr dB, magma_int_t lddb, magmaDoubleComplex_ptr dX, magma_int_t lddx, magma_int_t flag, magmaDoubleComplex_ptr d_dinvA, magma_int_t dinvA_length) |
void | magmablas_ztrtri_diag (magma_uplo_t uplo, magma_diag_t diag, magma_int_t n, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_ptr d_dinvA) |
void | magma_zgetmatrix_1D_row_bcyclic (magma_int_t m, magma_int_t n, magmaDoubleComplex_const_ptr const *dA, magma_int_t ldda, magmaDoubleComplex *hA, magma_int_t lda, magma_int_t ngpu, magma_int_t nb) |
void | magma_zgetmatrix_1D_col_bcyclic (magma_int_t m, magma_int_t n, magmaDoubleComplex_const_ptr const *dA, magma_int_t ldda, magmaDoubleComplex *hA, magma_int_t lda, magma_int_t ngpu, magma_int_t nb) |
void | magma_zsetmatrix_1D_row_bcyclic (magma_int_t m, magma_int_t n, const magmaDoubleComplex *hA, magma_int_t lda, magmaDoubleComplex_ptr *dA, magma_int_t ldda, magma_int_t ngpu, magma_int_t nb) |
void | magma_zsetmatrix_1D_col_bcyclic (magma_int_t m, magma_int_t n, const magmaDoubleComplex *hA, magma_int_t lda, magmaDoubleComplex_ptr *dA, magma_int_t ldda, magma_int_t ngpu, magma_int_t nb) |
void | magma_zgemm_batched (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex const *const *dA_array, magma_int_t ldda, magmaDoubleComplex const *const *dB_array, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex **dC_array, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue) |
ZGEMM performs one of the matrix-matrix operations. More... | |
void | magmablas_zhemm_mgpu (magma_side_t side, magma_uplo_t uplo, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_ptr dA[], magma_int_t ldda, magma_int_t offset, magmaDoubleComplex_ptr dB[], magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex_ptr dC[], magma_int_t lddc, magmaDoubleComplex_ptr dwork[], magma_int_t dworksiz, magma_int_t ngpu, magma_int_t nb, magma_queue_t queues[][20], magma_int_t nqueue, magma_event_t events[][MagmaMaxGPUs *MagmaMaxGPUs+10], magma_int_t nevents, magma_int_t gnode[MagmaMaxGPUs][MagmaMaxGPUs+2], magma_int_t ncmplx) |
ZHEMM performs one of the matrix-matrix operations. More... | |
void | magmablas_zher2k_mgpu2 (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex_ptr dA[], magma_int_t ldda, magma_int_t a_offset, magmaDoubleComplex_ptr dB[], magma_int_t lddb, magma_int_t b_offset, double beta, magmaDoubleComplex_ptr dC[], magma_int_t lddc, magma_int_t c_offset, magma_int_t ngpu, magma_int_t nb, magma_queue_t queues[][20], magma_int_t nqueue) |
ZHER2K performs one of the Hermitian rank 2k operations. More... | |
void | magma_zherk_batched (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, double alpha, magmaDoubleComplex const *const *dA_array, magma_int_t ldda, double beta, magmaDoubleComplex **dC_array, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue) |
ZHERK performs one of the Hermitian rank k operations. More... | |
void | magmablas_ztrsm_outofplace_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex **dA_array, magma_int_t ldda, magmaDoubleComplex **dB_array, magma_int_t lddb, magmaDoubleComplex **dX_array, magma_int_t lddx, magmaDoubleComplex **dinvA_array, magma_int_t dinvA_length, magmaDoubleComplex **dA_displ, magmaDoubleComplex **dB_displ, magmaDoubleComplex **dX_displ, magmaDoubleComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue) |
ztrsm_outofplace solves one of the matrix equations on the GPU. More... | |
void | magmablas_ztrsm_work_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex **dA_array, magma_int_t ldda, magmaDoubleComplex **dB_array, magma_int_t lddb, magmaDoubleComplex **dX_array, magma_int_t lddx, magmaDoubleComplex **dinvA_array, magma_int_t dinvA_length, magmaDoubleComplex **dA_displ, magmaDoubleComplex **dB_displ, magmaDoubleComplex **dX_displ, magmaDoubleComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue) |
ztrsm_work solves one of the matrix equations on the GPU. More... | |
void | magmablas_ztrsm_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex **dA_array, magma_int_t ldda, magmaDoubleComplex **dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue) |
ztrsm solves one of the matrix equations on the GPU. More... | |
void magma_zgemm | ( | magma_trans_t | transA, |
magma_trans_t | transB, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex_const_ptr | dB, | ||
magma_int_t | lddb, | ||
magmaDoubleComplex | beta, | ||
magmaDoubleComplex_ptr | dC, | ||
magma_int_t | lddc | ||
) |
Perform matrix-matrix product, \( C = \alpha op(A) op(B) + \beta C \).
[in] | transA | Operation op(A) to perform on matrix A. |
[in] | transB | Operation op(B) to perform on matrix B. |
[in] | m | Number of rows of C and op(A). m >= 0. |
[in] | n | Number of columns of C and op(B). n >= 0. |
[in] | k | Number of columns of op(A) and rows of op(B). k >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dA | COMPLEX_16 array on GPU device. If transA == MagmaNoTrans, the m-by-k matrix A of dimension (ldda,k), ldda >= max(1,m); otherwise, the k-by-m matrix A of dimension (ldda,m), ldda >= max(1,k). |
[in] | ldda | Leading dimension of dA. |
[in] | dB | COMPLEX_16 array on GPU device. If transB == MagmaNoTrans, the k-by-n matrix B of dimension (lddb,n), lddb >= max(1,k); otherwise, the n-by-k matrix B of dimension (lddb,k), lddb >= max(1,n). |
[in] | lddb | Leading dimension of dB. |
[in] | beta | Scalar \( \beta \) |
[in,out] | dC | COMPLEX_16 array on GPU device. The m-by-n matrix C of dimension (lddc,n), lddc >= max(1,m). |
[in] | lddc | Leading dimension of dC. |
void magma_zgemm_batched | ( | magma_trans_t | transA, |
magma_trans_t | transB, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex const *const * | dA_array, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex const *const * | dB_array, | ||
magma_int_t | lddb, | ||
magmaDoubleComplex | beta, | ||
magmaDoubleComplex ** | dC_array, | ||
magma_int_t | lddc, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
ZGEMM performs one of the matrix-matrix operations.
C = alpha*op( A )*op( B ) + beta*C,
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
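[in] | transA | magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows: |
transA = MagmaNoTrans op( A ) = A.
transA = MagmaTrans op( A ) = A**T.
transA = MagmaConjTrans op( A ) = A**H.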
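[in] | transB | magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows: |
transB = MagmaNoTrans op( B ) = B.
transB = MagmaTrans op( B ) = B**T.
transB = MagmaConjTrans op( B ) = B**H.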
[in] | m | INTEGER. On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero. |
[in] | n | INTEGER. On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero. |
[in] | k | INTEGER. On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero. |
[in] | alpha | COMPLEX_16. On entry, ALPHA specifies the scalar alpha. |
[in] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A. |
[in] | ldda | INTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ). |
[in] | dB_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of DIMENSION ( lddb, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B. |
[in] | lddb | INTEGER. On entry, lddb specifies the first dimension of each array B as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ). |
[in] | beta | COMPLEX_16. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input. |
[in,out] | dC_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array C of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n matrix ( alpha*op( A )*op( B ) + beta*C ). |
[in] | lddc | INTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, m ). |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
void magma_zgemm_q | ( | magma_trans_t | transA, |
magma_trans_t | transB, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex_const_ptr | dB, | ||
magma_int_t | lddb, | ||
magmaDoubleComplex | beta, | ||
magmaDoubleComplex_ptr | dC, | ||
magma_int_t | lddc, | ||
magma_queue_t | queue | ||
) |
Perform matrix-matrix product, \( C = \alpha op(A) op(B) + \beta C \).
[in] | transA | Operation op(A) to perform on matrix A. |
[in] | transB | Operation op(B) to perform on matrix B. |
[in] | m | Number of rows of C and op(A). m >= 0. |
[in] | n | Number of columns of C and op(B). n >= 0. |
[in] | k | Number of columns of op(A) and rows of op(B). k >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dA | COMPLEX_16 array on GPU device. If transA == MagmaNoTrans, the m-by-k matrix A of dimension (ldda,k), ldda >= max(1,m); otherwise, the k-by-m matrix A of dimension (ldda,m), ldda >= max(1,k). |
[in] | ldda | Leading dimension of dA. |
[in] | dB | COMPLEX_16 array on GPU device. If transB == MagmaNoTrans, the k-by-n matrix B of dimension (lddb,n), lddb >= max(1,k); otherwise, the n-by-k matrix B of dimension (lddb,k), lddb >= max(1,n). |
[in] | lddb | Leading dimension of dB. |
[in] | beta | Scalar \( \beta \) |
[in,out] | dC | COMPLEX_16 array on GPU device. The m-by-n matrix C of dimension (lddc,n), lddc >= max(1,m). |
[in] | lddc | Leading dimension of dC. |
[in] | queue | magma_queue_t Queue to execute in. |
void magma_zgetmatrix_1D_col_bcyclic | ( | magma_int_t | m, |
magma_int_t | n, | ||
magmaDoubleComplex_const_ptr const * | dA, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex * | hA, | ||
magma_int_t | lda, | ||
magma_int_t | ngpu, | ||
magma_int_t | nb | ||
) |
void magma_zgetmatrix_1D_row_bcyclic | ( | magma_int_t | m, |
magma_int_t | n, | ||
magmaDoubleComplex_const_ptr const * | dA, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex * | hA, | ||
magma_int_t | lda, | ||
magma_int_t | ngpu, | ||
magma_int_t | nb | ||
) |
void magma_zhemm | ( | magma_side_t | side, |
magma_uplo_t | uplo, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex_const_ptr | dB, | ||
magma_int_t | lddb, | ||
magmaDoubleComplex | beta, | ||
magmaDoubleComplex_ptr | dC, | ||
magma_int_t | lddc | ||
) |
Perform Hermitian matrix-matrix product.
\( C = \alpha A B + \beta C \) (side == MagmaLeft), or
\( C = \alpha B A + \beta C \) (side == MagmaRight),
where \( A \) is Hermitian.
[in] | side | Whether A is on the left or right. |
[in] | uplo | Whether the upper or lower triangle of A is referenced. |
[in] | m | Number of rows of C. m >= 0. |
[in] | n | Number of columns of C. n >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dA | COMPLEX_16 array on GPU device. If side == MagmaLeft, the m-by-m Hermitian matrix A of dimension (ldda,m), ldda >= max(1,m); otherwise, the n-by-n Hermitian matrix A of dimension (ldda,n), ldda >= max(1,n). |
[in] | ldda | Leading dimension of dA. |
[in] | dB | COMPLEX_16 array on GPU device. The m-by-n matrix B of dimension (lddb,n), lddb >= max(1,m). |
[in] | lddb | Leading dimension of dB. |
[in] | beta | Scalar \( \beta \) |
[in,out] | dC | COMPLEX_16 array on GPU device. The m-by-n matrix C of dimension (lddc,n), lddc >= max(1,m). |
[in] | lddc | Leading dimension of dC. |
void magma_zhemm_q | ( | magma_side_t | side, |
magma_uplo_t | uplo, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex_const_ptr | dB, | ||
magma_int_t | lddb, | ||
magmaDoubleComplex | beta, | ||
magmaDoubleComplex_ptr | dC, | ||
magma_int_t | lddc, | ||
magma_queue_t | queue | ||
) |
Perform Hermitian matrix-matrix product.
\( C = \alpha A B + \beta C \) (side == MagmaLeft), or
\( C = \alpha B A + \beta C \) (side == MagmaRight),
where \( A \) is Hermitian.
[in] | side | Whether A is on the left or right. |
[in] | uplo | Whether the upper or lower triangle of A is referenced. |
[in] | m | Number of rows of C. m >= 0. |
[in] | n | Number of columns of C. n >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dA | COMPLEX_16 array on GPU device. If side == MagmaLeft, the m-by-m Hermitian matrix A of dimension (ldda,m), ldda >= max(1,m); otherwise, the n-by-n Hermitian matrix A of dimension (ldda,n), ldda >= max(1,n). |
[in] | ldda | Leading dimension of dA. |
[in] | dB | COMPLEX_16 array on GPU device. The m-by-n matrix B of dimension (lddb,n), lddb >= max(1,m). |
[in] | lddb | Leading dimension of dB. |
[in] | beta | Scalar \( \beta \) |
[in,out] | dC | COMPLEX_16 array on GPU device. The m-by-n matrix C of dimension (lddc,n), lddc >= max(1,m). |
[in] | lddc | Leading dimension of dC. |
[in] | queue | magma_queue_t Queue to execute in. |
void magma_zher2k | ( | magma_uplo_t | uplo, |
magma_trans_t | trans, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex_const_ptr | dB, | ||
magma_int_t | lddb, | ||
double | beta, | ||
magmaDoubleComplex_ptr | dC, | ||
magma_int_t | lddc | ||
) |
Perform Hermitian rank-2k update.
\( C = \alpha A B^H + \bar{\alpha} B A^H + \beta C \) (trans == MagmaNoTrans), or
\( C = \alpha A^H B + \bar{\alpha} B^H A + \beta C \) (trans == MagmaConjTrans),
where \( C \) is Hermitian.
[in] | uplo | Whether the upper or lower triangle of C is referenced. |
[in] | trans | Operation to perform on A and B. |
[in] | n | Number of rows and columns of C. n >= 0. |
[in] | k | Number of columns of A and B (for MagmaNoTrans) or rows of A and B (for MagmaConjTrans). k >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dA | COMPLEX_16 array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix A of dimension (ldda,k), ldda >= max(1,n); otherwise, the k-by-n matrix A of dimension (ldda,n), ldda >= max(1,k). |
[in] | ldda | Leading dimension of dA. |
[in] | dB | COMPLEX_16 array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix B of dimension (lddb,k), lddb >= max(1,n); otherwise, the k-by-n matrix B of dimension (lddb,n), lddb >= max(1,k). |
[in] | lddb | Leading dimension of dB. |
[in] | beta | Scalar \( \beta \) |
[in,out] | dC | COMPLEX_16 array on GPU device. The n-by-n Hermitian matrix C of dimension (lddc,n), lddc >= max(1,n). |
[in] | lddc | Leading dimension of dC. |
void magma_zher2k_q | ( | magma_uplo_t | uplo, |
magma_trans_t | trans, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex_const_ptr | dB, | ||
magma_int_t | lddb, | ||
double | beta, | ||
magmaDoubleComplex_ptr | dC, | ||
magma_int_t | lddc, | ||
magma_queue_t | queue | ||
) |
Perform Hermitian rank-2k update.
\( C = \alpha A B^H + \bar{\alpha} B A^H + \beta C \) (trans == MagmaNoTrans), or
\( C = \alpha A^H B + \bar{\alpha} B^H A + \beta C \) (trans == MagmaConjTrans),
where \( C \) is Hermitian.
[in] | uplo | Whether the upper or lower triangle of C is referenced. |
[in] | trans | Operation to perform on A and B. |
[in] | n | Number of rows and columns of C. n >= 0. |
[in] | k | Number of columns of A and B (for MagmaNoTrans) or rows of A and B (for MagmaConjTrans). k >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dA | COMPLEX_16 array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix A of dimension (ldda,k), ldda >= max(1,n); otherwise, the k-by-n matrix A of dimension (ldda,n), ldda >= max(1,k). |
[in] | ldda | Leading dimension of dA. |
[in] | dB | COMPLEX_16 array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix B of dimension (lddb,k), lddb >= max(1,n); otherwise, the k-by-n matrix B of dimension (lddb,n), lddb >= max(1,k). |
[in] | lddb | Leading dimension of dB. |
[in] | beta | Scalar \( \beta \) |
[in,out] | dC | COMPLEX_16 array on GPU device. The n-by-n Hermitian matrix C of dimension (lddc,n), lddc >= max(1,n). |
[in] | lddc | Leading dimension of dC. |
[in] | queue | magma_queue_t Queue to execute in. |
void magma_zherk | ( | magma_uplo_t | uplo, |
magma_trans_t | trans, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
double | alpha, | ||
magmaDoubleComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
double | beta, | ||
magmaDoubleComplex_ptr | dC, | ||
magma_int_t | lddc | ||
) |
Perform Hermitian rank-k update.
\( C = \alpha A A^H + \beta C \) (trans == MagmaNoTrans), or
\( C = \alpha A^H A + \beta C \) (trans == MagmaConjTrans),
where \( C \) is Hermitian.
[in] | uplo | Whether the upper or lower triangle of C is referenced. |
[in] | trans | Operation to perform on A. |
[in] | n | Number of rows and columns of C. n >= 0. |
[in] | k | Number of columns of A (for MagmaNoTrans) or rows of A (for MagmaConjTrans). k >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dA | COMPLEX_16 array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix A of dimension (ldda,k), ldda >= max(1,n); otherwise, the k-by-n matrix A of dimension (ldda,n), ldda >= max(1,k). |
[in] | ldda | Leading dimension of dA. |
[in] | beta | Scalar \( \beta \) |
[in,out] | dC | COMPLEX_16 array on GPU device. The n-by-n Hermitian matrix C of dimension (lddc,n), lddc >= max(1,n). |
[in] | lddc | Leading dimension of dC. |
void magma_zherk_batched | ( | magma_uplo_t | uplo, |
magma_trans_t | trans, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
double | alpha, | ||
magmaDoubleComplex const *const * | dA_array, | ||
magma_int_t | ldda, | ||
double | beta, | ||
magmaDoubleComplex ** | dC_array, | ||
magma_int_t | lddc, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
ZHERK performs one of the Hermitian rank k operations.
C := alpha*A*A**H + beta*C,
or
C := alpha*A**H*A + beta*C,
where alpha and beta are real scalars, C is an n by n Hermitian matrix and A is an n by k matrix in the first case and a k by n matrix in the second case.
[in] | uplo | magma_uplo_t. On entry, uplo specifies whether the upper or lower triangular part of the array C is to be referenced as follows: |
uplo = MagmaUpper Only the upper triangular part of C is to be referenced.
uplo = MagmaLower Only the lower triangular part of C is to be referenced.
[in] | trans | magma_trans_t. On entry, trans specifies the operation to be performed as follows: |
trans = MagmaNoTrans C := alpha*A*A**H + beta*C.
trans = MagmaConjTrans C := alpha*A**H*A + beta*C.
[in] | n | INTEGER. On entry, n specifies the order of the matrix C. N must be at least zero. |
[in] | k | INTEGER. On entry with trans = MagmaNoTrans, k specifies the number of columns of the matrix A, and on entry with trans = MagmaConjTrans, k specifies the number of rows of the matrix A. K must be at least zero. |
[in] | alpha | DOUBLE PRECISION. On entry, ALPHA specifies the scalar alpha. |
[in] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of DIMENSION ( ldda, ka ), where ka is k when trans = MagmaNoTrans, and is n otherwise. Before entry with trans = MagmaNoTrans, the leading n by k part of the array A must contain the matrix A, otherwise the leading k by n part of the array A must contain the matrix A. |
[in] | ldda | INTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When trans = MagmaNoTrans then ldda must be at least max( 1, n ), otherwise ldda must be at least max( 1, k ). |
[in] | beta | DOUBLE PRECISION. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input. |
[in,out] | dC_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array C of DIMENSION ( lddc, n ). Before entry with uplo = MagmaUpper, the leading n by n upper triangular part of the array C must contain the upper triangular part of the Hermitian matrix and the strictly lower triangular part of C is not referenced. On exit, the upper triangular part of the array C is overwritten by the upper triangular part of the updated matrix. Before entry with uplo = MagmaLower, the leading n by n lower triangular part of the array C must contain the lower triangular part of the Hermitian matrix and the strictly upper triangular part of C is not referenced. On exit, the lower triangular part of the array C is overwritten by the lower triangular part of the updated matrix. Note that the imaginary parts of the diagonal elements need not be set, they are assumed to be zero, and on exit they are set to zero. |
[in] | lddc | INTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, n ). |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
void magma_zherk_q | ( | magma_uplo_t | uplo, |
magma_trans_t | trans, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
double | alpha, | ||
magmaDoubleComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
double | beta, | ||
magmaDoubleComplex_ptr | dC, | ||
magma_int_t | lddc, | ||
magma_queue_t | queue | ||
) |
Perform Hermitian rank-k update.
\( C = \alpha A A^H + \beta C \) (trans == MagmaNoTrans), or
\( C = \alpha A^H A + \beta C \) (trans == MagmaConjTrans),
where \( C \) is Hermitian.
[in] | uplo | Whether the upper or lower triangle of C is referenced. |
[in] | trans | Operation to perform on A. |
[in] | n | Number of rows and columns of C. n >= 0. |
[in] | k | Number of columns of A (for MagmaNoTrans) or rows of A (for MagmaConjTrans). k >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dA | COMPLEX_16 array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix A of dimension (ldda,k), ldda >= max(1,n); otherwise, the k-by-n matrix A of dimension (ldda,n), ldda >= max(1,k). |
[in] | ldda | Leading dimension of dA. |
[in] | beta | Scalar \( \beta \) |
[in,out] | dC | COMPLEX_16 array on GPU device. The n-by-n Hermitian matrix C of dimension (lddc,n), lddc >= max(1,n). |
[in] | lddc | Leading dimension of dC. |
[in] | queue | magma_queue_t Queue to execute in. |
void magma_zsetmatrix_1D_col_bcyclic | ( | magma_int_t | m, |
magma_int_t | n, | ||
const magmaDoubleComplex * | hA, | ||
magma_int_t | lda, | ||
magmaDoubleComplex_ptr * | dA, | ||
magma_int_t | ldda, | ||
magma_int_t | ngpu, | ||
magma_int_t | nb | ||
) |
void magma_zsetmatrix_1D_row_bcyclic | ( | magma_int_t | m, |
magma_int_t | n, | ||
const magmaDoubleComplex * | hA, | ||
magma_int_t | lda, | ||
magmaDoubleComplex_ptr * | dA, | ||
magma_int_t | ldda, | ||
magma_int_t | ngpu, | ||
magma_int_t | nb | ||
) |
void magma_zsymm | ( | magma_side_t | side, |
magma_uplo_t | uplo, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex_const_ptr | dB, | ||
magma_int_t | lddb, | ||
magmaDoubleComplex | beta, | ||
magmaDoubleComplex_ptr | dC, | ||
magma_int_t | lddc | ||
) |
Perform symmetric matrix-matrix product.
\( C = \alpha A B + \beta C \) (side == MagmaLeft), or
\( C = \alpha B A + \beta C \) (side == MagmaRight),
where \( A \) is symmetric.
[in] | side | Whether A is on the left or right. |
[in] | uplo | Whether the upper or lower triangle of A is referenced. |
[in] | m | Number of rows of C. m >= 0. |
[in] | n | Number of columns of C. n >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dA | COMPLEX_16 array on GPU device. If side == MagmaLeft, the m-by-m symmetric matrix A of dimension (ldda,m), ldda >= max(1,m); otherwise, the n-by-n symmetric matrix A of dimension (ldda,n), ldda >= max(1,n). |
[in] | ldda | Leading dimension of dA. |
[in] | dB | COMPLEX_16 array on GPU device. The m-by-n matrix B of dimension (lddb,n), lddb >= max(1,m). |
[in] | lddb | Leading dimension of dB. |
[in] | beta | Scalar \( \beta \) |
[in,out] | dC | COMPLEX_16 array on GPU device. The m-by-n matrix C of dimension (lddc,n), lddc >= max(1,m). |
[in] | lddc | Leading dimension of dC. |
void magma_zsymm_q | ( | magma_side_t | side, |
magma_uplo_t | uplo, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex_const_ptr | dB, | ||
magma_int_t | lddb, | ||
magmaDoubleComplex | beta, | ||
magmaDoubleComplex_ptr | dC, | ||
magma_int_t | lddc, | ||
magma_queue_t | queue | ||
) |
Perform symmetric matrix-matrix product.
\( C = \alpha A B + \beta C \) (side == MagmaLeft), or
\( C = \alpha B A + \beta C \) (side == MagmaRight),
where \( A \) is symmetric.
[in] | side | Whether A is on the left or right. |
[in] | uplo | Whether the upper or lower triangle of A is referenced. |
[in] | m | Number of rows of C. m >= 0. |
[in] | n | Number of columns of C. n >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dA | COMPLEX_16 array on GPU device. If side == MagmaLeft, the m-by-m symmetric matrix A of dimension (ldda,m), ldda >= max(1,m); otherwise, the n-by-n symmetric matrix A of dimension (ldda,n), ldda >= max(1,n). |
[in] | ldda | Leading dimension of dA. |
[in] | dB | COMPLEX_16 array on GPU device. The m-by-n matrix B of dimension (lddb,n), lddb >= max(1,m). |
[in] | lddb | Leading dimension of dB. |
[in] | beta | Scalar \( \beta \) |
[in,out] | dC | COMPLEX_16 array on GPU device. The m-by-n matrix C of dimension (lddc,n), lddc >= max(1,m). |
[in] | lddc | Leading dimension of dC. |
[in] | queue | magma_queue_t Queue to execute in. |
void magma_zsyr2k | ( | magma_uplo_t | uplo, |
magma_trans_t | trans, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex_const_ptr | dB, | ||
magma_int_t | lddb, | ||
magmaDoubleComplex | beta, | ||
magmaDoubleComplex_ptr | dC, | ||
magma_int_t | lddc | ||
) |
Perform symmetric rank-2k update.
\( C = \alpha A B^T + \alpha B A^T + \beta C \) (trans == MagmaNoTrans), or
\( C = \alpha A^T B + \alpha B^T A + \beta C \) (trans == MagmaTrans),
where \( C \) is symmetric.
[in] | uplo | Whether the upper or lower triangle of C is referenced. |
[in] | trans | Operation to perform on A and B. |
[in] | n | Number of rows and columns of C. n >= 0. |
[in] | k | Number of columns of A and B (for MagmaNoTrans) or rows of A and B (for MagmaTrans). k >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dA | COMPLEX_16 array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix A of dimension (ldda,k), ldda >= max(1,n); otherwise, the k-by-n matrix A of dimension (ldda,n), ldda >= max(1,k). |
[in] | ldda | Leading dimension of dA. |
[in] | dB | COMPLEX_16 array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix B of dimension (lddb,k), lddb >= max(1,n); otherwise, the k-by-n matrix B of dimension (lddb,n), lddb >= max(1,k). |
[in] | lddb | Leading dimension of dB. |
[in] | beta | Scalar \( \beta \) |
[in,out] | dC | COMPLEX_16 array on GPU device. The n-by-n symmetric matrix C of dimension (lddc,n), lddc >= max(1,n). |
[in] | lddc | Leading dimension of dC. |
void magma_zsyr2k_q | ( | magma_uplo_t | uplo, |
magma_trans_t | trans, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex_const_ptr | dB, | ||
magma_int_t | lddb, | ||
magmaDoubleComplex | beta, | ||
magmaDoubleComplex_ptr | dC, | ||
magma_int_t | lddc, | ||
magma_queue_t | queue | ||
) |
Perform symmetric rank-2k update.
\( C = \alpha A B^T + \alpha B A^T + \beta C \) (trans == MagmaNoTrans), or
\( C = \alpha A^T B + \alpha B^T A + \beta C \) (trans == MagmaTrans),
where \( C \) is symmetric.
[in] | uplo | Whether the upper or lower triangle of C is referenced. |
[in] | trans | Operation to perform on A and B. |
[in] | n | Number of rows and columns of C. n >= 0. |
[in] | k | Number of columns of A and B (for MagmaNoTrans) or rows of A and B (for MagmaTrans). k >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dA | COMPLEX_16 array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix A of dimension (ldda,k), ldda >= max(1,n); otherwise, the k-by-n matrix A of dimension (ldda,n), ldda >= max(1,k). |
[in] | ldda | Leading dimension of dA. |
[in] | dB | COMPLEX_16 array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix B of dimension (lddb,k), lddb >= max(1,n); otherwise, the k-by-n matrix B of dimension (lddb,n), lddb >= max(1,k). |
[in] | lddb | Leading dimension of dB. |
[in] | beta | Scalar \( \beta \) |
[in,out] | dC | COMPLEX_16 array on GPU device. The n-by-n symmetric matrix C of dimension (lddc,n), lddc >= max(1,n). |
[in] | lddc | Leading dimension of dC. |
[in] | queue | magma_queue_t Queue to execute in. |
void magma_zsyrk | ( | magma_uplo_t | uplo, |
magma_trans_t | trans, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex | beta, | ||
magmaDoubleComplex_ptr | dC, | ||
magma_int_t | lddc | ||
) |
Perform symmetric rank-k update.
\( C = \alpha A A^T + \beta C \) (trans == MagmaNoTrans), or
\( C = \alpha A^T A + \beta C \) (trans == MagmaTrans),
where \( C \) is symmetric.
[in] | uplo | Whether the upper or lower triangle of C is referenced. |
[in] | trans | Operation to perform on A. |
[in] | n | Number of rows and columns of C. n >= 0. |
[in] | k | Number of columns of A (for MagmaNoTrans) or rows of A (for MagmaTrans). k >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dA | COMPLEX_16 array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix A of dimension (ldda,k), ldda >= max(1,n); otherwise, the k-by-n matrix A of dimension (ldda,n), ldda >= max(1,k). |
[in] | ldda | Leading dimension of dA. |
[in] | beta | Scalar \( \beta \) |
[in,out] | dC | COMPLEX_16 array on GPU device. The n-by-n symmetric matrix C of dimension (lddc,n), lddc >= max(1,n). |
[in] | lddc | Leading dimension of dC. |
void magma_zsyrk_q | ( | magma_uplo_t | uplo, |
magma_trans_t | trans, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex | beta, | ||
magmaDoubleComplex_ptr | dC, | ||
magma_int_t | lddc, | ||
magma_queue_t | queue | ||
) |
Perform symmetric rank-k update.
\( C = \alpha A A^T + \beta C \) (trans == MagmaNoTrans), or
\( C = \alpha A^T A + \beta C \) (trans == MagmaTrans),
where \( C \) is symmetric.
[in] | uplo | Whether the upper or lower triangle of C is referenced. |
[in] | trans | Operation to perform on A. |
[in] | n | Number of rows and columns of C. n >= 0. |
[in] | k | Number of columns of A (for MagmaNoTrans) or rows of A (for MagmaTrans). k >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dA | COMPLEX_16 array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix A of dimension (ldda,k), ldda >= max(1,n); otherwise, the k-by-n matrix A of dimension (ldda,n), ldda >= max(1,k). |
[in] | ldda | Leading dimension of dA. |
[in] | beta | Scalar \( \beta \) |
[in,out] | dC | COMPLEX_16 array on GPU device. The n-by-n symmetric matrix C of dimension (lddc,n), lddc >= max(1,n). |
[in] | lddc | Leading dimension of dC. |
[in] | queue | magma_queue_t Queue to execute in. |
void magma_ztrmm | ( | magma_side_t | side, |
magma_uplo_t | uplo, | ||
magma_trans_t | trans, | ||
magma_diag_t | diag, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex_ptr | dB, | ||
magma_int_t | lddb | ||
) |
Perform triangular matrix-matrix product.
\( B = \alpha op(A) B \) (side == MagmaLeft), or
\( B = \alpha B op(A) \) (side == MagmaRight),
where \( A \) is triangular.
[in] | side | Whether A is on the left or right. |
[in] | uplo | Whether A is upper or lower triangular. |
[in] | trans | Operation to perform on A. |
[in] | diag | Whether the diagonal of A is assumed to be unit or non-unit. |
[in] | m | Number of rows of B. m >= 0. |
[in] | n | Number of columns of B. n >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dA | COMPLEX_16 array on GPU device. If side == MagmaLeft, the m-by-m triangular matrix A of dimension (ldda,m), ldda >= max(1,m); otherwise, the n-by-n triangular matrix A of dimension (ldda,n), ldda >= max(1,n). |
[in] | ldda | Leading dimension of dA. |
[in,out] | dB | COMPLEX_16 array on GPU device. On entry, the m-by-n matrix B of dimension (lddb,n), lddb >= max(1,m). On exit, B is overwritten by the result. |
[in] | lddb | Leading dimension of dB. |
void magma_ztrmm_q | ( | magma_side_t | side, |
magma_uplo_t | uplo, | ||
magma_trans_t | trans, | ||
magma_diag_t | diag, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex_ptr | dB, | ||
magma_int_t | lddb, | ||
magma_queue_t | queue | ||
) |
Perform triangular matrix-matrix product.
\( B = \alpha op(A) B \) (side == MagmaLeft), or
\( B = \alpha B op(A) \) (side == MagmaRight),
where \( A \) is triangular.
[in] | side | Whether A is on the left or right. |
[in] | uplo | Whether A is upper or lower triangular. |
[in] | trans | Operation to perform on A. |
[in] | diag | Whether the diagonal of A is assumed to be unit or non-unit. |
[in] | m | Number of rows of B. m >= 0. |
[in] | n | Number of columns of B. n >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dA | COMPLEX_16 array on GPU device. If side == MagmaLeft, the m-by-m triangular matrix A of dimension (ldda,m), ldda >= max(1,m); otherwise, the n-by-n triangular matrix A of dimension (ldda,n), ldda >= max(1,n). |
[in] | ldda | Leading dimension of dA. |
[in,out] | dB | COMPLEX_16 array on GPU device. On entry, the m-by-n matrix B of dimension (lddb,n), lddb >= max(1,m). On exit, B is overwritten by the result. |
[in] | lddb | Leading dimension of dB. |
[in] | queue | magma_queue_t Queue to execute in. |
void magma_ztrsm | ( | magma_side_t | side, |
magma_uplo_t | uplo, | ||
magma_trans_t | trans, | ||
magma_diag_t | diag, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex_ptr | dB, | ||
magma_int_t | lddb | ||
) |
Solve triangular matrix-matrix system (multiple right-hand sides).
\( op(A) X = \alpha B \) (side == MagmaLeft), or
\( X op(A) = \alpha B \) (side == MagmaRight),
where \( A \) is triangular.
[in] | side | Whether A is on the left or right. |
[in] | uplo | Whether A is upper or lower triangular. |
[in] | trans | Operation to perform on A. |
[in] | diag | Whether the diagonal of A is assumed to be unit or non-unit. |
[in] | m | Number of rows of B. m >= 0. |
[in] | n | Number of columns of B. n >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dA | COMPLEX_16 array on GPU device. If side == MagmaLeft, the m-by-m triangular matrix A of dimension (ldda,m), ldda >= max(1,m); otherwise, the n-by-n triangular matrix A of dimension (ldda,n), ldda >= max(1,n). |
[in] | ldda | Leading dimension of dA. |
[in,out] | dB | COMPLEX_16 array on GPU device. On entry, m-by-n matrix B of dimension (lddb,n), lddb >= max(1,m). On exit, overwritten with the solution matrix X. |
[in] | lddb | Leading dimension of dB. |
magma_int_t magma_ztrsm_m | ( | magma_int_t | ngpu, |
magma_side_t | side, | ||
magma_uplo_t | uplo, | ||
magma_trans_t | transa, | ||
magma_diag_t | diag, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magmaDoubleComplex | alpha, | ||
const magmaDoubleComplex * | A, | ||
magma_int_t | lda, | ||
magmaDoubleComplex * | B, | ||
magma_int_t | ldb | ||
) |
ZTRSM solves one of the matrix equations op( A )*X = alpha*B, or X*op( A ) = alpha*B, where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op( A ) is one of
op( A ) = A, or op( A ) = A**T, or op( A ) = A**H.
The matrix X is overwritten on B.
[in] | ngpu | INTEGER Number of GPUs to use. ngpu > 0. |
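[in] | side | magma_side_t. On entry, SIDE specifies whether op( A ) appears on the left or right of X as follows: SIDE = MagmaLeft: op( A )*X = alpha*B; SIDE = MagmaRight: X*op( A ) = alpha*B. |
[in] | uplo | magma_uplo_t. On entry, UPLO specifies whether the matrix A is an upper or lower triangular matrix as follows: UPLO = MagmaUpper: A is an upper triangular matrix; UPLO = MagmaLower: A is a lower triangular matrix. |
[in] | transa | magma_trans_t. On entry, TRANSA specifies the form of op( A ) to be used in the matrix multiplication as follows: TRANSA = MagmaNoTrans: op( A ) = A; TRANSA = MagmaTrans: op( A ) = A**T; TRANSA = MagmaConjTrans: op( A ) = A**H. |
[in] | diag | magma_diag_t. On entry, DIAG specifies whether or not A is unit triangular as follows: DIAG = MagmaUnit: A is assumed to be unit triangular; DIAG = MagmaNonUnit: A is not assumed to be unit triangular. |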
[in] | m | INTEGER. On entry, M specifies the number of rows of B. M must be at least zero. |
[in] | n | INTEGER. On entry, N specifies the number of columns of B. N must be at least zero. |
[in] | alpha | COMPLEX_16. On entry, ALPHA specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. |
[in] | A | COMPLEX_16 array of DIMENSION ( LDA, k ), where k is m when SIDE = MagmaLeft and is n when SIDE = MagmaRight. Before entry with UPLO = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with UPLO = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when DIAG = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. |
[in] | lda | INTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. When SIDE = MagmaLeft then LDA >= max( 1, m ), when SIDE = MagmaRight then LDA >= max( 1, n ). |
[in,out] | B | COMPLEX_16 array of DIMENSION ( LDB, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B, and on exit is overwritten by the solution matrix X. |
[in] | ldb | INTEGER. On entry, LDB specifies the first dimension of B as declared in the calling (sub) program. LDB must be at least max( 1, m ). |
void magma_ztrsm_q | ( | magma_side_t | side, |
magma_uplo_t | uplo, | ||
magma_trans_t | trans, | ||
magma_diag_t | diag, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex_ptr | dB, | ||
magma_int_t | lddb, | ||
magma_queue_t | queue | ||
) |
Solve triangular matrix-matrix system (multiple right-hand sides).
\( op(A) X = \alpha B \) (side == MagmaLeft), or
\( X op(A) = \alpha B \) (side == MagmaRight),
where \( A \) is triangular.
[in] | side | Whether A is on the left or right. |
[in] | uplo | Whether A is upper or lower triangular. |
[in] | trans | Operation to perform on A. |
[in] | diag | Whether the diagonal of A is assumed to be unit or non-unit. |
[in] | m | Number of rows of B. m >= 0. |
[in] | n | Number of columns of B. n >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dA | COMPLEX_16 array on GPU device. If side == MagmaLeft, the m-by-m triangular matrix A of dimension (ldda,m), ldda >= max(1,m); otherwise, the n-by-n triangular matrix A of dimension (ldda,n), ldda >= max(1,n). |
[in] | ldda | Leading dimension of dA. |
[in,out] | dB | COMPLEX_16 array on GPU device. On entry, m-by-n matrix B of dimension (lddb,n), lddb >= max(1,m). On exit, overwritten with the solution matrix X. |
[in] | lddb | Leading dimension of dB. |
[in] | queue | magma_queue_t Queue to execute in. |
void magmablas_zgemm | ( | magma_trans_t | transA, |
magma_trans_t | transB, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex_const_ptr | dB, | ||
magma_int_t | lddb, | ||
magmaDoubleComplex | beta, | ||
magmaDoubleComplex_ptr | dC, | ||
magma_int_t | lddc | ||
) |
void magmablas_zgemm_reduce | ( | magma_int_t | m, |
magma_int_t | n, | ||
magma_int_t | k, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex_const_ptr | dB, | ||
magma_int_t | lddb, | ||
magmaDoubleComplex | beta, | ||
magmaDoubleComplex_ptr | dC, | ||
magma_int_t | lddc | ||
) |
void magmablas_zhemm_mgpu | ( | magma_side_t | side, |
magma_uplo_t | uplo, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex_ptr | dA[], | ||
magma_int_t | ldda, | ||
magma_int_t | offset, | ||
magmaDoubleComplex_ptr | dB[], | ||
magma_int_t | lddb, | ||
magmaDoubleComplex | beta, | ||
magmaDoubleComplex_ptr | dC[], | ||
magma_int_t | lddc, | ||
magmaDoubleComplex_ptr | dwork[], | ||
magma_int_t | dworksiz, | ||
magma_int_t | ngpu, | ||
magma_int_t | nb, | ||
magma_queue_t | queues[][20], | ||
magma_int_t | nqueue, | ||
magma_event_t | events[][MagmaMaxGPUs *MagmaMaxGPUs+10], | ||
magma_int_t | nevents, | ||
magma_int_t | gnode[MagmaMaxGPUs][MagmaMaxGPUs+2], | ||
magma_int_t | ncmplx | ||
) |
ZHEMM performs one of the matrix-matrix operations.
C := alpha*A*B + beta*C, or C := alpha*B*A + beta*C,
where alpha and beta are scalars, A is a Hermitian matrix, and B and C are m by n matrices.
[in] | side | magma_side_t On entry, SIDE specifies whether the Hermitian matrix A appears on the left or right in the operation as follows: |
SIDE = MagmaLeft C := alpha*A*B + beta*C,
SIDE = MagmaRight C := alpha*B*A + beta*C.
Note: currently, only MagmaLeft is implemented.
[in] | uplo | magma_uplo_t On entry, UPLO specifies whether the upper or lower triangular part of the Hermitian matrix A is to be referenced as follows: |
UPLO = MagmaUpper Only the upper triangular part of the Hermitian matrix is to be referenced.
UPLO = MagmaLower Only the lower triangular part of the Hermitian matrix is to be referenced.
Note: currently, only MagmaLower is implemented.
[in] | m | INTEGER On entry, M specifies the number of rows of the matrix dC. M >= 0. |
[in] | n | INTEGER On entry, N specifies the number of columns of the matrix dC. N >= 0. |
[in] | alpha | COMPLEX*16 On entry, ALPHA specifies the scalar alpha. |
[in] | dA | COMPLEX*16 array of DIMENSION ( LDDA, ka ), where ka is m when SIDE = MagmaLeft and is n otherwise. Before entry with SIDE = MagmaLeft, the m by m part of the array A must contain the Hermitian matrix, such that when UPLO = MagmaUpper, the leading m by m upper triangular part of the array A must contain the upper triangular part of the Hermitian matrix and the strictly lower triangular part of A is not referenced, and when UPLO = MagmaLower, the leading m by m lower triangular part of the array A must contain the lower triangular part of the Hermitian matrix and the strictly upper triangular part of A is not referenced. Before entry with SIDE = MagmaRight, the n by n part of the array A must contain the Hermitian matrix, such that when UPLO = MagmaUpper, the leading n by n upper triangular part of the array A must contain the upper triangular part of the Hermitian matrix and the strictly lower triangular part of A is not referenced, and when UPLO = MagmaLower, the leading n by n lower triangular part of the array A must contain the lower triangular part of the Hermitian matrix and the strictly upper triangular part of A is not referenced. Note that the imaginary parts of the diagonal elements need not be set, they are assumed to be zero. |
[in] | ldda | INTEGER On entry, LDDA specifies the first dimension of A as declared in the calling (sub) program. When SIDE = MagmaLeft then LDDA >= max( 1, m ), otherwise LDDA >= max( 1, n ). |
[in] | dB | COMPLEX*16 array of DIMENSION ( LDDB, n ). Before entry, the leading m by n part of the array B must contain the matrix B. |
[in] | lddb | INTEGER On entry, LDDB specifies the first dimension of B as declared in the calling (sub) program. LDDB >= max( 1, m ). |
[in] | beta | COMPLEX*16 On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input. |
[in,out] | dC | COMPLEX*16 array of DIMENSION ( LDDC, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n updated matrix. |
[in] | lddc | INTEGER On entry, LDDC specifies the first dimension of C as declared in the calling (sub) program. LDDC >= max( 1, m ). |
void magmablas_zher2k_mgpu2 | ( | magma_uplo_t | uplo, |
magma_trans_t | trans, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex_ptr | dA[], | ||
magma_int_t | ldda, | ||
magma_int_t | a_offset, | ||
magmaDoubleComplex_ptr | dB[], | ||
magma_int_t | lddb, | ||
magma_int_t | b_offset, | ||
double | beta, | ||
magmaDoubleComplex_ptr | dC[], | ||
magma_int_t | lddc, | ||
magma_int_t | c_offset, | ||
magma_int_t | ngpu, | ||
magma_int_t | nb, | ||
magma_queue_t | queues[][20], | ||
magma_int_t | nqueue | ||
) |
ZHER2K performs one of the Hermitian rank 2k operations.
C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C,
or
C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C,
where alpha and beta are scalars with beta real, C is an n by n Hermitian matrix and A and B are n by k matrices in the first case and k by n matrices in the second case.
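[in] | uplo | magma_uplo_t. On entry, UPLO specifies whether the upper or lower triangular part of the array C is to be referenced as follows: UPLO = MagmaUpper: only the upper triangular part of C is to be referenced; UPLO = MagmaLower: only the lower triangular part of C is to be referenced. |
[in] | trans | magma_trans_t. On entry, TRANS specifies the operation to be performed as follows: TRANS = MagmaNoTrans: C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C; TRANS = MagmaConjTrans: C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C. |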
[in] | n | INTEGER. On entry, N specifies the order of the matrix C. N must be at least zero. |
[in] | k | INTEGER. On entry with TRANS = MagmaNoTrans, K specifies the number of columns of the matrices A and B, and on entry with TRANS = MagmaConjTrans, K specifies the number of rows of the matrices A and B. K must be at least zero. |
[in] | alpha | COMPLEX*16. On entry, ALPHA specifies the scalar alpha. |
[in] | dA | COMPLEX*16 array of DIMENSION ( LDDA, ka ), where ka is k when TRANS = MagmaNoTrans, and is n otherwise. Before entry with TRANS = MagmaNoTrans, the leading n by k part of the array A must contain the matrix A, otherwise the leading k by n part of the array A must contain the matrix A. |
[TODO: describe distribution: duplicated on all GPUs.]
[in] | ldda | INTEGER. On entry, LDDA specifies the first dimension of A as declared in the calling (sub) program. When TRANS = MagmaNoTrans then LDDA must be at least max( 1, n ), otherwise LDDA must be at least max( 1, k ). |
[in] | a_offset | INTEGER Row offset to start sub-matrix of dA. Uses dA(a_offset:a_offset+n, :). 0 <= a_offset < ldda. |
[in] | dB | COMPLEX*16 array of DIMENSION ( LDDB, kb ), where kb is k when TRANS = MagmaNoTrans, and is n otherwise. Before entry with TRANS = MagmaNoTrans, the leading n by k part of the array B must contain the matrix B, otherwise the leading k by n part of the array B must contain the matrix B. |
[TODO: describe distribution: duplicated on all GPUs.]
[in] | lddb | INTEGER. On entry, LDDB specifies the first dimension of B as declared in the calling (sub) program. When TRANS = MagmaNoTrans then LDDB must be at least max( 1, n ), otherwise LDDB must be at least max( 1, k ). |
[in] | b_offset | INTEGER Row offset to start sub-matrix of dB. Uses dB(b_offset:b_offset+n, :). 0 <= b_offset < lddb. |
[in] | beta | DOUBLE PRECISION. On entry, BETA specifies the scalar beta. |
[in,out] | dC | COMPLEX*16 array of DIMENSION ( LDDC, n ). Before entry with UPLO = MagmaUpper, the leading n by n upper triangular part of the array C must contain the upper triangular part of the Hermitian matrix and the strictly lower triangular part of C is not referenced. On exit, the upper triangular part of the array C is overwritten by the upper triangular part of the updated matrix. Before entry with UPLO = MagmaLower, the leading n by n lower triangular part of the array C must contain the lower triangular part of the Hermitian matrix and the strictly upper triangular part of C is not referenced. On exit, the lower triangular part of the array C is overwritten by the lower triangular part of the updated matrix. Note that the imaginary parts of the diagonal elements need not be set, they are assumed to be zero, and on exit they are set to zero. [TODO: verify] |
[TODO: describe distribution: 1D column block-cyclic across GPUs.]
[in] | lddc | INTEGER. On entry, LDDC specifies the first dimension of C as declared in the calling (sub) program. LDDC must be at least max( 1, n ). |
[in] | c_offset | INTEGER. Row and column offset to start sub-matrix of dC. Uses dC(c_offset:c_offset+n, c_offset:c_offset+n). 0 <= c_offset < lddc. |
[in] | ngpu | INTEGER. Number of GPUs over which matrix C is distributed. |
[in] | nb | INTEGER. Block size used for distribution of C. |
[in] | queues | array of CUDA queues, of dimension NGPU by 20. Streams to use for running multiple GEMMs in parallel. Only up to nqueue queues are used on each GPU. |
[in] | nqueue | INTEGER. Number of queues to use on each device |
void magmablas_ztrsm | ( | magma_side_t | side, |
magma_uplo_t | uplo, | ||
magma_trans_t | transA, | ||
magma_diag_t | diag, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex_ptr | dB, | ||
magma_int_t | lddb | ||
) |
void magmablas_ztrsm_batched | ( | magma_side_t | side, |
magma_uplo_t | uplo, | ||
magma_trans_t | transA, | ||
magma_diag_t | diag, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex ** | dA_array, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex ** | dB_array, | ||
magma_int_t | lddb, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
ztrsm solves one of the matrix equations on gpu
op(A)*X = alpha*B, or X*op(A) = alpha*B,
where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of
op(A) = A, or op(A) = A^T, or op(A) = A^H.
The matrix X is overwritten on B.
This is an asynchronous version of magmablas_ztrsm with flag, d_dinvA and dX workspaces as arguments.
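[in] | side | magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: side = MagmaLeft: op(A)*X = alpha*B; side = MagmaRight: X*op(A) = alpha*B. |
[in] | uplo | magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: uplo = MagmaUpper: A is an upper triangular matrix; uplo = MagmaLower: A is a lower triangular matrix. |
[in] | transA | magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: transA = MagmaNoTrans: op(A) = A; transA = MagmaTrans: op(A) = A^T; transA = MagmaConjTrans: op(A) = A^H. |
[in] | diag | magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: diag = MagmaUnit: A is assumed to be unit triangular; diag = MagmaNonUnit: A is not assumed to be unit triangular. |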
[in] | m | INTEGER. On entry, m specifies the number of rows of B. m >= 0. |
[in] | n | INTEGER. On entry, n specifies the number of columns of B. n >= 0. |
[in] | alpha | COMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. |
[in] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. |
[in] | ldda | INTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ). |
[in,out] | dB_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B. On exit, overwritten by the solution matrix X. |
[in] | lddb | INTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ). |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
void magmablas_ztrsm_outofplace | ( | magma_side_t | side, |
magma_uplo_t | uplo, | ||
magma_trans_t | transA, | ||
magma_diag_t | diag, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex_ptr | dB, | ||
magma_int_t | lddb, | ||
magmaDoubleComplex_ptr | dX, | ||
magma_int_t | lddx, | ||
magma_int_t | flag, | ||
magmaDoubleComplex_ptr | d_dinvA, | ||
magma_int_t | dinvA_length | ||
) |
void magmablas_ztrsm_outofplace_batched | ( | magma_side_t | side, |
magma_uplo_t | uplo, | ||
magma_trans_t | transA, | ||
magma_diag_t | diag, | ||
magma_int_t | flag, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex ** | dA_array, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex ** | dB_array, | ||
magma_int_t | lddb, | ||
magmaDoubleComplex ** | dX_array, | ||
magma_int_t | lddx, | ||
magmaDoubleComplex ** | dinvA_array, | ||
magma_int_t | dinvA_length, | ||
magmaDoubleComplex ** | dA_displ, | ||
magmaDoubleComplex ** | dB_displ, | ||
magmaDoubleComplex ** | dX_displ, | ||
magmaDoubleComplex ** | dinvA_displ, | ||
magma_int_t | resetozero, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
ztrsm_outofplace solves one of the matrix equations on gpu
op(A)*X = alpha*B, or X*op(A) = alpha*B,
where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of
op(A) = A, or op(A) = A^T, or op(A) = A^H.
The solution matrix X is returned in dX_array.
This is an asynchronous version of magmablas_ztrsm with flag, d_dinvA and dX workspaces as arguments.
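[in] | side | magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: side = MagmaLeft: op(A)*X = alpha*B; side = MagmaRight: X*op(A) = alpha*B. |
[in] | uplo | magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: uplo = MagmaUpper: A is an upper triangular matrix; uplo = MagmaLower: A is a lower triangular matrix. |
[in] | transA | magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: transA = MagmaNoTrans: op(A) = A; transA = MagmaTrans: op(A) = A^T; transA = MagmaConjTrans: op(A) = A^H. |
[in] | diag | magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: diag = MagmaUnit: A is assumed to be unit triangular; diag = MagmaNonUnit: A is not assumed to be unit triangular. |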
[in] | flag | BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in dinvA_array) are already inverted. |
[in] | m | INTEGER. On entry, m specifies the number of rows of B. m >= 0. |
[in] | n | INTEGER. On entry, n specifies the number of columns of B. n >= 0. |
[in] | alpha | COMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. |
[in] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. |
[in] | ldda | INTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ). |
[in] | dB_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B. |
[in] | lddb | INTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ). |
[in,out] | dX_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array X of dimension ( lddx, n ). On entry, should be set to 0. On exit, the solution matrix X. |
[in] | lddx | INTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ). |
dinvA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/TRI_NB)*TRI_NB*TRI_NB; if side == MagmaRight, dinvA must be of size >= ceil(n/TRI_NB)*TRI_NB*TRI_NB, where TRI_NB = 128. | |
[in] | dinvA_length | INTEGER The size of each workspace matrix dinvA |
dA_displ | (workspace) Array of pointers, dimension (batchCount). | |
dB_displ | (workspace) Array of pointers, dimension (batchCount). | |
dX_displ | (workspace) Array of pointers, dimension (batchCount). | |
dinvA_displ | (workspace) Array of pointers, dimension (batchCount). | |
[in] | resetozero | INTEGER Used internally by ZTRTRI_DIAG routine |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
void magmablas_ztrsm_work | ( | magma_side_t | side, |
magma_uplo_t | uplo, | ||
magma_trans_t | transA, | ||
magma_diag_t | diag, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex_ptr | dB, | ||
magma_int_t | lddb, | ||
magmaDoubleComplex_ptr | dX, | ||
magma_int_t | lddx, | ||
magma_int_t | flag, | ||
magmaDoubleComplex_ptr | d_dinvA, | ||
magma_int_t | dinvA_length | ||
) |
void magmablas_ztrsm_work_batched | ( | magma_side_t | side, |
magma_uplo_t | uplo, | ||
magma_trans_t | transA, | ||
magma_diag_t | diag, | ||
magma_int_t | flag, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex ** | dA_array, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex ** | dB_array, | ||
magma_int_t | lddb, | ||
magmaDoubleComplex ** | dX_array, | ||
magma_int_t | lddx, | ||
magmaDoubleComplex ** | dinvA_array, | ||
magma_int_t | dinvA_length, | ||
magmaDoubleComplex ** | dA_displ, | ||
magmaDoubleComplex ** | dB_displ, | ||
magmaDoubleComplex ** | dX_displ, | ||
magmaDoubleComplex ** | dinvA_displ, | ||
magma_int_t | resetozero, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
ztrsm_work solves one of the matrix equations on the GPU
op(A)*X = alpha*B, or X*op(A) = alpha*B,
where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of
op(A) = A, or op(A) = A^T, or op(A) = A^H.
The matrix X is overwritten on B.
This is an asynchronous version of magmablas_ztrsm with flag, d_dinvA and dX workspaces as arguments.
[in] | side | magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows:
side = MagmaLeft: op(A)*X = alpha*B; side = MagmaRight: X*op(A) = alpha*B. |
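[in] | uplo | magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
uplo = MagmaUpper: A is an upper triangular matrix; uplo = MagmaLower: A is a lower triangular matrix. |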
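[in] | transA | magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows:
transA = MagmaNoTrans: op(A) = A; transA = MagmaTrans: op(A) = A^T; transA = MagmaConjTrans: op(A) = A^H. |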
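[in] | diag | magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
diag = MagmaUnit: A is assumed to be unit triangular; diag = MagmaNonUnit: A is not assumed to be unit triangular. |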
[in] | flag | BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted. |
[in] | m | INTEGER. On entry, m specifies the number of rows of B. m >= 0. |
[in] | n | INTEGER. On entry, n specifies the number of columns of B. n >= 0. |
[in] | alpha | COMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. |
[in] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. |
[in] | ldda | INTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ). |
[in,out] | dB_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X. |
[in] | lddb | INTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ). |
[in,out] | dX_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array X of dimension ( lddx, n ). On entry, should be set to 0. On exit, the solution matrix X. |
[in] | lddx | INTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ). |
dinvA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array dinvA, a workspace on the device. If side == MagmaLeft, dinvA must be of size >= ceil(m/TRI_NB)*TRI_NB*TRI_NB; if side == MagmaRight, dinvA must be of size >= ceil(n/TRI_NB)*TRI_NB*TRI_NB, where TRI_NB = 128. | |
[in] | dinvA_length | INTEGER. The size of each workspace matrix dinvA. |
dA_displ | (workspace) Array of pointers, dimension (batchCount). | |
dB_displ | (workspace) Array of pointers, dimension (batchCount). | |
dX_displ | (workspace) Array of pointers, dimension (batchCount). | |
dinvA_displ | (workspace) Array of pointers, dimension (batchCount). | |
[in] | resetozero | INTEGER. Used internally by the ZTRTRI_DIAG routine. |
[in] | batchCount | INTEGER. The number of matrices to operate on. |
[in] | queue | magma_queue_t. Queue to execute in. |
void magmablas_ztrtri_diag | ( | magma_uplo_t | uplo, |
magma_diag_t | diag, | ||
magma_int_t | n, | ||
magmaDoubleComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex_ptr | d_dinvA | ||
) |