MAGMA 2.9.0 — Matrix Algebra for GPU and Multicore Architectures
\(C = \alpha \;op(A) \;op(B) + \beta C\) More...
Functions | |
void | magma_cgemm (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_const_ptr dB, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex_ptr dC, magma_int_t lddc, magma_queue_t queue) |
Perform matrix-matrix product, \( C = \alpha op(A) op(B) + \beta C \). | |
void | magma_dgemm (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, double alpha, magmaDouble_const_ptr dA, magma_int_t ldda, magmaDouble_const_ptr dB, magma_int_t lddb, double beta, magmaDouble_ptr dC, magma_int_t lddc, magma_queue_t queue) |
Perform matrix-matrix product, \( C = \alpha op(A) op(B) + \beta C \). | |
void | magma_hgemm (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaHalf alpha, magmaHalf_const_ptr dA, magma_int_t ldda, magmaHalf_const_ptr dB, magma_int_t lddb, magmaHalf beta, magmaHalf_ptr dC, magma_int_t lddc, magma_queue_t queue) |
Perform FP16 matrix-matrix product, \( C = \alpha op(A) op(B) + \beta C \). | |
void | magma_sgemm (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, float alpha, magmaFloat_const_ptr dA, magma_int_t ldda, magmaFloat_const_ptr dB, magma_int_t lddb, float beta, magmaFloat_ptr dC, magma_int_t lddc, magma_queue_t queue) |
Perform matrix-matrix product, \( C = \alpha op(A) op(B) + \beta C \). | |
void | magma_zgemm (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex_ptr dC, magma_int_t lddc, magma_queue_t queue) |
Perform matrix-matrix product, \( C = \alpha op(A) op(B) + \beta C \). | |
void | magmablas_cgemm (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex const *dA, magma_int_t ldda, magmaFloatComplex const *dB, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex *dC, magma_int_t lddc, magma_queue_t queue) |
CGEMM performs one of the matrix-matrix operations C = alpha*op( A )*op( B ) + beta*C. | |
void | magmablas_cgemm_reduce (magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_const_ptr dB, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex_ptr dC, magma_int_t lddc, magma_queue_t queue) |
CGEMM_REDUCE performs the matrix-matrix operation C := alpha*A^T*B + beta*C. | |
void | magmablas_dgemm (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, double alpha, double const *dA, magma_int_t ldda, double const *dB, magma_int_t lddb, double beta, double *dC, magma_int_t lddc, magma_queue_t queue) |
DGEMM performs one of the matrix-matrix operations C = alpha*op( A )*op( B ) + beta*C. | |
void | magmablas_dgemm_reduce (magma_int_t m, magma_int_t n, magma_int_t k, double alpha, magmaDouble_const_ptr dA, magma_int_t ldda, magmaDouble_const_ptr dB, magma_int_t lddb, double beta, magmaDouble_ptr dC, magma_int_t lddc, magma_queue_t queue) |
DGEMM_REDUCE performs the matrix-matrix operation C := alpha*A^T*B + beta*C. | |
void | magmablas_sgemm (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, float alpha, float const *dA, magma_int_t ldda, float const *dB, magma_int_t lddb, float beta, float *dC, magma_int_t lddc, magma_queue_t queue) |
SGEMM performs one of the matrix-matrix operations C = alpha*op( A )*op( B ) + beta*C. | |
void | magmablas_sgemm_reduce (magma_int_t m, magma_int_t n, magma_int_t k, float alpha, magmaFloat_const_ptr dA, magma_int_t ldda, magmaFloat_const_ptr dB, magma_int_t lddb, float beta, magmaFloat_ptr dC, magma_int_t lddc, magma_queue_t queue) |
SGEMM_REDUCE performs the matrix-matrix operation C := alpha*A^T*B + beta*C. | |
void | magmablas_zgemm (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex const *dA, magma_int_t ldda, magmaDoubleComplex const *dB, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex *dC, magma_int_t lddc, magma_queue_t queue) |
ZGEMM performs one of the matrix-matrix operations C = alpha*op( A )*op( B ) + beta*C. | |
void | magmablas_zgemm_reduce (magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex_ptr dC, magma_int_t lddc, magma_queue_t queue) |
ZGEMM_REDUCE performs the matrix-matrix operation C := alpha*A^T*B + beta*C. | |
\(C = \alpha \;op(A) \;op(B) + \beta C\)
void magma_cgemm | ( | magma_trans_t | transA, |
magma_trans_t | transB, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
magmaFloatComplex | alpha, | ||
magmaFloatComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaFloatComplex_const_ptr | dB, | ||
magma_int_t | lddb, | ||
magmaFloatComplex | beta, | ||
magmaFloatComplex_ptr | dC, | ||
magma_int_t | lddc, | ||
magma_queue_t | queue ) |
Perform matrix-matrix product, \( C = \alpha op(A) op(B) + \beta C \).
[in] | transA | Operation op(A) to perform on matrix A. |
[in] | transB | Operation op(B) to perform on matrix B. |
[in] | m | Number of rows of C and op(A). m >= 0. |
[in] | n | Number of columns of C and op(B). n >= 0. |
[in] | k | Number of columns of op(A) and rows of op(B). k >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dA | COMPLEX array on GPU device. If transA == MagmaNoTrans, the m-by-k matrix A of dimension (ldda,k), ldda >= max(1,m); otherwise, the k-by-m matrix A of dimension (ldda,m), ldda >= max(1,k). |
[in] | ldda | Leading dimension of dA. |
[in] | dB | COMPLEX array on GPU device. If transB == MagmaNoTrans, the k-by-n matrix B of dimension (lddb,n), lddb >= max(1,k); otherwise, the n-by-k matrix B of dimension (lddb,k), lddb >= max(1,n). |
[in] | lddb | Leading dimension of dB. |
[in] | beta | Scalar \( \beta \) |
[in,out] | dC | COMPLEX array on GPU device. The m-by-n matrix C of dimension (lddc,n), lddc >= max(1,m). |
[in] | lddc | Leading dimension of dC. |
[in] | queue | magma_queue_t Queue to execute in. |
void magma_dgemm | ( | magma_trans_t | transA, |
magma_trans_t | transB, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
double | alpha, | ||
magmaDouble_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDouble_const_ptr | dB, | ||
magma_int_t | lddb, | ||
double | beta, | ||
magmaDouble_ptr | dC, | ||
magma_int_t | lddc, | ||
magma_queue_t | queue ) |
Perform matrix-matrix product, \( C = \alpha op(A) op(B) + \beta C \).
[in] | transA | Operation op(A) to perform on matrix A. |
[in] | transB | Operation op(B) to perform on matrix B. |
[in] | m | Number of rows of C and op(A). m >= 0. |
[in] | n | Number of columns of C and op(B). n >= 0. |
[in] | k | Number of columns of op(A) and rows of op(B). k >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dA | DOUBLE PRECISION array on GPU device. If transA == MagmaNoTrans, the m-by-k matrix A of dimension (ldda,k), ldda >= max(1,m); otherwise, the k-by-m matrix A of dimension (ldda,m), ldda >= max(1,k). |
[in] | ldda | Leading dimension of dA. |
[in] | dB | DOUBLE PRECISION array on GPU device. If transB == MagmaNoTrans, the k-by-n matrix B of dimension (lddb,n), lddb >= max(1,k); otherwise, the n-by-k matrix B of dimension (lddb,k), lddb >= max(1,n). |
[in] | lddb | Leading dimension of dB. |
[in] | beta | Scalar \( \beta \) |
[in,out] | dC | DOUBLE PRECISION array on GPU device. The m-by-n matrix C of dimension (lddc,n), lddc >= max(1,m). |
[in] | lddc | Leading dimension of dC. |
[in] | queue | magma_queue_t Queue to execute in. |
void magma_hgemm | ( | magma_trans_t | transA, |
magma_trans_t | transB, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
magmaHalf | alpha, | ||
magmaHalf_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaHalf_const_ptr | dB, | ||
magma_int_t | lddb, | ||
magmaHalf | beta, | ||
magmaHalf_ptr | dC, | ||
magma_int_t | lddc, | ||
magma_queue_t | queue ) |
Perform FP16 matrix-matrix product, \( C = \alpha op(A) op(B) + \beta C \).
This routine requires CUDA 7.5 or greater.
[in] | transA | Operation op(A) to perform on matrix A. |
[in] | transB | Operation op(B) to perform on matrix B. |
[in] | m | Number of rows of C and op(A). m >= 0. |
[in] | n | Number of columns of C and op(B). n >= 0. |
[in] | k | Number of columns of op(A) and rows of op(B). k >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dA | HALF PRECISION array on GPU device. If transA == MagmaNoTrans, the m-by-k matrix A of dimension (ldda,k), ldda >= max(1,m); otherwise, the k-by-m matrix A of dimension (ldda,m), ldda >= max(1,k). |
[in] | ldda | Leading dimension of dA. |
[in] | dB | HALF PRECISION array on GPU device. If transB == MagmaNoTrans, the k-by-n matrix B of dimension (lddb,n), lddb >= max(1,k); otherwise, the n-by-k matrix B of dimension (lddb,k), lddb >= max(1,n). |
[in] | lddb | Leading dimension of dB. |
[in] | beta | Scalar \( \beta \) |
[in,out] | dC | HALF PRECISION array on GPU device. The m-by-n matrix C of dimension (lddc,n), lddc >= max(1,m). |
[in] | lddc | Leading dimension of dC. |
[in] | queue | magma_queue_t Queue to execute in. |
void magma_sgemm | ( | magma_trans_t | transA, |
magma_trans_t | transB, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
float | alpha, | ||
magmaFloat_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaFloat_const_ptr | dB, | ||
magma_int_t | lddb, | ||
float | beta, | ||
magmaFloat_ptr | dC, | ||
magma_int_t | lddc, | ||
magma_queue_t | queue ) |
Perform matrix-matrix product, \( C = \alpha op(A) op(B) + \beta C \).
[in] | transA | Operation op(A) to perform on matrix A. |
[in] | transB | Operation op(B) to perform on matrix B. |
[in] | m | Number of rows of C and op(A). m >= 0. |
[in] | n | Number of columns of C and op(B). n >= 0. |
[in] | k | Number of columns of op(A) and rows of op(B). k >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dA | REAL array on GPU device. If transA == MagmaNoTrans, the m-by-k matrix A of dimension (ldda,k), ldda >= max(1,m); otherwise, the k-by-m matrix A of dimension (ldda,m), ldda >= max(1,k). |
[in] | ldda | Leading dimension of dA. |
[in] | dB | REAL array on GPU device. If transB == MagmaNoTrans, the k-by-n matrix B of dimension (lddb,n), lddb >= max(1,k); otherwise, the n-by-k matrix B of dimension (lddb,k), lddb >= max(1,n). |
[in] | lddb | Leading dimension of dB. |
[in] | beta | Scalar \( \beta \) |
[in,out] | dC | REAL array on GPU device. The m-by-n matrix C of dimension (lddc,n), lddc >= max(1,m). |
[in] | lddc | Leading dimension of dC. |
[in] | queue | magma_queue_t Queue to execute in. |
void magma_zgemm | ( | magma_trans_t | transA, |
magma_trans_t | transB, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex_const_ptr | dB, | ||
magma_int_t | lddb, | ||
magmaDoubleComplex | beta, | ||
magmaDoubleComplex_ptr | dC, | ||
magma_int_t | lddc, | ||
magma_queue_t | queue ) |
Perform matrix-matrix product, \( C = \alpha op(A) op(B) + \beta C \).
[in] | transA | Operation op(A) to perform on matrix A. |
[in] | transB | Operation op(B) to perform on matrix B. |
[in] | m | Number of rows of C and op(A). m >= 0. |
[in] | n | Number of columns of C and op(B). n >= 0. |
[in] | k | Number of columns of op(A) and rows of op(B). k >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dA | COMPLEX_16 array on GPU device. If transA == MagmaNoTrans, the m-by-k matrix A of dimension (ldda,k), ldda >= max(1,m); otherwise, the k-by-m matrix A of dimension (ldda,m), ldda >= max(1,k). |
[in] | ldda | Leading dimension of dA. |
[in] | dB | COMPLEX_16 array on GPU device. If transB == MagmaNoTrans, the k-by-n matrix B of dimension (lddb,n), lddb >= max(1,k); otherwise, the n-by-k matrix B of dimension (lddb,k), lddb >= max(1,n). |
[in] | lddb | Leading dimension of dB. |
[in] | beta | Scalar \( \beta \) |
[in,out] | dC | COMPLEX_16 array on GPU device. The m-by-n matrix C of dimension (lddc,n), lddc >= max(1,m). |
[in] | lddc | Leading dimension of dC. |
[in] | queue | magma_queue_t Queue to execute in. |
void magmablas_cgemm | ( | magma_trans_t | transA, |
magma_trans_t | transB, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
magmaFloatComplex | alpha, | ||
magmaFloatComplex const * | dA, | ||
magma_int_t | ldda, | ||
magmaFloatComplex const * | dB, | ||
magma_int_t | lddb, | ||
magmaFloatComplex | beta, | ||
magmaFloatComplex * | dC, | ||
magma_int_t | lddc, | ||
magma_queue_t | queue ) |
CGEMM performs one of the matrix-matrix operations.
C = alpha*op( A )*op( B ) + beta*C,
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
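[in] | transA | magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows: transA = MagmaNoTrans: op( A ) = A; transA = MagmaTrans: op( A ) = A**T; transA = MagmaConjTrans: op( A ) = A**H. |
[in] | transB | magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows: transB = MagmaNoTrans: op( B ) = B; transB = MagmaTrans: op( B ) = B**T; transB = MagmaConjTrans: op( B ) = B**H. |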
[in] | m | INTEGER. On entry, M specifies the number of rows of the matrix op( dA ) and of the matrix dC. M must be at least zero. |
[in] | n | INTEGER. On entry, N specifies the number of columns of the matrix op( dB ) and the number of columns of the matrix dC. N must be at least zero. |
[in] | k | INTEGER. On entry, K specifies the number of columns of the matrix op( dA ) and the number of rows of the matrix op( dB ). K must be at least zero. |
[in] | alpha | COMPLEX. On entry, ALPHA specifies the scalar alpha. |
[in] | dA | COMPLEX array of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array dA must contain the matrix dA, otherwise the leading k by m part of the array dA must contain the matrix dA. |
[in] | ldda | INTEGER. On entry, ldda specifies the first dimension of dA as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ). |
[in] | dB | COMPLEX array of DIMENSION ( lddb, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array dB must contain the matrix dB, otherwise the leading n by k part of the array dB must contain the matrix dB. |
[in] | lddb | INTEGER. On entry, lddb specifies the first dimension of dB as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ). |
[in] | beta | COMPLEX. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then dC need not be set on input. |
[in,out] | dC | COMPLEX array of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array dC must contain the matrix dC, except when beta is zero, in which case dC need not be set on entry. On exit, the array dC is overwritten by the m by n matrix ( alpha*op( dA )*op( dB ) + beta*dC ). |
[in] | lddc | INTEGER. On entry, lddc specifies the first dimension of dC as declared in the calling (sub) program. lddc must be at least max( 1, m ). |
[in] | queue | magma_queue_t Queue to execute in. |
void magmablas_cgemm_reduce | ( | magma_int_t | m, |
magma_int_t | n, | ||
magma_int_t | k, | ||
magmaFloatComplex | alpha, | ||
magmaFloatComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaFloatComplex_const_ptr | dB, | ||
magma_int_t | lddb, | ||
magmaFloatComplex | beta, | ||
magmaFloatComplex_ptr | dC, | ||
magma_int_t | lddc, | ||
magma_queue_t | queue ) |
CGEMM_REDUCE performs the matrix-matrix operation
C := alpha*A^T*B + beta*C,
where alpha and beta are scalars, and A, B and C are matrices, with A a k-by-m matrix, B a k-by-n matrix, and C an m-by-n matrix.
This routine is tuned for m, n << k. Typically, m and n are expected to be less than 128.
void magmablas_dgemm | ( | magma_trans_t | transA, |
magma_trans_t | transB, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
double | alpha, | ||
double const * | dA, | ||
magma_int_t | ldda, | ||
double const * | dB, | ||
magma_int_t | lddb, | ||
double | beta, | ||
double * | dC, | ||
magma_int_t | lddc, | ||
magma_queue_t | queue ) |
DGEMM performs one of the matrix-matrix operations.
C = alpha*op( A )*op( B ) + beta*C,
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
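[in] | transA | magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows: transA = MagmaNoTrans: op( A ) = A; transA = MagmaTrans: op( A ) = A**T; transA = MagmaConjTrans: op( A ) = A**H. |
[in] | transB | magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows: transB = MagmaNoTrans: op( B ) = B; transB = MagmaTrans: op( B ) = B**T; transB = MagmaConjTrans: op( B ) = B**H. |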
[in] | m | INTEGER. On entry, M specifies the number of rows of the matrix op( dA ) and of the matrix dC. M must be at least zero. |
[in] | n | INTEGER. On entry, N specifies the number of columns of the matrix op( dB ) and the number of columns of the matrix dC. N must be at least zero. |
[in] | k | INTEGER. On entry, K specifies the number of columns of the matrix op( dA ) and the number of rows of the matrix op( dB ). K must be at least zero. |
[in] | alpha | DOUBLE PRECISION. On entry, ALPHA specifies the scalar alpha. |
[in] | dA | DOUBLE PRECISION array of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array dA must contain the matrix dA, otherwise the leading k by m part of the array dA must contain the matrix dA. |
[in] | ldda | INTEGER. On entry, ldda specifies the first dimension of dA as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ). |
[in] | dB | DOUBLE PRECISION array of DIMENSION ( lddb, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array dB must contain the matrix dB, otherwise the leading n by k part of the array dB must contain the matrix dB. |
[in] | lddb | INTEGER. On entry, lddb specifies the first dimension of dB as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ). |
[in] | beta | DOUBLE PRECISION. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then dC need not be set on input. |
[in,out] | dC | DOUBLE PRECISION array of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array dC must contain the matrix dC, except when beta is zero, in which case dC need not be set on entry. On exit, the array dC is overwritten by the m by n matrix ( alpha*op( dA )*op( dB ) + beta*dC ). |
[in] | lddc | INTEGER. On entry, lddc specifies the first dimension of dC as declared in the calling (sub) program. lddc must be at least max( 1, m ). |
[in] | queue | magma_queue_t Queue to execute in. |
void magmablas_dgemm_reduce | ( | magma_int_t | m, |
magma_int_t | n, | ||
magma_int_t | k, | ||
double | alpha, | ||
magmaDouble_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDouble_const_ptr | dB, | ||
magma_int_t | lddb, | ||
double | beta, | ||
magmaDouble_ptr | dC, | ||
magma_int_t | lddc, | ||
magma_queue_t | queue ) |
DGEMM_REDUCE performs the matrix-matrix operation
C := alpha*A^T*B + beta*C,
where alpha and beta are scalars, and A, B and C are matrices, with A a k-by-m matrix, B a k-by-n matrix, and C an m-by-n matrix.
This routine is tuned for m, n << k. Typically, m and n are expected to be less than 128.
void magmablas_sgemm | ( | magma_trans_t | transA, |
magma_trans_t | transB, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
float | alpha, | ||
float const * | dA, | ||
magma_int_t | ldda, | ||
float const * | dB, | ||
magma_int_t | lddb, | ||
float | beta, | ||
float * | dC, | ||
magma_int_t | lddc, | ||
magma_queue_t | queue ) |
SGEMM performs one of the matrix-matrix operations.
C = alpha*op( A )*op( B ) + beta*C,
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
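[in] | transA | magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows: transA = MagmaNoTrans: op( A ) = A; transA = MagmaTrans: op( A ) = A**T; transA = MagmaConjTrans: op( A ) = A**H. |
[in] | transB | magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows: transB = MagmaNoTrans: op( B ) = B; transB = MagmaTrans: op( B ) = B**T; transB = MagmaConjTrans: op( B ) = B**H. |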
[in] | m | INTEGER. On entry, M specifies the number of rows of the matrix op( dA ) and of the matrix dC. M must be at least zero. |
[in] | n | INTEGER. On entry, N specifies the number of columns of the matrix op( dB ) and the number of columns of the matrix dC. N must be at least zero. |
[in] | k | INTEGER. On entry, K specifies the number of columns of the matrix op( dA ) and the number of rows of the matrix op( dB ). K must be at least zero. |
[in] | alpha | REAL. On entry, ALPHA specifies the scalar alpha. |
[in] | dA | REAL array of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array dA must contain the matrix dA, otherwise the leading k by m part of the array dA must contain the matrix dA. |
[in] | ldda | INTEGER. On entry, ldda specifies the first dimension of dA as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ). |
[in] | dB | REAL array of DIMENSION ( lddb, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array dB must contain the matrix dB, otherwise the leading n by k part of the array dB must contain the matrix dB. |
[in] | lddb | INTEGER. On entry, lddb specifies the first dimension of dB as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ). |
[in] | beta | REAL. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then dC need not be set on input. |
[in,out] | dC | REAL array of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array dC must contain the matrix dC, except when beta is zero, in which case dC need not be set on entry. On exit, the array dC is overwritten by the m by n matrix ( alpha*op( dA )*op( dB ) + beta*dC ). |
[in] | lddc | INTEGER. On entry, lddc specifies the first dimension of dC as declared in the calling (sub) program. lddc must be at least max( 1, m ). |
[in] | queue | magma_queue_t Queue to execute in. |
void magmablas_sgemm_reduce | ( | magma_int_t | m, |
magma_int_t | n, | ||
magma_int_t | k, | ||
float | alpha, | ||
magmaFloat_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaFloat_const_ptr | dB, | ||
magma_int_t | lddb, | ||
float | beta, | ||
magmaFloat_ptr | dC, | ||
magma_int_t | lddc, | ||
magma_queue_t | queue ) |
SGEMM_REDUCE performs the matrix-matrix operation
C := alpha*A^T*B + beta*C,
where alpha and beta are scalars, and A, B and C are matrices, with A a k-by-m matrix, B a k-by-n matrix, and C an m-by-n matrix.
This routine is tuned for m, n << k. Typically, m and n are expected to be less than 128.
void magmablas_zgemm | ( | magma_trans_t | transA, |
magma_trans_t | transB, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex const * | dA, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex const * | dB, | ||
magma_int_t | lddb, | ||
magmaDoubleComplex | beta, | ||
magmaDoubleComplex * | dC, | ||
magma_int_t | lddc, | ||
magma_queue_t | queue ) |
ZGEMM performs one of the matrix-matrix operations.
C = alpha*op( A )*op( B ) + beta*C,
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
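[in] | transA | magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows: transA = MagmaNoTrans: op( A ) = A; transA = MagmaTrans: op( A ) = A**T; transA = MagmaConjTrans: op( A ) = A**H. |
[in] | transB | magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows: transB = MagmaNoTrans: op( B ) = B; transB = MagmaTrans: op( B ) = B**T; transB = MagmaConjTrans: op( B ) = B**H. |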
[in] | m | INTEGER. On entry, M specifies the number of rows of the matrix op( dA ) and of the matrix dC. M must be at least zero. |
[in] | n | INTEGER. On entry, N specifies the number of columns of the matrix op( dB ) and the number of columns of the matrix dC. N must be at least zero. |
[in] | k | INTEGER. On entry, K specifies the number of columns of the matrix op( dA ) and the number of rows of the matrix op( dB ). K must be at least zero. |
[in] | alpha | COMPLEX_16. On entry, ALPHA specifies the scalar alpha. |
[in] | dA | COMPLEX_16 array of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array dA must contain the matrix dA, otherwise the leading k by m part of the array dA must contain the matrix dA. |
[in] | ldda | INTEGER. On entry, ldda specifies the first dimension of dA as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ). |
[in] | dB | COMPLEX_16 array of DIMENSION ( lddb, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array dB must contain the matrix dB, otherwise the leading n by k part of the array dB must contain the matrix dB. |
[in] | lddb | INTEGER. On entry, lddb specifies the first dimension of dB as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ). |
[in] | beta | COMPLEX_16. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then dC need not be set on input. |
[in,out] | dC | COMPLEX_16 array of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array dC must contain the matrix dC, except when beta is zero, in which case dC need not be set on entry. On exit, the array dC is overwritten by the m by n matrix ( alpha*op( dA )*op( dB ) + beta*dC ). |
[in] | lddc | INTEGER. On entry, lddc specifies the first dimension of dC as declared in the calling (sub) program. lddc must be at least max( 1, m ). |
[in] | queue | magma_queue_t Queue to execute in. |
void magmablas_zgemm_reduce | ( | magma_int_t | m, |
magma_int_t | n, | ||
magma_int_t | k, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex_const_ptr | dB, | ||
magma_int_t | lddb, | ||
magmaDoubleComplex | beta, | ||
magmaDoubleComplex_ptr | dC, | ||
magma_int_t | lddc, | ||
magma_queue_t | queue ) |
ZGEMM_REDUCE performs the matrix-matrix operation
C := alpha*A^T*B + beta*C,
where alpha and beta are scalars, and A, B and C are matrices, with A a k-by-m matrix, B a k-by-n matrix, and C an m-by-n matrix.
This routine is tuned for m, n << k. Typically, m and n are expected to be less than 128.