![]() |
MAGMA 2.9.0
Matrix Algebra for GPU and Multicore Architectures
|
\(C = \alpha \;op(A) \;op(B) + \beta C\) More...
Functions | |
void | magmablas_cgemm_batched (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex const *const *dA_array, magma_int_t ldda, magmaFloatComplex const *const *dB_array, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex **dC_array, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue) |
CGEMM performs one of the matrix-matrix operations. | |
void | magmablas_cgemm_batched_core (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex const *const *dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda, magmaFloatComplex const *const *dB_array, magma_int_t Bi, magma_int_t Bj, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex **dC_array, magma_int_t Ci, magma_int_t Cj, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue) |
CGEMM performs one of the matrix-matrix operations. | |
void | magmablas_cgemm_vbatched (magma_trans_t transA, magma_trans_t transB, magma_int_t *m, magma_int_t *n, magma_int_t *k, magmaFloatComplex alpha, magmaFloatComplex const *const *dA_array, magma_int_t *ldda, magmaFloatComplex const *const *dB_array, magma_int_t *lddb, magmaFloatComplex beta, magmaFloatComplex **dC_array, magma_int_t *lddc, magma_int_t batchCount, magma_queue_t queue) |
CGEMM performs one of the matrix-matrix operations. | |
void | magmablas_dgemm_batched (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, double alpha, double const *const *dA_array, magma_int_t ldda, double const *const *dB_array, magma_int_t lddb, double beta, double **dC_array, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue) |
DGEMM performs one of the matrix-matrix operations. | |
void | magmablas_dgemm_batched_core (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, double alpha, double const *const *dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda, double const *const *dB_array, magma_int_t Bi, magma_int_t Bj, magma_int_t lddb, double beta, double **dC_array, magma_int_t Ci, magma_int_t Cj, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue) |
DGEMM performs one of the matrix-matrix operations. | |
void | magmablas_dgemm_vbatched (magma_trans_t transA, magma_trans_t transB, magma_int_t *m, magma_int_t *n, magma_int_t *k, double alpha, double const *const *dA_array, magma_int_t *ldda, double const *const *dB_array, magma_int_t *lddb, double beta, double **dC_array, magma_int_t *lddc, magma_int_t batchCount, magma_queue_t queue) |
DGEMM performs one of the matrix-matrix operations. | |
void | magmablas_sgemm_batched (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, float alpha, float const *const *dA_array, magma_int_t ldda, float const *const *dB_array, magma_int_t lddb, float beta, float **dC_array, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue) |
SGEMM performs one of the matrix-matrix operations. | |
void | magmablas_sgemm_batched_core (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, float alpha, float const *const *dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda, float const *const *dB_array, magma_int_t Bi, magma_int_t Bj, magma_int_t lddb, float beta, float **dC_array, magma_int_t Ci, magma_int_t Cj, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue) |
SGEMM performs one of the matrix-matrix operations. | |
void | magmablas_sgemm_vbatched (magma_trans_t transA, magma_trans_t transB, magma_int_t *m, magma_int_t *n, magma_int_t *k, float alpha, float const *const *dA_array, magma_int_t *ldda, float const *const *dB_array, magma_int_t *lddb, float beta, float **dC_array, magma_int_t *lddc, magma_int_t batchCount, magma_queue_t queue) |
SGEMM performs one of the matrix-matrix operations. | |
void | magmablas_zgemm_batched (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex const *const *dA_array, magma_int_t ldda, magmaDoubleComplex const *const *dB_array, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex **dC_array, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue) |
ZGEMM performs one of the matrix-matrix operations. | |
void | magmablas_zgemm_batched_core (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex const *const *dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda, magmaDoubleComplex const *const *dB_array, magma_int_t Bi, magma_int_t Bj, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex **dC_array, magma_int_t Ci, magma_int_t Cj, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue) |
ZGEMM performs one of the matrix-matrix operations. | |
void | magmablas_zgemm_vbatched (magma_trans_t transA, magma_trans_t transB, magma_int_t *m, magma_int_t *n, magma_int_t *k, magmaDoubleComplex alpha, magmaDoubleComplex const *const *dA_array, magma_int_t *ldda, magmaDoubleComplex const *const *dB_array, magma_int_t *lddb, magmaDoubleComplex beta, magmaDoubleComplex **dC_array, magma_int_t *lddc, magma_int_t batchCount, magma_queue_t queue) |
ZGEMM performs one of the matrix-matrix operations. | |
\(C = \alpha \;op(A) \;op(B) + \beta C\)
void magmablas_cgemm_batched | ( | magma_trans_t | transA, |
magma_trans_t | transB, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
magmaFloatComplex | alpha, | ||
magmaFloatComplex const *const * | dA_array, | ||
magma_int_t | ldda, | ||
magmaFloatComplex const *const * | dB_array, | ||
magma_int_t | lddb, | ||
magmaFloatComplex | beta, | ||
magmaFloatComplex ** | dC_array, | ||
magma_int_t | lddc, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue ) |
CGEMM performs one of the matrix-matrix operations.
C = alpha*op( A )*op( B ) + beta*C,
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
[in] | transA | magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:
|
[in] | transB | magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:
|
[in] | m | INTEGER. On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero. |
[in] | n | INTEGER. On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero. |
[in] | k | INTEGER. On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero. |
[in] | alpha | COMPLEX On entry, ALPHA specifies the scalar alpha. |
[in] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX array A of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A. |
[in] | ldda | INTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ). |
[in] | dB_array | Array of pointers, dimension (batchCount). Each is a COMPLEX array B of DIMENSION ( lddb, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B. |
[in] | lddb | INTEGER. On entry, lddb specifies the first dimension of each array B as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ). |
[in] | beta | COMPLEX. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input. |
[in,out] | dC_array | Array of pointers, dimension (batchCount). Each is a COMPLEX array C of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n matrix ( alpha*op( A )*op( B ) + beta*C ). |
[in] | lddc | INTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, m ). |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
void magmablas_cgemm_batched_core | ( | magma_trans_t | transA, |
magma_trans_t | transB, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
magmaFloatComplex | alpha, | ||
magmaFloatComplex const *const * | dA_array, | ||
magma_int_t | Ai, | ||
magma_int_t | Aj, | ||
magma_int_t | ldda, | ||
magmaFloatComplex const *const * | dB_array, | ||
magma_int_t | Bi, | ||
magma_int_t | Bj, | ||
magma_int_t | lddb, | ||
magmaFloatComplex | beta, | ||
magmaFloatComplex ** | dC_array, | ||
magma_int_t | Ci, | ||
magma_int_t | Cj, | ||
magma_int_t | lddc, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue ) |
CGEMM performs one of the matrix-matrix operations.
C = alpha*op( A )*op( B ) + beta*C,
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
[in] | transA | magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:
|
[in] | transB | magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:
|
[in] | m | INTEGER. On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero. |
[in] | n | INTEGER. On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero. |
[in] | k | INTEGER. On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero. |
[in] | alpha | COMPLEX On entry, ALPHA specifies the scalar alpha. |
[in] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX array A of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A. |
[in] | ldda | INTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ). |
[in] | dB_array | Array of pointers, dimension (batchCount). Each is a COMPLEX array B of DIMENSION ( lddb, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B. |
[in] | lddb | INTEGER. On entry, lddb specifies the first dimension of each array B as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ). |
[in] | beta | COMPLEX. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input. |
[in,out] | dC_array | Array of pointers, dimension (batchCount). Each is a COMPLEX array C of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, each array C is overwritten by the m by n matrix ( alpha*op( A )*op( B ) + beta*C ). |
[in] | lddc | INTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, m ). |
[in] | Ai | INTEGER Row offset for all 'A' matrices. |
[in] | Aj | INTEGER Column offset for all 'A' matrices. |
[in] | Bi | INTEGER Row offset for all 'B' matrices. |
[in] | Bj | INTEGER Column offset for all 'B' matrices. |
[in] | Ci | INTEGER Row offset for all 'C' matrices. |
[in] | Cj | INTEGER Column offset for all 'C' matrices. |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
void magmablas_cgemm_vbatched | ( | magma_trans_t | transA, |
magma_trans_t | transB, | ||
magma_int_t * | m, | ||
magma_int_t * | n, | ||
magma_int_t * | k, | ||
magmaFloatComplex | alpha, | ||
magmaFloatComplex const *const * | dA_array, | ||
magma_int_t * | ldda, | ||
magmaFloatComplex const *const * | dB_array, | ||
magma_int_t * | lddb, | ||
magmaFloatComplex | beta, | ||
magmaFloatComplex ** | dC_array, | ||
magma_int_t * | lddc, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue ) |
CGEMM performs one of the matrix-matrix operations.
C = alpha*op( A )*op( B ) + beta*C,
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
[in] | transA | magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:
|
[in] | transB | magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:
|
[in] | m | Array of integers, dimension (batchCount + 1) On entry, each INTEGER M specifies the number of rows of the corresponding matrices op( A ) and C. M must be at least zero. The last element of the array is used internally by the routine. |
[in] | n | Array of integers, dimension (batchCount + 1). On entry, each INTEGER N specifies the number of columns of the corresponding matrix op( B ) and the number of columns of the corresponding matrix C. N must be at least zero. The last element of the array is used internally by the routine. |
[in] | k | Array of integers, dimension (batchCount + 1). On entry, each INTEGER K specifies the number of columns of the corresponding matrix op( A ) and the number of rows of the corresponding matrix op( B ). K must be at least zero. The last element of the array is used internally by the routine. |
[in] | alpha | COMPLEX On entry, ALPHA specifies the scalar alpha. |
[in] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX array A of DIMENSION ( LDDA, ka ), where ka is K when transA = MagmaNoTrans, and is M otherwise. Before entry with transA = MagmaNoTrans, the leading M by K part of the array A must contain the matrix A, otherwise the leading K by M part of the array A must contain the matrix A. |
[in] | ldda | Array of integers, dimension (batchCount + 1). On entry, each INTEGER LDDA specifies the first dimension of the corresponding matrix A as declared in the calling (sub) program. When transA = MagmaNoTrans then LDDA must be at least max( 1, M ), otherwise ldda must be at least max( 1, K ). The last element of the array is used internally by the routine. |
[in] | dB_array | Array of pointers, dimension (batchCount). Each is a COMPLEX array B of DIMENSION ( LDDB, kb ), where kb is N when transB = MagmaNoTrans, and is K otherwise. Before entry with transB = MagmaNoTrans, the leading K by N part of the array B must contain the matrix B, otherwise the leading N by K part of the array B must contain the matrix B. |
[in] | lddb | Array of integers, dimension (batchCount + 1). On entry, each INTEGER LDDB specifies the first dimension of the corresponding matrix B as declared in the calling (sub) program. When transB = MagmaNoTrans then LDDB must be at least max( 1, K ), otherwise LDDB must be at least max( 1, N ). The last element of the array is used internally by the routine. |
[in] | beta | COMPLEX. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input. |
[in,out] | dC_array | Array of pointers, dimension (batchCount). Each is a COMPLEX array C of DIMENSION ( LDDC, N ). Before entry, the leading M by N part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the M by N matrix ( alpha*op( A )*op( B ) + beta*C ). |
[in] | lddc | Array of integers, dimension (batchCount + 1). On entry, each INTEGER LDDC specifies the first dimension of the corresponding matrix C as declared in the calling (sub) program. LDDC must be at least max( 1, M ). The last element of the array is used internally by the routine. |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
void magmablas_dgemm_batched | ( | magma_trans_t | transA, |
magma_trans_t | transB, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
double | alpha, | ||
double const *const * | dA_array, | ||
magma_int_t | ldda, | ||
double const *const * | dB_array, | ||
magma_int_t | lddb, | ||
double | beta, | ||
double ** | dC_array, | ||
magma_int_t | lddc, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue ) |
DGEMM performs one of the matrix-matrix operations.
C = alpha*op( A )*op( B ) + beta*C,
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
[in] | transA | magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:
|
[in] | transB | magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:
|
[in] | m | INTEGER. On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero. |
[in] | n | INTEGER. On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero. |
[in] | k | INTEGER. On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero. |
[in] | alpha | DOUBLE PRECISION On entry, ALPHA specifies the scalar alpha. |
[in] | dA_array | Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A. |
[in] | ldda | INTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ). |
[in] | dB_array | Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of DIMENSION ( lddb, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B. |
[in] | lddb | INTEGER. On entry, lddb specifies the first dimension of each array B as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ). |
[in] | beta | DOUBLE PRECISION. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input. |
[in,out] | dC_array | Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array C of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n matrix ( alpha*op( A )*op( B ) + beta*C ). |
[in] | lddc | INTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, m ). |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
void magmablas_dgemm_batched_core | ( | magma_trans_t | transA, |
magma_trans_t | transB, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
double | alpha, | ||
double const *const * | dA_array, | ||
magma_int_t | Ai, | ||
magma_int_t | Aj, | ||
magma_int_t | ldda, | ||
double const *const * | dB_array, | ||
magma_int_t | Bi, | ||
magma_int_t | Bj, | ||
magma_int_t | lddb, | ||
double | beta, | ||
double ** | dC_array, | ||
magma_int_t | Ci, | ||
magma_int_t | Cj, | ||
magma_int_t | lddc, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue ) |
DGEMM performs one of the matrix-matrix operations.
C = alpha*op( A )*op( B ) + beta*C,
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
[in] | transA | magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:
|
[in] | transB | magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:
|
[in] | m | INTEGER. On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero. |
[in] | n | INTEGER. On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero. |
[in] | k | INTEGER. On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero. |
[in] | alpha | DOUBLE PRECISION On entry, ALPHA specifies the scalar alpha. |
[in] | dA_array | Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A. |
[in] | ldda | INTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ). |
[in] | dB_array | Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of DIMENSION ( LDB, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B. |
[in] | lddb | INTEGER. On entry, lddb specifies the first dimension of each array B as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ). |
[in] | beta | DOUBLE PRECISION. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input. |
[in,out] | dC_array | Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array C of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, each array C is overwritten by the m by n matrix ( alpha*op( A )*op( B ) + beta*C ). |
[in] | lddc | INTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, m ). |
[in] | Ai | INTEGER Row offset for all 'A' matrices. |
[in] | Aj | INTEGER Column offset for all 'A' matrices. |
[in] | Bi | INTEGER Row offset for all 'B' matrices. |
[in] | Bj | INTEGER Column offset for all 'B' matrices. |
[in] | Ci | INTEGER Row offset for all 'C' matrices. |
[in] | Cj | INTEGER Column offset for all 'C' matrices. |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
void magmablas_dgemm_vbatched | ( | magma_trans_t | transA, |
magma_trans_t | transB, | ||
magma_int_t * | m, | ||
magma_int_t * | n, | ||
magma_int_t * | k, | ||
double | alpha, | ||
double const *const * | dA_array, | ||
magma_int_t * | ldda, | ||
double const *const * | dB_array, | ||
magma_int_t * | lddb, | ||
double | beta, | ||
double ** | dC_array, | ||
magma_int_t * | lddc, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue ) |
DGEMM performs one of the matrix-matrix operations.
C = alpha*op( A )*op( B ) + beta*C,
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
[in] | transA | magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:
|
[in] | transB | magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:
|
[in] | m | Array of integers, dimension (batchCount + 1) On entry, each INTEGER M specifies the number of rows of the corresponding matrices op( A ) and C. M must be at least zero. The last element of the array is used internally by the routine. |
[in] | n | Array of integers, dimension (batchCount + 1). On entry, each INTEGER N specifies the number of columns of the corresponding matrix op( B ) and the number of columns of the corresponding matrix C. N must be at least zero. The last element of the array is used internally by the routine. |
[in] | k | Array of integers, dimension (batchCount + 1). On entry, each INTEGER K specifies the number of columns of the corresponding matrix op( A ) and the number of rows of the corresponding matrix op( B ). K must be at least zero. The last element of the array is used internally by the routine. |
[in] | alpha | DOUBLE PRECISION On entry, ALPHA specifies the scalar alpha. |
[in] | dA_array | Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of DIMENSION ( LDDA, ka ), where ka is K when transA = MagmaNoTrans, and is M otherwise. Before entry with transA = MagmaNoTrans, the leading M by K part of the array A must contain the matrix A, otherwise the leading K by M part of the array A must contain the matrix A. |
[in] | ldda | Array of integers, dimension (batchCount + 1). On entry, each INTEGER LDDA specifies the first dimension of the corresponding matrix A as declared in the calling (sub) program. When transA = MagmaNoTrans then LDDA must be at least max( 1, M ), otherwise ldda must be at least max( 1, K ). The last element of the array is used internally by the routine. |
[in] | dB_array | Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of DIMENSION ( LDDB, kb ), where kb is N when transB = MagmaNoTrans, and is K otherwise. Before entry with transB = MagmaNoTrans, the leading K by N part of the array B must contain the matrix B, otherwise the leading N by K part of the array B must contain the matrix B. |
[in] | lddb | Array of integers, dimension (batchCount + 1). On entry, each INTEGER LDDB specifies the first dimension of the corresponding matrix B as declared in the calling (sub) program. When transB = MagmaNoTrans then LDDB must be at least max( 1, K ), otherwise LDDB must be at least max( 1, N ). The last element of the array is used internally by the routine. |
[in] | beta | DOUBLE PRECISION. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input. |
[in,out] | dC_array | Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array C of DIMENSION ( LDDC, N ). Before entry, the leading M by N part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the M by N matrix ( alpha*op( A )*op( B ) + beta*C ). |
[in] | lddc | Array of integers, dimension (batchCount + 1). On entry, each INTEGER LDDC specifies the first dimension of the corresponding matrix C as declared in the calling (sub) program. LDDC must be at least max( 1, M ). The last element of the array is used internally by the routine. |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
void magmablas_sgemm_batched | ( | magma_trans_t | transA, |
magma_trans_t | transB, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
float | alpha, | ||
float const *const * | dA_array, | ||
magma_int_t | ldda, | ||
float const *const * | dB_array, | ||
magma_int_t | lddb, | ||
float | beta, | ||
float ** | dC_array, | ||
magma_int_t | lddc, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue ) |
SGEMM performs one of the matrix-matrix operations.
C = alpha*op( A )*op( B ) + beta*C,
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
[in] | transA | magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:
|
[in] | transB | magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:
|
[in] | m | INTEGER. On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero. |
[in] | n | INTEGER. On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero. |
[in] | k | INTEGER. On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero. |
[in] | alpha | REAL On entry, ALPHA specifies the scalar alpha. |
[in] | dA_array | Array of pointers, dimension (batchCount). Each is a REAL array A of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A. |
[in] | ldda | INTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ). |
[in] | dB_array | Array of pointers, dimension (batchCount). Each is a REAL array B of DIMENSION ( lddb, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B. |
[in] | lddb | INTEGER. On entry, lddb specifies the first dimension of each array B as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ). |
[in] | beta | REAL. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input. |
[in,out] | dC_array | Array of pointers, dimension (batchCount). Each is a REAL array C of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n matrix ( alpha*op( A )*op( B ) + beta*C ). |
[in] | lddc | INTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, m ). |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
void magmablas_sgemm_batched_core | ( | magma_trans_t | transA, |
magma_trans_t | transB, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
float | alpha, | ||
float const *const * | dA_array, | ||
magma_int_t | Ai, | ||
magma_int_t | Aj, | ||
magma_int_t | ldda, | ||
float const *const * | dB_array, | ||
magma_int_t | Bi, | ||
magma_int_t | Bj, | ||
magma_int_t | lddb, | ||
float | beta, | ||
float ** | dC_array, | ||
magma_int_t | Ci, | ||
magma_int_t | Cj, | ||
magma_int_t | lddc, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue ) |
SGEMM performs one of the matrix-matrix operations.
C = alpha*op( A )*op( B ) + beta*C,
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
[in] | transA | magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:
|
[in] | transB | magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:
|
[in] | m | INTEGER. On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero. |
[in] | n | INTEGER. On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero. |
[in] | k | INTEGER. On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero. |
[in] | alpha | REAL On entry, ALPHA specifies the scalar alpha. |
[in] | dA_array | Array of pointers, dimension (batchCount). Each is a REAL array A of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A. |
[in] | ldda | INTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ). |
[in] | dB_array | Array of pointers, dimension (batchCount). Each is a REAL array B of DIMENSION ( lddb, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B. |
[in] | lddb | INTEGER. On entry, lddb specifies the first dimension of each array B as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ). |
[in] | beta | REAL. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input. |
[in,out] | dC_array | Array of pointers, dimension (batchCount). REAL array of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, each array C is overwritten by the m by n matrix ( alpha*op( A )*op( B ) + beta*C ). |
[in] | lddc | INTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, m ). |
[in] | Ai | INTEGER Row offset for all 'A' matrices. |
[in] | Aj | INTEGER Column offset for all 'A' matrices. |
[in] | Bi | INTEGER Row offset for all 'B' matrices. |
[in] | Bj | INTEGER Column offset for all 'B' matrices. |
[in] | Ci | INTEGER Row offset for all 'C' matrices. |
[in] | Cj | INTEGER Column offset for all 'C' matrices. |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
void magmablas_sgemm_vbatched | ( | magma_trans_t | transA, |
magma_trans_t | transB, | ||
magma_int_t * | m, | ||
magma_int_t * | n, | ||
magma_int_t * | k, | ||
float | alpha, | ||
float const *const * | dA_array, | ||
magma_int_t * | ldda, | ||
float const *const * | dB_array, | ||
magma_int_t * | lddb, | ||
float | beta, | ||
float ** | dC_array, | ||
magma_int_t * | lddc, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue ) |
SGEMM performs one of the matrix-matrix operations.
C = alpha*op( A )*op( B ) + beta*C,
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
[in] | transA | magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:
|
[in] | transB | magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:
|
[in] | m | Array of integers, dimension (batchCount + 1) On entry, each INTEGER M specifies the number of rows of the corresponding matrices op( A ) and C. M must be at least zero. The last element of the array is used internally by the routine. |
[in] | n | Array of integers, dimension (batchCount + 1). On entry, each INTEGER N specifies the number of columns of the corresponding matrix op( B ) and the number of columns of the corresponding matrix C. N must be at least zero. The last element of the array is used internally by the routine. |
[in] | k | Array of integers, dimension (batchCount + 1). On entry, each INTEGER K specifies the number of columns of the corresponding matrix op( A ) and the number of rows of the corresponding matrix op( B ). K must be at least zero. The last element of the array is used internally by the routine. |
[in] | alpha | REAL On entry, ALPHA specifies the scalar alpha. |
[in] | dA_array | Array of pointers, dimension (batchCount). Each is a REAL array A of DIMENSION ( LDDA, ka ), where ka is K when transA = MagmaNoTrans, and is M otherwise. Before entry with transA = MagmaNoTrans, the leading M by K part of the array A must contain the matrix A, otherwise the leading K by M part of the array A must contain the matrix A. |
[in] | ldda | Array of integers, dimension (batchCount + 1). On entry, each INTEGER LDDA specifies the first dimension of the corresponding matrix A as declared in the calling (sub) program. When transA = MagmaNoTrans then LDDA must be at least max( 1, M ), otherwise ldda must be at least max( 1, K ). The last element of the array is used internally by the routine. |
[in] | dB_array | Array of pointers, dimension (batchCount). Each is a REAL array B of DIMENSION ( LDDB, kb ), where kb is N when transB = MagmaNoTrans, and is K otherwise. Before entry with transB = MagmaNoTrans, the leading K by N part of the array B must contain the matrix B, otherwise the leading N by K part of the array B must contain the matrix B. |
[in] | lddb | Array of integers, dimension (batchCount + 1). On entry, each INTEGER LDDB specifies the first dimension of the corresponding matrix B as declared in the calling (sub) program. When transB = MagmaNoTrans then LDDB must be at least max( 1, K ), otherwise LDDB must be at least max( 1, N ). The last element of the array is used internally by the routine. |
[in] | beta | REAL. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input. |
[in,out] | dC_array | Array of pointers, dimension (batchCount). Each is a REAL array C of DIMENSION ( LDDC, N ). Before entry, the leading M by N part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the M by N matrix ( alpha*op( A )*op( B ) + beta*C ). |
[in] | lddc | Array of integers, dimension (batchCount + 1). On entry, each INTEGER LDDC specifies the first dimension of the corresponding matrix C as declared in the calling (sub) program. LDDC must be at least max( 1, M ). The last element of the array is used internally by the routine. |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
void magmablas_zgemm_batched | ( | magma_trans_t | transA, |
magma_trans_t | transB, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex const *const * | dA_array, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex const *const * | dB_array, | ||
magma_int_t | lddb, | ||
magmaDoubleComplex | beta, | ||
magmaDoubleComplex ** | dC_array, | ||
magma_int_t | lddc, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue ) |
ZGEMM performs one of the matrix-matrix operations.
C = alpha*op( A )*op( B ) + beta*C,
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
[in] | transA | magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:
|
[in] | transB | magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:
|
[in] | m | INTEGER. On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero. |
[in] | n | INTEGER. On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero. |
[in] | k | INTEGER. On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero. |
[in] | alpha | COMPLEX_16 On entry, ALPHA specifies the scalar alpha. |
[in] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A. |
[in] | ldda | INTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ). |
[in] | dB_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of DIMENSION ( lddb, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B. |
[in] | lddb | INTEGER. On entry, lddb specifies the first dimension of each array B as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ). |
[in] | beta | COMPLEX_16. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input. |
[in,out] | dC_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array C of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n matrix ( alpha*op( A )*op( B ) + beta*C ). |
[in] | lddc | INTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, m ). |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
void magmablas_zgemm_batched_core | ( | magma_trans_t | transA, |
magma_trans_t | transB, | ||
magma_int_t | m, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex const *const * | dA_array, | ||
magma_int_t | Ai, | ||
magma_int_t | Aj, | ||
magma_int_t | ldda, | ||
magmaDoubleComplex const *const * | dB_array, | ||
magma_int_t | Bi, | ||
magma_int_t | Bj, | ||
magma_int_t | lddb, | ||
magmaDoubleComplex | beta, | ||
magmaDoubleComplex ** | dC_array, | ||
magma_int_t | Ci, | ||
magma_int_t | Cj, | ||
magma_int_t | lddc, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue ) |
ZGEMM performs one of the matrix-matrix operations.
C = alpha*op( A )*op( B ) + beta*C,
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
[in] | transA | magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:
|
[in] | transB | magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:
|
[in] | m | INTEGER. On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero. |
[in] | n | INTEGER. On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero. |
[in] | k | INTEGER. On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero. |
[in] | alpha | COMPLEX_16 On entry, ALPHA specifies the scalar alpha. |
[in] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A. |
[in] | ldda | INTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ). |
[in] | dB_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of DIMENSION ( lddb, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B. |
[in] | lddb | INTEGER. On entry, lddb specifies the first dimension of each array B as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ). |
[in] | beta | COMPLEX_16. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input. |
[in,out] | dC_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array C of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n matrix ( alpha*op( A )*op( B ) + beta*C ). |
[in] | lddc | INTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, m ). |
[in] | Ai | INTEGER Row offset for all 'A' matrices. |
[in] | Aj | INTEGER Column offset for all 'A' matrices. |
[in] | Bi | INTEGER Row offset for all 'B' matrices. |
[in] | Bj | INTEGER Column offset for all 'B' matrices. |
[in] | Ci | INTEGER Row offset for all 'C' matrices. |
[in] | Cj | INTEGER Column offset for all 'C' matrices. |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
void magmablas_zgemm_vbatched | ( | magma_trans_t | transA, |
magma_trans_t | transB, | ||
magma_int_t * | m, | ||
magma_int_t * | n, | ||
magma_int_t * | k, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex const *const * | dA_array, | ||
magma_int_t * | ldda, | ||
magmaDoubleComplex const *const * | dB_array, | ||
magma_int_t * | lddb, | ||
magmaDoubleComplex | beta, | ||
magmaDoubleComplex ** | dC_array, | ||
magma_int_t * | lddc, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue ) |
ZGEMM performs one of the matrix-matrix operations.
C = alpha*op( A )*op( B ) + beta*C,
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
[in] | transA | magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:
|
[in] | transB | magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:
|
[in] | m | Array of integers, dimension (batchCount + 1) On entry, each INTEGER M specifies the number of rows of the corresponding matrices op( A ) and C. M must be at least zero. The last element of the array is used internally by the routine. |
[in] | n | Array of integers, dimension (batchCount + 1). On entry, each INTEGER N specifies the number of columns of the corresponding matrix op( B ) and the number of columns of the corresponding matrix C. N must be at least zero. The last element of the array is used internally by the routine. |
[in] | k | Array of integers, dimension (batchCount + 1). On entry, each INTEGER K specifies the number of columns of the corresponding matrix op( A ) and the number of rows of the corresponding matrix op( B ). K must be at least zero. The last element of the array is used internally by the routine. |
[in] | alpha | COMPLEX_16 On entry, ALPHA specifies the scalar alpha. |
[in] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of DIMENSION ( LDDA, ka ), where ka is K when transA = MagmaNoTrans, and is M otherwise. Before entry with transA = MagmaNoTrans, the leading M by K part of the array A must contain the matrix A, otherwise the leading K by M part of the array A must contain the matrix A. |
[in] | ldda | Array of integers, dimension (batchCount + 1). On entry, each INTEGER LDDA specifies the first dimension of the corresponding matrix A as declared in the calling (sub) program. When transA = MagmaNoTrans then LDDA must be at least max( 1, M ), otherwise ldda must be at least max( 1, K ). The last element of the array is used internally by the routine. |
[in] | dB_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of DIMENSION ( LDDB, kb ), where kb is N when transB = MagmaNoTrans, and is K otherwise. Before entry with transB = MagmaNoTrans, the leading K by N part of the array B must contain the matrix B, otherwise the leading N by K part of the array B must contain the matrix B. |
[in] | lddb | Array of integers, dimension (batchCount + 1). On entry, each INTEGER LDDB specifies the first dimension of the corresponding matrix B as declared in the calling (sub) program. When transB = MagmaNoTrans then LDDB must be at least max( 1, K ), otherwise LDDB must be at least max( 1, N ). The last element of the array is used internally by the routine. |
[in] | beta | COMPLEX_16. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input. |
[in,out] | dC_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array C of DIMENSION ( LDDC, N ). Before entry, the leading M by N part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the M by N matrix ( alpha*op( A )*op( B ) + beta*C ). |
[in] | lddc | Array of integers, dimension (batchCount + 1). On entry, each INTEGER LDDC specifies the first dimension of the corresponding matrix C as declared in the calling (sub) program. LDDC must be at least max( 1, M ). The last element of the array is used internally by the routine. |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |