![]() |
MAGMA 2.9.0
Matrix Algebra for GPU and Multicore Architectures
|
\(C = \alpha A A^H + \beta C\) where \(C\) is Hermitian More...
Functions | |
void | magma_cherk (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, float alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, float beta, magmaFloatComplex_ptr dC, magma_int_t lddc, magma_queue_t queue) |
Perform Hermitian rank-k update. | |
void | magma_zherk (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, double alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, double beta, magmaDoubleComplex_ptr dC, magma_int_t lddc, magma_queue_t queue) |
Perform Hermitian rank-k update. | |
void | magma_cherk_mgpu (magma_int_t ngpu, magma_uplo_t uplo, magma_trans_t trans, magma_int_t nb, magma_int_t n, magma_int_t k, float alpha, magmaFloatComplex_ptr dB[], magma_int_t lddb, magma_int_t b_offset, float beta, magmaFloatComplex_ptr dC[], magma_int_t lddc, magma_int_t c_offset, magma_int_t nqueue, magma_queue_t queues[][10]) |
This cherk_mgpu is an internal routine used by cpotrf_mgpu_right. | |
void | magmablas_cherk_small_reduce (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, float alpha, magmaFloatComplex *dA, magma_int_t ldda, float beta, magmaFloatComplex *dC, magma_int_t lddc, magma_int_t nthread_blocks, magma_queue_t queue) |
CHERK performs one of the Hermitian rank k operations. | |
void | magma_dsyrk_mgpu (magma_int_t ngpu, magma_uplo_t uplo, magma_trans_t trans, magma_int_t nb, magma_int_t n, magma_int_t k, double alpha, magmaDouble_ptr dB[], magma_int_t lddb, magma_int_t b_offset, double beta, magmaDouble_ptr dC[], magma_int_t lddc, magma_int_t c_offset, magma_int_t nqueue, magma_queue_t queues[][10]) |
This dsyrk_mgpu is an internal routine used by dpotrf_mgpu_right. | |
void | magmablas_dsyrk_small_reduce (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, double alpha, double *dA, magma_int_t ldda, double beta, double *dC, magma_int_t lddc, magma_int_t nthread_blocks, magma_queue_t queue) |
DSYRK performs one of the symmetric rank k operations. | |
void | magma_ssyrk_mgpu (magma_int_t ngpu, magma_uplo_t uplo, magma_trans_t trans, magma_int_t nb, magma_int_t n, magma_int_t k, float alpha, magmaFloat_ptr dB[], magma_int_t lddb, magma_int_t b_offset, float beta, magmaFloat_ptr dC[], magma_int_t lddc, magma_int_t c_offset, magma_int_t nqueue, magma_queue_t queues[][10]) |
This ssyrk_mgpu is an internal routine used by spotrf_mgpu_right. | |
void | magmablas_ssyrk_small_reduce (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, float alpha, float *dA, magma_int_t ldda, float beta, float *dC, magma_int_t lddc, magma_int_t nthread_blocks, magma_queue_t queue) |
SSYRK performs one of the symmetric rank k operations. | |
void | magma_zherk_mgpu (magma_int_t ngpu, magma_uplo_t uplo, magma_trans_t trans, magma_int_t nb, magma_int_t n, magma_int_t k, double alpha, magmaDoubleComplex_ptr dB[], magma_int_t lddb, magma_int_t b_offset, double beta, magmaDoubleComplex_ptr dC[], magma_int_t lddc, magma_int_t c_offset, magma_int_t nqueue, magma_queue_t queues[][10]) |
This zherk_mgpu is an internal routine used by zpotrf_mgpu_right. | |
void | magmablas_zherk_small_reduce (magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, double alpha, magmaDoubleComplex *dA, magma_int_t ldda, double beta, magmaDoubleComplex *dC, magma_int_t lddc, magma_int_t nthread_blocks, magma_queue_t queue) |
ZHERK performs one of the Hermitian rank k operations. | |
\(C = \alpha A A^H + \beta C\) where \(C\) is Hermitian
void magma_cherk | ( | magma_uplo_t | uplo, |
magma_trans_t | trans, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
float | alpha, | ||
magmaFloatComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
float | beta, | ||
magmaFloatComplex_ptr | dC, | ||
magma_int_t | lddc, | ||
magma_queue_t | queue ) |
Perform Hermitian rank-k update.
\( C = \alpha A A^H + \beta C \) (trans == MagmaNoTrans), or
\( C = \alpha A^H A + \beta C \) (trans == MagmaConjTrans),
where \( C \) is Hermitian.
[in] | uplo | Whether the upper or lower triangle of C is referenced. |
[in] | trans | Operation to perform on A. |
[in] | n | Number of rows and columns of C. n >= 0. |
[in] | k | Number of columns of A (for MagmaNoTrans) or rows of A (for MagmaConjTrans). k >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dA | COMPLEX array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix A of dimension (ldda,k), ldda >= max(1,n); otherwise, the k-by-n matrix A of dimension (ldda,n), ldda >= max(1,k). |
[in] | ldda | Leading dimension of dA. |
[in] | beta | Scalar \( \beta \) |
[in,out] | dC | COMPLEX array on GPU device. The n-by-n Hermitian matrix C of dimension (lddc,n), lddc >= max(1,n). |
[in] | lddc | Leading dimension of dC. |
[in] | queue | magma_queue_t Queue to execute in. |
void magma_zherk | ( | magma_uplo_t | uplo, |
magma_trans_t | trans, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
double | alpha, | ||
magmaDoubleComplex_const_ptr | dA, | ||
magma_int_t | ldda, | ||
double | beta, | ||
magmaDoubleComplex_ptr | dC, | ||
magma_int_t | lddc, | ||
magma_queue_t | queue ) |
Perform Hermitian rank-k update.
\( C = \alpha A A^H + \beta C \) (trans == MagmaNoTrans), or
\( C = \alpha A^H A + \beta C \) (trans == MagmaConjTrans),
where \( C \) is Hermitian.
[in] | uplo | Whether the upper or lower triangle of C is referenced. |
[in] | trans | Operation to perform on A. |
[in] | n | Number of rows and columns of C. n >= 0. |
[in] | k | Number of columns of A (for MagmaNoTrans) or rows of A (for MagmaConjTrans). k >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dA | COMPLEX_16 array on GPU device. If trans == MagmaNoTrans, the n-by-k matrix A of dimension (ldda,k), ldda >= max(1,n); otherwise, the k-by-n matrix A of dimension (ldda,n), ldda >= max(1,k). |
[in] | ldda | Leading dimension of dA. |
[in] | beta | Scalar \( \beta \) |
[in,out] | dC | COMPLEX_16 array on GPU device. The n-by-n Hermitian matrix C of dimension (lddc,n), lddc >= max(1,n). |
[in] | lddc | Leading dimension of dC. |
[in] | queue | magma_queue_t Queue to execute in. |
void magma_cherk_mgpu | ( | magma_int_t | ngpu, |
magma_uplo_t | uplo, | ||
magma_trans_t | trans, | ||
magma_int_t | nb, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
float | alpha, | ||
magmaFloatComplex_ptr | dB[], | ||
magma_int_t | lddb, | ||
magma_int_t | b_offset, | ||
float | beta, | ||
magmaFloatComplex_ptr | dC[], | ||
magma_int_t | lddc, | ||
magma_int_t | c_offset, | ||
magma_int_t | nqueue, | ||
magma_queue_t | queues[][10] ) |
This cherk_mgpu is an internal routine used by cpotrf_mgpu_right.
It makes specific assumptions about the block diagonal.
void magmablas_cherk_small_reduce | ( | magma_uplo_t | uplo, |
magma_trans_t | trans, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
float | alpha, | ||
magmaFloatComplex * | dA, | ||
magma_int_t | ldda, | ||
float | beta, | ||
magmaFloatComplex * | dC, | ||
magma_int_t | lddc, | ||
magma_int_t | nthread_blocks, | ||
magma_queue_t | queue ) |
CHERK performs one of the Hermitian rank k operations.
C := alpha*A*A**H + beta*C,
or
C := alpha*A**H*A + beta*C,
where alpha and beta are real scalars, C is an n by n Hermitian matrix and A is an n by k matrix in the first case and a k by n matrix in the second case.
This is a special routine that supports n up to 32 only. It assumes that k is very large so that the computation of the small matrix C is distributed across many thread blocks. The number of thread blocks can be defined by the user through the interface. However, the kernel can work with a maximum of ceil(k / n) thread blocks. Extra thread blocks, if any, are ignored by the kernel. Reduction across thread blocks is performed using atomics.
[in] | uplo | magma_uplo_t. On entry, uplo specifies whether the upper or lower triangular part of the array C is to be referenced as follows: |
uplo = MagmaUpper Only the upper triangular part of C is to be referenced.
uplo = MagmaLower Only the lower triangular part of C is to be referenced.
[in] | trans | magma_trans_t. On entry, trans specifies the operation to be performed as follows: |
trans = MagmaNoTrans, C := alpha*A*A**H + beta*C.
trans = MagmaConjTrans, C := alpha*A**H*A + beta*C.
[in] | n | INTEGER. On entry, specifies the order of the matrix C. N must be at least zero, and at most 32. |
[in] | k | INTEGER. On entry with trans = MagmaNoTrans, k specifies the number of columns of the matrix A, and on entry with trans = MagmaConjTrans, k specifies the number of rows of the matrix A. K must be at least zero. |
[in] | alpha | REAL. On entry, ALPHA specifies the scalar alpha. |
[in] | dA | A COMPLEX array DIMENSION ( ldda, ka ), where ka is k when trans = MagmaNoTrans, and is n otherwise. Before entry with trans = MagmaNoTrans, the leading n by k part of the array A must contain the matrix A, otherwise the leading k by n part of the array A must contain the matrix A. |
[in] | ldda | INTEGER. On entry, ldda specifies the first dimension of A as declared in the calling (sub) program. When trans = MagmaNoTrans then ldda must be at least max( 1, n ), otherwise ldda must be at least max( 1, k ). |
[in] | beta | REAL. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input. |
[in,out] | dC | A COMPLEX array of DIMENSION ( lddc, n ). Before entry with uplo = MagmaUpper, the leading n by n upper triangular part of the array C must contain the upper triangular part of the Hermitian matrix and the strictly lower triangular part of C is not referenced. On exit, the upper triangular part of the array C is overwritten by the upper triangular part of the updated matrix. Before entry with uplo = MagmaLower, the leading n by n lower triangular part of the array C must contain the lower triangular part of the Hermitian matrix and the strictly upper triangular part of C is not referenced. On exit, the lower triangular part of the array C is overwritten by the lower triangular part of the updated matrix. Note that the imaginary parts of the diagonal elements need not be set, they are assumed to be zero, and on exit they are set to zero. |
[in] | lddc | INTEGER. On entry, lddc specifies the first dimension of C as declared in the calling (sub) program. lddc must be at least max( 1, n ). |
[in] | nthread_blocks | INTEGER. The number of thread blocks used to update C. |
[in] | queue | magma_queue_t Queue to execute in. |
void magma_dsyrk_mgpu | ( | magma_int_t | ngpu, |
magma_uplo_t | uplo, | ||
magma_trans_t | trans, | ||
magma_int_t | nb, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
double | alpha, | ||
magmaDouble_ptr | dB[], | ||
magma_int_t | lddb, | ||
magma_int_t | b_offset, | ||
double | beta, | ||
magmaDouble_ptr | dC[], | ||
magma_int_t | lddc, | ||
magma_int_t | c_offset, | ||
magma_int_t | nqueue, | ||
magma_queue_t | queues[][10] ) |
This dsyrk_mgpu is an internal routine used by dpotrf_mgpu_right.
It makes specific assumptions about the block diagonal.
void magmablas_dsyrk_small_reduce | ( | magma_uplo_t | uplo, |
magma_trans_t | trans, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
double | alpha, | ||
double * | dA, | ||
magma_int_t | ldda, | ||
double | beta, | ||
double * | dC, | ||
magma_int_t | lddc, | ||
magma_int_t | nthread_blocks, | ||
magma_queue_t | queue ) |
DSYRK performs one of the symmetric rank k operations.
C := alpha*A*A**T + beta*C,
or
C := alpha*A**T*A + beta*C,
where alpha and beta are real scalars, C is an n by n symmetric matrix and A is an n by k matrix in the first case and a k by n matrix in the second case.
This is a special routine that supports n up to 32 only. It assumes that k is very large so that the computation of the small matrix C is distributed across many thread blocks. The number of thread blocks can be defined by the user through the interface. However, the kernel can work with a maximum of ceil(k / n) thread blocks. Extra thread blocks, if any, are ignored by the kernel. Reduction across thread blocks is performed using atomics.
[in] | uplo | magma_uplo_t. On entry, uplo specifies whether the upper or lower triangular part of the array C is to be referenced as follows: |
uplo = MagmaUpper Only the upper triangular part of C is to be referenced.
uplo = MagmaLower Only the lower triangular part of C is to be referenced.
[in] | trans | magma_trans_t. On entry, trans specifies the operation to be performed as follows: |
trans = MagmaNoTrans, C := alpha*A*A**T + beta*C.
trans = MagmaConjTrans, C := alpha*A**T*A + beta*C.
[in] | n | INTEGER. On entry, specifies the order of the matrix C. N must be at least zero, and at most 32. |
[in] | k | INTEGER. On entry with trans = MagmaNoTrans, k specifies the number of columns of the matrix A, and on entry with trans = MagmaConjTrans, k specifies the number of rows of the matrix A. K must be at least zero. |
[in] | alpha | DOUBLE PRECISION. On entry, ALPHA specifies the scalar alpha. |
[in] | dA | A DOUBLE PRECISION array DIMENSION ( ldda, ka ), where ka is k when trans = MagmaNoTrans, and is n otherwise. Before entry with trans = MagmaNoTrans, the leading n by k part of the array A must contain the matrix A, otherwise the leading k by n part of the array A must contain the matrix A. |
[in] | ldda | INTEGER. On entry, ldda specifies the first dimension of A as declared in the calling (sub) program. When trans = MagmaNoTrans then ldda must be at least max( 1, n ), otherwise ldda must be at least max( 1, k ). |
[in] | beta | DOUBLE PRECISION. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input. |
[in,out] | dC | A DOUBLE PRECISION array of DIMENSION ( lddc, n ). Before entry with uplo = MagmaUpper, the leading n by n upper triangular part of the array C must contain the upper triangular part of the symmetric matrix and the strictly lower triangular part of C is not referenced. On exit, the upper triangular part of the array C is overwritten by the upper triangular part of the updated matrix. Before entry with uplo = MagmaLower, the leading n by n lower triangular part of the array C must contain the lower triangular part of the symmetric matrix and the strictly upper triangular part of C is not referenced. On exit, the lower triangular part of the array C is overwritten by the lower triangular part of the updated matrix. Note that C is real, so its diagonal elements have no imaginary parts to set. |
[in] | lddc | INTEGER. On entry, lddc specifies the first dimension of C as declared in the calling (sub) program. lddc must be at least max( 1, n ). |
[in] | nthread_blocks | INTEGER. The number of thread blocks used to update C. |
[in] | queue | magma_queue_t Queue to execute in. |
void magma_ssyrk_mgpu | ( | magma_int_t | ngpu, |
magma_uplo_t | uplo, | ||
magma_trans_t | trans, | ||
magma_int_t | nb, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
float | alpha, | ||
magmaFloat_ptr | dB[], | ||
magma_int_t | lddb, | ||
magma_int_t | b_offset, | ||
float | beta, | ||
magmaFloat_ptr | dC[], | ||
magma_int_t | lddc, | ||
magma_int_t | c_offset, | ||
magma_int_t | nqueue, | ||
magma_queue_t | queues[][10] ) |
This ssyrk_mgpu is an internal routine used by spotrf_mgpu_right.
It makes specific assumptions about the block diagonal.
void magmablas_ssyrk_small_reduce | ( | magma_uplo_t | uplo, |
magma_trans_t | trans, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
float | alpha, | ||
float * | dA, | ||
magma_int_t | ldda, | ||
float | beta, | ||
float * | dC, | ||
magma_int_t | lddc, | ||
magma_int_t | nthread_blocks, | ||
magma_queue_t | queue ) |
SSYRK performs one of the symmetric rank k operations.
C := alpha*A*A**T + beta*C,
or
C := alpha*A**T*A + beta*C,
where alpha and beta are real scalars, C is an n by n symmetric matrix and A is an n by k matrix in the first case and a k by n matrix in the second case.
This is a special routine that supports n up to 32 only. It assumes that k is very large so that the computation of the small matrix C is distributed across many thread blocks. The number of thread blocks can be defined by the user through the interface. However, the kernel can work with a maximum of ceil(k / n) thread blocks. Extra thread blocks, if any, are ignored by the kernel. Reduction across thread blocks is performed using atomics.
[in] | uplo | magma_uplo_t. On entry, uplo specifies whether the upper or lower triangular part of the array C is to be referenced as follows: |
uplo = MagmaUpper Only the upper triangular part of C is to be referenced.
uplo = MagmaLower Only the lower triangular part of C is to be referenced.
[in] | trans | magma_trans_t. On entry, trans specifies the operation to be performed as follows: |
trans = MagmaNoTrans, C := alpha*A*A**T + beta*C.
trans = MagmaConjTrans, C := alpha*A**T*A + beta*C.
[in] | n | INTEGER. On entry, specifies the order of the matrix C. N must be at least zero, and at most 32. |
[in] | k | INTEGER. On entry with trans = MagmaNoTrans, k specifies the number of columns of the matrix A, and on entry with trans = MagmaConjTrans, k specifies the number of rows of the matrix A. K must be at least zero. |
[in] | alpha | REAL. On entry, ALPHA specifies the scalar alpha. |
[in] | dA | A REAL array DIMENSION ( ldda, ka ), where ka is k when trans = MagmaNoTrans, and is n otherwise. Before entry with trans = MagmaNoTrans, the leading n by k part of the array A must contain the matrix A, otherwise the leading k by n part of the array A must contain the matrix A. |
[in] | ldda | INTEGER. On entry, ldda specifies the first dimension of A as declared in the calling (sub) program. When trans = MagmaNoTrans then ldda must be at least max( 1, n ), otherwise ldda must be at least max( 1, k ). |
[in] | beta | REAL. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input. |
[in,out] | dC | A REAL array of DIMENSION ( lddc, n ). Before entry with uplo = MagmaUpper, the leading n by n upper triangular part of the array C must contain the upper triangular part of the symmetric matrix and the strictly lower triangular part of C is not referenced. On exit, the upper triangular part of the array C is overwritten by the upper triangular part of the updated matrix. Before entry with uplo = MagmaLower, the leading n by n lower triangular part of the array C must contain the lower triangular part of the symmetric matrix and the strictly upper triangular part of C is not referenced. On exit, the lower triangular part of the array C is overwritten by the lower triangular part of the updated matrix. Note that C is real, so its diagonal elements have no imaginary parts to set. |
[in] | lddc | INTEGER. On entry, lddc specifies the first dimension of C as declared in the calling (sub) program. lddc must be at least max( 1, n ). |
[in] | nthread_blocks | INTEGER. The number of thread blocks used to update C. |
[in] | queue | magma_queue_t Queue to execute in. |
void magma_zherk_mgpu | ( | magma_int_t | ngpu, |
magma_uplo_t | uplo, | ||
magma_trans_t | trans, | ||
magma_int_t | nb, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
double | alpha, | ||
magmaDoubleComplex_ptr | dB[], | ||
magma_int_t | lddb, | ||
magma_int_t | b_offset, | ||
double | beta, | ||
magmaDoubleComplex_ptr | dC[], | ||
magma_int_t | lddc, | ||
magma_int_t | c_offset, | ||
magma_int_t | nqueue, | ||
magma_queue_t | queues[][10] ) |
This zherk_mgpu is an internal routine used by zpotrf_mgpu_right.
It makes specific assumptions about the block diagonal.
void magmablas_zherk_small_reduce | ( | magma_uplo_t | uplo, |
magma_trans_t | trans, | ||
magma_int_t | n, | ||
magma_int_t | k, | ||
double | alpha, | ||
magmaDoubleComplex * | dA, | ||
magma_int_t | ldda, | ||
double | beta, | ||
magmaDoubleComplex * | dC, | ||
magma_int_t | lddc, | ||
magma_int_t | nthread_blocks, | ||
magma_queue_t | queue ) |
ZHERK performs one of the Hermitian rank k operations.
C := alpha*A*A**H + beta*C,
or
C := alpha*A**H*A + beta*C,
where alpha and beta are real scalars, C is an n by n Hermitian matrix and A is an n by k matrix in the first case and a k by n matrix in the second case.
This is a special routine that supports n up to 32 only. It assumes that k is very large so that the computation of the small matrix C is distributed across many thread blocks. The number of thread blocks can be defined by the user through the interface. However, the kernel can work with a maximum of ceil(k / n) thread blocks. Extra thread blocks, if any, are ignored by the kernel. Reduction across thread blocks is performed using atomics.
[in] | uplo | magma_uplo_t. On entry, uplo specifies whether the upper or lower triangular part of the array C is to be referenced as follows: |
uplo = MagmaUpper Only the upper triangular part of C is to be referenced.
uplo = MagmaLower Only the lower triangular part of C is to be referenced.
[in] | trans | magma_trans_t. On entry, trans specifies the operation to be performed as follows: |
trans = MagmaNoTrans, C := alpha*A*A**H + beta*C.
trans = MagmaConjTrans, C := alpha*A**H*A + beta*C.
[in] | n | INTEGER. On entry, specifies the order of the matrix C. N must be at least zero, and at most 32. |
[in] | k | INTEGER. On entry with trans = MagmaNoTrans, k specifies the number of columns of the matrix A, and on entry with trans = MagmaConjTrans, k specifies the number of rows of the matrix A. K must be at least zero. |
[in] | alpha | DOUBLE PRECISION. On entry, ALPHA specifies the scalar alpha. |
[in] | dA | A COMPLEX_16 array DIMENSION ( ldda, ka ), where ka is k when trans = MagmaNoTrans, and is n otherwise. Before entry with trans = MagmaNoTrans, the leading n by k part of the array A must contain the matrix A, otherwise the leading k by n part of the array A must contain the matrix A. |
[in] | ldda | INTEGER. On entry, ldda specifies the first dimension of A as declared in the calling (sub) program. When trans = MagmaNoTrans then ldda must be at least max( 1, n ), otherwise ldda must be at least max( 1, k ). |
[in] | beta | DOUBLE PRECISION. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input. |
[in,out] | dC | A COMPLEX_16 array of DIMENSION ( lddc, n ). Before entry with uplo = MagmaUpper, the leading n by n upper triangular part of the array C must contain the upper triangular part of the Hermitian matrix and the strictly lower triangular part of C is not referenced. On exit, the upper triangular part of the array C is overwritten by the upper triangular part of the updated matrix. Before entry with uplo = MagmaLower, the leading n by n lower triangular part of the array C must contain the lower triangular part of the Hermitian matrix and the strictly upper triangular part of C is not referenced. On exit, the lower triangular part of the array C is overwritten by the lower triangular part of the updated matrix. Note that the imaginary parts of the diagonal elements need not be set, they are assumed to be zero, and on exit they are set to zero. |
[in] | lddc | INTEGER. On entry, lddc specifies the first dimension of C as declared in the calling (sub) program. lddc must be at least max( 1, n ). |
[in] | nthread_blocks | INTEGER. The number of thread blocks used to update C. |
[in] | queue | magma_queue_t Queue to execute in. |