![]() |
MAGMA
1.7.0
Matrix Algebra for GPU and Multicore Architectures
|
Functions | |
void | magma_dgemv (magma_trans_t transA, magma_int_t m, magma_int_t n, double alpha, magmaDouble_const_ptr dA, magma_int_t ldda, magmaDouble_const_ptr dx, magma_int_t incx, double beta, magmaDouble_ptr dy, magma_int_t incy) |
Perform matrix-vector product. More... | |
void | magma_dger (magma_int_t m, magma_int_t n, double alpha, magmaDouble_const_ptr dx, magma_int_t incx, magmaDouble_const_ptr dy, magma_int_t incy, magmaDouble_ptr dA, magma_int_t ldda) |
Perform rank-1 update, \( A = \alpha x y^H + A \). More... | |
void | magma_dsymv (magma_uplo_t uplo, magma_int_t n, double alpha, magmaDouble_const_ptr dA, magma_int_t ldda, magmaDouble_const_ptr dx, magma_int_t incx, double beta, magmaDouble_ptr dy, magma_int_t incy) |
Perform symmetric matrix-vector product, \( y = \alpha A x + \beta y \). More... | |
void | magma_dsyr (magma_uplo_t uplo, magma_int_t n, double alpha, magmaDouble_const_ptr dx, magma_int_t incx, magmaDouble_ptr dA, magma_int_t ldda) |
Perform symmetric rank-1 update, \( A = \alpha x x^H + A \). More... | |
void | magma_dsyr2 (magma_uplo_t uplo, magma_int_t n, double alpha, magmaDouble_const_ptr dx, magma_int_t incx, magmaDouble_const_ptr dy, magma_int_t incy, magmaDouble_ptr dA, magma_int_t ldda) |
Perform symmetric rank-2 update, \( A = \alpha x y^H + conj(\alpha) y x^H + A \). More... | |
void | magma_dtrmv (magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t n, magmaDouble_const_ptr dA, magma_int_t ldda, magmaDouble_ptr dx, magma_int_t incx) |
Perform triangular matrix-vector product. More... | |
void | magma_dtrsv (magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t n, magmaDouble_const_ptr dA, magma_int_t ldda, magmaDouble_ptr dx, magma_int_t incx) |
Solve triangular matrix-vector system (one right-hand side). More... | |
void | magmablas_dgemv_batched (magma_trans_t trans, magma_int_t m, magma_int_t n, double alpha, magmaDouble_ptr dA_array[], magma_int_t ldda, magmaDouble_ptr dx_array[], magma_int_t incx, double beta, magmaDouble_ptr dy_array[], magma_int_t incy, magma_int_t batchCount, magma_queue_t queue) |
DGEMV performs one of the matrix-vector operations. More... | |
void | magmablas_dgemv_conjv (magma_int_t m, magma_int_t n, double alpha, magmaDouble_const_ptr dA, magma_int_t ldda, magmaDouble_const_ptr dx, magma_int_t incx, double beta, magmaDouble_ptr dy, magma_int_t incy) |
DGEMV_CONJV performs the matrix-vector operation. More... | |
void | magmablas_dgemv_q (magma_trans_t trans, magma_int_t m, magma_int_t n, double alpha, magmaDouble_const_ptr dA, magma_int_t ldda, magmaDouble_const_ptr dx, magma_int_t incx, double beta, magmaDouble_ptr dy, magma_int_t incy, magma_queue_t queue) |
DGEMV performs one of the matrix-vector operations. More... | |
void | magmablas_dgemv_tesla (magma_trans_t trans, magma_int_t m, magma_int_t n, double alpha, const double *A, magma_int_t lda, const double *x, magma_int_t incx, double beta, double *y, magma_int_t incy) |
This routine computes: 1) y = A x if trans == MagmaNoTrans, alpha == 1, beta == 0, and incx == incy == 1 (using magmablas code) 2) y = alpha A^T x if trans == MagmaTrans, beta == 0, and incx == incy == 1 (using magmablas code) 3) y = alpha A^TRANS x + beta y otherwise, using CUBLAS. More... | |
void | magmablas_dswapblk_q (magma_order_t order, magma_int_t n, magmaDouble_ptr dA, magma_int_t ldda, magmaDouble_ptr dB, magma_int_t lddb, magma_int_t i1, magma_int_t i2, const magma_int_t *ipiv, magma_int_t inci, magma_int_t offset, magma_queue_t queue) |
| |
void | magmablas_dswapblk (magma_order_t order, magma_int_t n, magmaDouble_ptr dA, magma_int_t ldda, magmaDouble_ptr dB, magma_int_t lddb, magma_int_t i1, magma_int_t i2, const magma_int_t *ipiv, magma_int_t inci, magma_int_t offset) |
magma_int_t | magmablas_dsymv_work (magma_uplo_t uplo, magma_int_t n, double alpha, magmaDouble_const_ptr dA, magma_int_t ldda, magmaDouble_const_ptr dx, magma_int_t incx, double beta, magmaDouble_ptr dy, magma_int_t incy, magmaDouble_ptr dwork, magma_int_t lwork, magma_queue_t queue) |
magmablas_dsymv_work performs the matrix-vector operation: More... | |
magma_int_t | magmablas_dsymv (magma_uplo_t uplo, magma_int_t n, double alpha, magmaDouble_const_ptr dA, magma_int_t ldda, magmaDouble_const_ptr dx, magma_int_t incx, double beta, magmaDouble_ptr dy, magma_int_t incy) |
magmablas_dsymv performs the matrix-vector operation: More... | |
magma_int_t | magmablas_dsymv_mgpu (magma_uplo_t uplo, magma_int_t n, double alpha, magmaDouble_const_ptr const d_lA[], magma_int_t ldda, magma_int_t offset, double const *x, magma_int_t incx, double beta, double *y, magma_int_t incy, double *hwork, magma_int_t lhwork, magmaDouble_ptr dwork[], magma_int_t ldwork, magma_int_t ngpu, magma_int_t nb, magma_queue_t queues[]) |
magmablas_dsymv_mgpu performs the matrix-vector operation: More... | |
magma_int_t | magmablas_dsymv_mgpu_sync (magma_uplo_t uplo, magma_int_t n, double alpha, magmaDouble_const_ptr const d_lA[], magma_int_t ldda, magma_int_t offset, double const *x, magma_int_t incx, double beta, double *y, magma_int_t incy, double *hwork, magma_int_t lhwork, magmaDouble_ptr dwork[], magma_int_t ldwork, magma_int_t ngpu, magma_int_t nb, magma_queue_t queues[]) |
Synchronizes and acculumates final dsymv result. More... | |
void | magmablas_dtrsv (magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t n, magmaDouble_const_ptr dA, magma_int_t ldda, magmaDouble_ptr db, magma_int_t incb, magma_queue_t queue) |
dtrsv solves one of the matrix equations on gpu More... | |
void | magmablas_dtrsv_batched (magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t n, double **A_array, magma_int_t lda, double **b_array, magma_int_t incb, magma_int_t batchCount, magma_queue_t queue) |
dtrsv solves one of the matrix equations on gpu More... | |
void | magmablas_zgemv_batched (magma_trans_t trans, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_ptr dA_array[], magma_int_t ldda, magmaDoubleComplex_ptr dx_array[], magma_int_t incx, magmaDoubleComplex beta, magmaDoubleComplex_ptr dy_array[], magma_int_t incy, magma_int_t batchCount, magma_queue_t queue) |
ZGEMV performs one of the matrix-vector operations. More... | |
void magma_dgemv | ( | magma_trans_t | transA, |
magma_int_t | m, | ||
magma_int_t | n, | ||
double | alpha, | ||
magmaDouble_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDouble_const_ptr | dx, | ||
magma_int_t | incx, | ||
double | beta, | ||
magmaDouble_ptr | dy, | ||
magma_int_t | incy | ||
) |
Perform matrix-vector product.
\( y = \alpha A x + \beta y \) (transA == MagmaNoTrans), or
\( y = \alpha A^T x + \beta y \) (transA == MagmaTrans), or
\( y = \alpha A^H x + \beta y \) (transA == MagmaConjTrans).
[in] | transA | Operation to perform on A. |
[in] | m | Number of rows of A. m >= 0. |
[in] | n | Number of columns of A. n >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dA | DOUBLE_PRECISION array of dimension (ldda,n), ldda >= max(1,m). The m-by-n matrix A, on GPU device. |
[in] | ldda | Leading dimension of dA. |
[in] | dx | DOUBLE_PRECISION array on GPU device. If transA == MagmaNoTrans, the n element vector x of dimension (1 + (n-1)*incx); otherwise, the m element vector x of dimension (1 + (m-1)*incx). |
[in] | incx | Stride between consecutive elements of dx. incx != 0. |
[in] | beta | Scalar \( \beta \) |
[in,out] | dy | DOUBLE_PRECISION array on GPU device. If transA == MagmaNoTrans, the m element vector y of dimension (1 + (m-1)*incy); otherwise, the n element vector y of dimension (1 + (n-1)*incy). |
[in] | incy | Stride between consecutive elements of dy. incy != 0. |
void magma_dger | ( | magma_int_t | m, |
magma_int_t | n, | ||
double | alpha, | ||
magmaDouble_const_ptr | dx, | ||
magma_int_t | incx, | ||
magmaDouble_const_ptr | dy, | ||
magma_int_t | incy, | ||
magmaDouble_ptr | dA, | ||
magma_int_t | ldda | ||
) |
Perform rank-1 update, \( A = \alpha x y^H + A \).
[in] | m | Number of rows of A. m >= 0. |
[in] | n | Number of columns of A. n >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dx | DOUBLE_PRECISION array on GPU device. The m element vector x of dimension (1 + (m-1)*incx). |
[in] | incx | Stride between consecutive elements of dx. incx != 0. |
[in] | dy | DOUBLE_PRECISION array on GPU device. The n element vector y of dimension (1 + (n-1)*incy). |
[in] | incy | Stride between consecutive elements of dy. incy != 0. |
[in,out] | dA | DOUBLE_PRECISION array on GPU device. The m-by-n matrix A of dimension (ldda,n), ldda >= max(1,m). |
[in] | ldda | Leading dimension of dA. |
void magma_dsymv | ( | magma_uplo_t | uplo, |
magma_int_t | n, | ||
double | alpha, | ||
magmaDouble_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDouble_const_ptr | dx, | ||
magma_int_t | incx, | ||
double | beta, | ||
magmaDouble_ptr | dy, | ||
magma_int_t | incy | ||
) |
Perform symmetric matrix-vector product, \( y = \alpha A x + \beta y \).
[in] | uplo | Whether the upper or lower triangle of A is referenced. |
[in] | n | Number of rows and columns of A. n >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dA | DOUBLE_PRECISION array of dimension (ldda,n), ldda >= max(1,n). The n-by-n matrix A, on GPU device. |
[in] | ldda | Leading dimension of dA. |
[in] | dx | DOUBLE_PRECISION array on GPU device. The m element vector x of dimension (1 + (m-1)*incx). |
[in] | incx | Stride between consecutive elements of dx. incx != 0. |
[in] | beta | Scalar \( \beta \) |
[in,out] | dy | DOUBLE_PRECISION array on GPU device. The n element vector y of dimension (1 + (n-1)*incy). |
[in] | incy | Stride between consecutive elements of dy. incy != 0. |
void magma_dsyr | ( | magma_uplo_t | uplo, |
magma_int_t | n, | ||
double | alpha, | ||
magmaDouble_const_ptr | dx, | ||
magma_int_t | incx, | ||
magmaDouble_ptr | dA, | ||
magma_int_t | ldda | ||
) |
Perform symmetric rank-1 update, \( A = \alpha x x^H + A \).
[in] | uplo | Whether the upper or lower triangle of A is referenced. |
[in] | n | Number of rows and columns of A. n >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dx | DOUBLE_PRECISION array on GPU device. The n element vector x of dimension (1 + (n-1)*incx). |
[in] | incx | Stride between consecutive elements of dx. incx != 0. |
[in,out] | dA | DOUBLE_PRECISION array of dimension (ldda,n), ldda >= max(1,n). The n-by-n matrix A, on GPU device. |
[in] | ldda | Leading dimension of dA. |
void magma_dsyr2 | ( | magma_uplo_t | uplo, |
magma_int_t | n, | ||
double | alpha, | ||
magmaDouble_const_ptr | dx, | ||
magma_int_t | incx, | ||
magmaDouble_const_ptr | dy, | ||
magma_int_t | incy, | ||
magmaDouble_ptr | dA, | ||
magma_int_t | ldda | ||
) |
Perform symmetric rank-2 update, \( A = \alpha x y^H + conj(\alpha) y x^H + A \).
[in] | uplo | Whether the upper or lower triangle of A is referenced. |
[in] | n | Number of rows and columns of A. n >= 0. |
[in] | alpha | Scalar \( \alpha \) |
[in] | dx | DOUBLE_PRECISION array on GPU device. The n element vector x of dimension (1 + (n-1)*incx). |
[in] | incx | Stride between consecutive elements of dx. incx != 0. |
[in] | dy | DOUBLE_PRECISION array on GPU device. The n element vector y of dimension (1 + (n-1)*incy). |
[in] | incy | Stride between consecutive elements of dy. incy != 0. |
[in,out] | dA | DOUBLE_PRECISION array of dimension (ldda,n), ldda >= max(1,n). The n-by-n matrix A, on GPU device. |
[in] | ldda | Leading dimension of dA. |
void magma_dtrmv | ( | magma_uplo_t | uplo, |
magma_trans_t | trans, | ||
magma_diag_t | diag, | ||
magma_int_t | n, | ||
magmaDouble_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDouble_ptr | dx, | ||
magma_int_t | incx | ||
) |
Perform triangular matrix-vector product.
\( x = A x \) (trans == MagmaNoTrans), or
\( x = A^T x \) (trans == MagmaTrans), or
\( x = A^H x \) (trans == MagmaConjTrans).
[in] | uplo | Whether the upper or lower triangle of A is referenced. |
[in] | trans | Operation to perform on A. |
[in] | diag | Whether the diagonal of A is assumed to be unit or non-unit. |
[in] | n | Number of rows and columns of A. n >= 0. |
[in] | dA | DOUBLE_PRECISION array of dimension (ldda,n), ldda >= max(1,n). The n-by-n matrix A, on GPU device. |
[in] | ldda | Leading dimension of dA. |
[in] | dx | DOUBLE_PRECISION array on GPU device. The n element vector x of dimension (1 + (n-1)*incx). |
[in] | incx | Stride between consecutive elements of dx. incx != 0. |
void magma_dtrsv | ( | magma_uplo_t | uplo, |
magma_trans_t | trans, | ||
magma_diag_t | diag, | ||
magma_int_t | n, | ||
magmaDouble_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDouble_ptr | dx, | ||
magma_int_t | incx | ||
) |
Solve triangular matrix-vector system (one right-hand side).
\( A x = b \) (trans == MagmaNoTrans), or
\( A^T x = b \) (trans == MagmaTrans), or
\( A^H x = b \) (trans == MagmaConjTrans).
[in] | uplo | Whether the upper or lower triangle of A is referenced. |
[in] | trans | Operation to perform on A. |
[in] | diag | Whether the diagonal of A is assumed to be unit or non-unit. |
[in] | n | Number of rows and columns of A. n >= 0. |
[in] | dA | DOUBLE_PRECISION array of dimension (ldda,n), ldda >= max(1,n). The n-by-n matrix A, on GPU device. |
[in] | ldda | Leading dimension of dA. |
[in,out] | dx | DOUBLE_PRECISION array on GPU device. On entry, the n element RHS vector b of dimension (1 + (n-1)*incx). On exit, overwritten with the solution vector x. |
[in] | incx | Stride between consecutive elements of dx. incx != 0. |
void magmablas_dgemv_batched | ( | magma_trans_t | trans, |
magma_int_t | m, | ||
magma_int_t | n, | ||
double | alpha, | ||
magmaDouble_ptr | dA_array[], | ||
magma_int_t | ldda, | ||
magmaDouble_ptr | dx_array[], | ||
magma_int_t | incx, | ||
double | beta, | ||
magmaDouble_ptr | dy_array[], | ||
magma_int_t | incy, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
DGEMV performs one of the matrix-vector operations.
y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, or y := alpha*A**H*x + beta*y,
where alpha and beta are scalars, x and y are vectors and A is an m by n matrix.
[in] | trans | magma_trans_t On entry, TRANS specifies the operation to be performed as follows:
|
[in] | m | INTEGER On entry, m specifies the number of rows of the matrix A. |
[in] | n | INTEGER On entry, n specifies the number of columns of the matrix A |
[in] | alpha | DOUBLE_PRECISION On entry, ALPHA specifies the scalar alpha. |
[in] | dA_array | Array of pointers, dimension (batchCount). Each is a DOUBLE_PRECISION array A of DIMENSION ( ldda, n ) on the GPU |
[in] | ldda | INTEGER LDDA specifies the leading dimension of A. |
[in] | dx_array | Array of pointers, dimension (batchCount). Each is a DOUBLE_PRECISION array of dimension n if trans == MagmaNoTrans m if trans == MagmaTrans or MagmaConjTrans |
[in] | incx | Specifies the increment for the elements of X. INCX must not be zero. |
[in] | beta | DOUBLE_PRECISION On entry, ALPHA specifies the scalar beta. When BETA is supplied as zero then Y need not be set on input. |
[out] | dy_array | Array of pointers, dimension (batchCount). Each is a DOUBLE_PRECISION array of dimension m if trans == MagmaNoTrans n if trans == MagmaTrans or MagmaConjTrans |
[in] | incy | Specifies the increment for the elements of Y. INCY must not be zero. |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
void magmablas_dgemv_conjv | ( | magma_int_t | m, |
magma_int_t | n, | ||
double | alpha, | ||
magmaDouble_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDouble_const_ptr | dx, | ||
magma_int_t | incx, | ||
double | beta, | ||
magmaDouble_ptr | dy, | ||
magma_int_t | incy | ||
) |
DGEMV_CONJV performs the matrix-vector operation.
y := alpha*A*conj(x) + beta*y,
where alpha and beta are scalars, x and y are vectors and A is an m by n matrix.
[in] | m | INTEGER On entry, m specifies the number of rows of the matrix A. |
[in] | n | INTEGER On entry, n specifies the number of columns of the matrix A |
[in] | alpha | DOUBLE_PRECISION On entry, ALPHA specifies the scalar alpha. |
[in] | dA | DOUBLE_PRECISION array of dimension ( LDDA, n ) on the GPU. |
[in] | ldda | INTEGER LDDA specifies the leading dimension of A. |
[in] | dx | DOUBLE_PRECISION array of dimension n |
[in] | incx | Specifies the increment for the elements of X. INCX must not be zero. |
[in] | beta | DOUBLE REAL On entry, BETA specifies the scalar beta. When BETA is supplied as zero then Y need not be set on input. |
[out] | dy | DOUBLE PRECISION array of dimension m |
[in] | incy | Specifies the increment for the elements of Y. INCY must not be zero. |
void magmablas_dgemv_q | ( | magma_trans_t | trans, |
magma_int_t | m, | ||
magma_int_t | n, | ||
double | alpha, | ||
magmaDouble_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDouble_const_ptr | dx, | ||
magma_int_t | incx, | ||
double | beta, | ||
magmaDouble_ptr | dy, | ||
magma_int_t | incy, | ||
magma_queue_t | queue | ||
) |
DGEMV performs one of the matrix-vector operations.
y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, or y := alpha*A**H*x + beta*y,
where alpha and beta are scalars, x and y are vectors and A is an m by n matrix.
[in] | trans | magma_trans_t On entry, TRANS specifies the operation to be performed as follows:
|
[in] | m | INTEGER On entry, m specifies the number of rows of the matrix A. |
[in] | n | INTEGER On entry, n specifies the number of columns of the matrix A |
[in] | alpha | DOUBLE_PRECISION On entry, ALPHA specifies the scalar alpha. |
[in] | dA | DOUBLE_PRECISION array of dimension ( LDDA, n ) on the GPU. |
[in] | ldda | INTEGER LDDA specifies the leading dimension of A. |
[in] | dx | DOUBLE_PRECISION array of dimension n if trans == MagmaNoTrans m if trans == MagmaTrans or MagmaConjTrans |
[in] | incx | Specifies the increment for the elements of X. INCX must not be zero. |
[in] | beta | DOUBLE_PRECISION On entry, BETA specifies the scalar beta. When BETA is supplied as zero then Y need not be set on input. |
[out] | dy | DOUBLE_PRECISION array of dimension m if trans == MagmaNoTrans n if trans == MagmaTrans or MagmaConjTrans |
[in] | incy | Specifies the increment for the elements of Y. INCY must not be zero. |
[in] | queue | magma_queue_t Queue to execute in. |
void magmablas_dgemv_tesla | ( | magma_trans_t | trans, |
magma_int_t | m, | ||
magma_int_t | n, | ||
double | alpha, | ||
const double * | A, | ||
magma_int_t | lda, | ||
const double * | x, | ||
magma_int_t | incx, | ||
double | beta, | ||
double * | y, | ||
magma_int_t | incy | ||
) |
This routine computes: 1) y = A x if trans == MagmaNoTrans, alpha == 1, beta == 0, and incx == incy == 1 (using magmablas code) 2) y = alpha A^T x if trans == MagmaTrans, beta == 0, and incx == incy == 1 (using magmablas code) 3) y = alpha A^TRANS x + beta y otherwise, using CUBLAS.
[in] | trans | magma_trans_t On entry, TRANS specifies the operation to be performed as follows:
|
[in] | m | INTEGER On entry, M specifies the number of rows of the matrix A. |
[in] | n | INTEGER On entry, N specifies the number of columns of the matrix A |
[in] | alpha | DOUBLE PRECISION On entry, ALPHA specifies the scalar alpha. |
[in] | A | DOUBLE PRECISION array of dimension (LDA, N) on the GPU. |
[in] | lda | INTEGER LDA specifies the leading dimension of A. |
[in] | x | DOUBLE PRECISION array of dimension n if trans == 'n' m if trans == 't' |
[in] | incx | Specifies the increment for the elements of X. INCX must not be zero. |
[in] | beta | DOUBLE PRECISION On entry, BETA specifies the scalar beta. When BETA is supplied as zero then Y need not be set on input. |
[out] | y | DOUBLE PRECISION array of dimension m if trans == 'n' n if trans == 't' |
[in] | incy | Specifies the increment for the elements of Y. INCY must not be zero. |
void magmablas_dswapblk | ( | magma_order_t | order, |
magma_int_t | n, | ||
magmaDouble_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDouble_ptr | dB, | ||
magma_int_t | lddb, | ||
magma_int_t | i1, | ||
magma_int_t | i2, | ||
const magma_int_t * | ipiv, | ||
magma_int_t | inci, | ||
magma_int_t | offset | ||
) |
magma_int_t magmablas_dsymv | ( | magma_uplo_t | uplo, |
magma_int_t | n, | ||
double | alpha, | ||
magmaDouble_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDouble_const_ptr | dx, | ||
magma_int_t | incx, | ||
double | beta, | ||
magmaDouble_ptr | dy, | ||
magma_int_t | incy | ||
) |
magmablas_dsymv performs the matrix-vector operation:
y := alpha*A*x + beta*y,
where alpha and beta are scalars, x and y are n element vectors and A is an n by n symmetric matrix.
[in] | uplo | magma_uplo_t. On entry, UPLO specifies whether the upper or lower triangular part of the array A is to be referenced as follows:
|
[in] | n | INTEGER. On entry, N specifies the order of the matrix A. N must be at least zero. |
[in] | alpha | DOUBLE_PRECISION. On entry, ALPHA specifies the scalar alpha. |
[in] | dA | DOUBLE_PRECISION array of DIMENSION ( LDDA, n ). Before entry with UPLO = MagmaUpper, the leading n by n upper triangular part of the array A must contain the upper triangular part of the symmetric matrix and the strictly lower triangular part of A is not referenced. Before entry with UPLO = MagmaLower, the leading n by n lower triangular part of the array A must contain the lower triangular part of the symmetric matrix and the strictly upper triangular part of A is not referenced. Note that the imaginary parts of the diagonal elements need not be set and are assumed to be zero. |
[in] | ldda | INTEGER. On entry, LDDA specifies the first dimension of A as declared in the calling (sub) program. LDDA must be at least max( 1, n ). It is recommended that ldda is multiple of 16. Otherwise performance would be deteriorated as the memory accesses would not be fully coalescent. |
[in] | dx | DOUBLE_PRECISION array of dimension at least ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented array X must contain the n element vector x. |
[in] | incx | INTEGER. On entry, INCX specifies the increment for the elements of X. INCX must not be zero. |
[in] | beta | DOUBLE_PRECISION. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then Y need not be set on input. |
[in,out] | dy | DOUBLE_PRECISION array of dimension at least ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremented array Y must contain the n element vector y. On exit, Y is overwritten by the updated vector y. |
[in] | incy | INTEGER. On entry, INCY specifies the increment for the elements of Y. INCY must not be zero. |
magma_int_t magmablas_dsymv_mgpu | ( | magma_uplo_t | uplo, |
magma_int_t | n, | ||
double | alpha, | ||
magmaDouble_const_ptr const | d_lA[], | ||
magma_int_t | ldda, | ||
magma_int_t | offset, | ||
double const * | x, | ||
magma_int_t | incx, | ||
double | beta, | ||
double * | y, | ||
magma_int_t | incy, | ||
double * | hwork, | ||
magma_int_t | lhwork, | ||
magmaDouble_ptr | dwork[], | ||
magma_int_t | ldwork, | ||
magma_int_t | ngpu, | ||
magma_int_t | nb, | ||
magma_queue_t | queues[] | ||
) |
magmablas_dsymv_mgpu performs the matrix-vector operation:
y := alpha*A*x + beta*y,
where alpha and beta are scalars, x and y are n element vectors and A is an n by n symmetric matrix.
[in] | uplo | magma_uplo_t. On entry, UPLO specifies whether the upper or lower triangular part of the array A is to be referenced as follows:
|
[in] | n | INTEGER. On entry, N specifies the order of the matrix A. N must be at least zero. |
[in] | alpha | DOUBLE_PRECISION. On entry, ALPHA specifies the scalar alpha. |
[in] | d_lA | Array of pointers, dimension (ngpu), to block-column distributed matrix A, with block size nb. d_lA[dev] is a DOUBLE_PRECISION array on GPU dev, of dimension (LDDA, nlocal), where { floor(n/nb/ngpu)*nb + nb if dev < floor(n/nb) % ngpu, nlocal = { floor(n/nb/ngpu)*nb + nnb if dev == floor(n/nb) % ngpu, { floor(n/nb/ngpu)*nb otherwise. Before entry with UPLO = MagmaUpper, the leading n by n upper triangular part of the array A must contain the upper triangular part of the symmetric matrix and the strictly lower triangular part of A is not referenced. Before entry with UPLO = MagmaLower, the leading n by n lower triangular part of the array A must contain the lower triangular part of the symmetric matrix and the strictly upper triangular part of A is not referenced. Note that the imaginary parts of the diagonal elements need not be set and are assumed to be zero. |
[in] | offset | INTEGER. Row & column offset to start of matrix A within the distributed d_lA structure. Note that N is the size of this multiply, excluding the offset, so the size of the original parent matrix is N+offset. Also, x and y do not have an offset. |
[in] | ldda | INTEGER. On entry, LDDA specifies the first dimension of A as declared in the calling (sub) program. LDDA must be at least max( 1, n + offset ). It is recommended that ldda is multiple of 16. Otherwise performance would be deteriorated as the memory accesses would not be fully coalescent. |
[in] | x | DOUBLE_PRECISION array on the CPU (not the GPU), of dimension at least ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented array X must contain the n element vector x. |
[in] | incx | INTEGER. On entry, INCX specifies the increment for the elements of X. INCX must not be zero. |
[in] | beta | DOUBLE_PRECISION. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then Y need not be set on input. |
[in,out] | y | DOUBLE_PRECISION array on the CPU (not the GPU), of dimension at least ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremented array Y must contain the n element vector y. On exit, Y is overwritten by the updated vector y. |
[in] | incy | INTEGER. On entry, INCY specifies the increment for the elements of Y. INCY must not be zero. |
hwork | (workspace) DOUBLE_PRECISION array on the CPU, of dimension (lhwork). | |
[in] | lhwork | INTEGER. The dimension of the array hwork. lhwork >= ngpu*nb. |
dwork | (workspaces) Array of pointers, dimension (ngpu), to workspace on each GPU. dwork[dev] is a DOUBLE_PRECISION array on GPU dev, of dimension (ldwork). | |
[in] | ldwork | INTEGER. The dimension of each array dwork[dev]. ldwork >= ldda*( ceil((n + offset % nb) / nb) + 1 ). |
[in] | ngpu | INTEGER. The number of GPUs to use. |
[in] | nb | INTEGER. The block size used for distributing d_lA. Must be 64. |
[in] | queues | magma_queue_t array of dimension (ngpu). queues[dev] is an execution queue on GPU dev. |
magma_int_t magmablas_dsymv_mgpu_sync | ( | magma_uplo_t | uplo, |
magma_int_t | n, | ||
double | alpha, | ||
magmaDouble_const_ptr const | d_lA[], | ||
magma_int_t | ldda, | ||
magma_int_t | offset, | ||
double const * | x, | ||
magma_int_t | incx, | ||
double | beta, | ||
double * | y, | ||
magma_int_t | incy, | ||
double * | hwork, | ||
magma_int_t | lhwork, | ||
magmaDouble_ptr | dwork[], | ||
magma_int_t | ldwork, | ||
magma_int_t | ngpu, | ||
magma_int_t | nb, | ||
magma_queue_t | queues[] | ||
) |
Synchronizes and acculumates final dsymv result.
For convenience, the parameters are identical to magmablas_dsymv_mgpu (though some are unused here).
magma_int_t magmablas_dsymv_work | ( | magma_uplo_t | uplo, |
magma_int_t | n, | ||
double | alpha, | ||
magmaDouble_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDouble_const_ptr | dx, | ||
magma_int_t | incx, | ||
double | beta, | ||
magmaDouble_ptr | dy, | ||
magma_int_t | incy, | ||
magmaDouble_ptr | dwork, | ||
magma_int_t | lwork, | ||
magma_queue_t | queue | ||
) |
magmablas_dsymv_work performs the matrix-vector operation:
y := alpha*A*x + beta*y,
where alpha and beta are scalars, x and y are n element vectors and A is an n by n symmetric matrix.
[in] | uplo | magma_uplo_t. On entry, UPLO specifies whether the upper or lower triangular part of the array A is to be referenced as follows:
|
[in] | n | INTEGER. On entry, N specifies the order of the matrix A. N must be at least zero. |
[in] | alpha | DOUBLE_PRECISION. On entry, ALPHA specifies the scalar alpha. |
[in] | dA | DOUBLE_PRECISION array of DIMENSION ( LDDA, n ). Before entry with UPLO = MagmaUpper, the leading n by n upper triangular part of the array A must contain the upper triangular part of the symmetric matrix and the strictly lower triangular part of A is not referenced. Before entry with UPLO = MagmaLower, the leading n by n lower triangular part of the array A must contain the lower triangular part of the symmetric matrix and the strictly upper triangular part of A is not referenced. Note that the imaginary parts of the diagonal elements need not be set and are assumed to be zero. |
[in] | ldda | INTEGER. On entry, LDDA specifies the first dimension of A as declared in the calling (sub) program. LDDA must be at least max( 1, n ). It is recommended that ldda is multiple of 16. Otherwise performance would be deteriorated as the memory accesses would not be fully coalescent. |
[in] | dx | DOUBLE_PRECISION array of dimension at least ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented array X must contain the n element vector x. |
[in] | incx | INTEGER. On entry, INCX specifies the increment for the elements of X. INCX must not be zero. |
[in] | beta | DOUBLE_PRECISION. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then Y need not be set on input. |
[in,out] | dy | DOUBLE_PRECISION array of dimension at least ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremented array Y must contain the n element vector y. On exit, Y is overwritten by the updated vector y. |
[in] | incy | INTEGER. On entry, INCY specifies the increment for the elements of Y. INCY must not be zero. |
[in] | dwork | (workspace) DOUBLE_PRECISION array on the GPU, dimension (MAX(1, LWORK)), |
[in] | lwork | INTEGER. The dimension of the array DWORK. LWORK >= LDDA * ceil( N / NB_X ), where NB_X = 64. |
[in] | queue | magma_queue_t. Queue to execute in. |
MAGMA implements dsymv through two steps: 1) perform the multiplication in each thread block and put the intermediate value in dwork. 2) sum the intermediate values and store the final result in y.
magamblas_dsymv_work requires users to provide a workspace, while magmablas_dsymv is a wrapper routine allocating the workspace inside the routine and provides the same interface as cublas.
If users need to call dsymv frequently, we suggest using magmablas_dsymv_work instead of magmablas_dsymv. As the overhead to allocate and free in device memory in magmablas_dsymv would hurt performance. Our tests show that this penalty is about 10 Gflop/s when the matrix size is around 10000.
void magmablas_dtrsv | ( | magma_uplo_t | uplo, |
magma_trans_t | trans, | ||
magma_diag_t | diag, | ||
magma_int_t | n, | ||
magmaDouble_const_ptr | dA, | ||
magma_int_t | ldda, | ||
magmaDouble_ptr | db, | ||
magma_int_t | incb, | ||
magma_queue_t | queue | ||
) |
dtrsv solves one of the matrix equations on gpu
op(A)*x = B, or x*op(A) = B,
where alpha is a scalar, X and B are vectors, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of
op(A) = A, or op(A) = A^T, or op(A) = A^H.
The vector x is overwritten on b.
[in] | uplo | magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
|
[in] | trans | magma_trans_t. On entry, trans specifies the form of op(A) to be used in the matrix multiplication as follows:
|
[in] | diag | magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
|
[in] | n | INTEGER. On entry, n N specifies the order of the matrix A. n >= 0. |
[in] | dA | DOUBLE_PRECISION array of dimension ( lda, n ) Before entry with uplo = MagmaUpper, the leading n by n upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading n by n lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. |
[in] | ldda | INTEGER. On entry, lda specifies the first dimension of A. lda >= max( 1, n ). |
[in] | db | DOUBLE_PRECISION array of dimension n On exit, b is overwritten with the solution vector X. |
[in] | incb | INTEGER. On entry, incb specifies the increment for the elements of b. incb must not be zero. Unchanged on exit. |
[in] | queue | magma_queue_t Queue to execute in. |
void magmablas_dtrsv_batched | ( | magma_uplo_t | uplo, |
magma_trans_t | trans, | ||
magma_diag_t | diag, | ||
magma_int_t | n, | ||
double ** | A_array, | ||
magma_int_t | lda, | ||
double ** | b_array, | ||
magma_int_t | incb, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
dtrsv solves one of the matrix equations on gpu
op(A)*x = b, or x*op(A) = b,
where alpha is a scalar, X and B are vectors, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of
op(A) = A, or op(A) = A^T, or op(A) = A^H.
The vector x is overwritten on b.
[in] | uplo | magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows:
|
[in] | trans | magma_trans_t. On entry, trans specifies the form of op(A) to be used in the matrix multiplication as follows:
|
[in] | diag | magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows:
|
[in] | n | INTEGER. On entry, n N specifies the order of the matrix A. n >= 0. |
[in] | A_array | Array of pointers, dimension (batchCount). Each is a DOUBLE_PRECISION array A of dimension ( lda, n ), Before entry with uplo = MagmaUpper, the leading n by n upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading n by n lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. |
[in] | lda | INTEGER. On entry, lda specifies the first dimension of A. lda >= max( 1, n ). |
[in] | b_array | Array of pointers, dimension (batchCount). Each is a DOUBLE_PRECISION array of dimension n On exit, b is overwritten with the solution vector X. |
[in] | incb | INTEGER. On entry, incb specifies the increment for the elements of b. incb must not be zero. Unchanged on exit. |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |
void magmablas_zgemv_batched | ( | magma_trans_t | trans, |
magma_int_t | m, | ||
magma_int_t | n, | ||
magmaDoubleComplex | alpha, | ||
magmaDoubleComplex_ptr | dA_array[], | ||
magma_int_t | ldda, | ||
magmaDoubleComplex_ptr | dx_array[], | ||
magma_int_t | incx, | ||
magmaDoubleComplex | beta, | ||
magmaDoubleComplex_ptr | dy_array[], | ||
magma_int_t | incy, | ||
magma_int_t | batchCount, | ||
magma_queue_t | queue | ||
) |
ZGEMV performs one of the matrix-vector operations.
y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, or y := alpha*A**H*x + beta*y,
where alpha and beta are scalars, x and y are vectors and A is an m by n matrix.
[in] | trans | magma_trans_t On entry, TRANS specifies the operation to be performed as follows:
|
[in] | m | INTEGER On entry, m specifies the number of rows of the matrix A. |
[in] | n | INTEGER On entry, n specifies the number of columns of the matrix A |
[in] | alpha | COMPLEX_16 On entry, ALPHA specifies the scalar alpha. |
[in] | dA_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of DIMENSION ( ldda, n ) on the GPU |
[in] | ldda | INTEGER LDDA specifies the leading dimension of A. |
[in] | dx_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array of dimension n if trans == MagmaNoTrans m if trans == MagmaTrans or MagmaConjTrans |
[in] | incx | Specifies the increment for the elements of X. INCX must not be zero. |
[in] | beta | COMPLEX_16 On entry, ALPHA specifies the scalar beta. When BETA is supplied as zero then Y need not be set on input. |
[out] | dy_array | Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array of dimension m if trans == MagmaNoTrans n if trans == MagmaTrans or MagmaConjTrans |
[in] | incy | Specifies the increment for the elements of Y. INCY must not be zero. |
[in] | batchCount | INTEGER The number of matrices to operate on. |
[in] | queue | magma_queue_t Queue to execute in. |