PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
Simple Interface - Double Complex

Functions

int PLASMA_zcgels (PLASMA_enum trans, int M, int N, int NRHS, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB, PLASMA_Complex64_t *X, int LDX, int *ITER)
int PLASMA_zcgesv (int N, int NRHS, PLASMA_Complex64_t *A, int LDA, int *IPIV, PLASMA_Complex64_t *B, int LDB, PLASMA_Complex64_t *X, int LDX, int *ITER)
int PLASMA_zcposv (PLASMA_enum uplo, int N, int NRHS, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB, PLASMA_Complex64_t *X, int LDX, int *ITER)
int PLASMA_zcungesv (PLASMA_enum trans, int N, int NRHS, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB, PLASMA_Complex64_t *X, int LDX, int *ITER)
int PLASMA_zgebrd (PLASMA_enum jobu, PLASMA_enum jobvt, int M, int N, PLASMA_Complex64_t *A, int LDA, double *D, double *E, PLASMA_Complex64_t *U, int LDU, PLASMA_Complex64_t *VT, int LDVT, PLASMA_desc *descT)
int PLASMA_zgelqf (int M, int N, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *T)
int PLASMA_zgelqs (int M, int N, int NRHS, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *T, PLASMA_Complex64_t *B, int LDB)
int PLASMA_zgels (PLASMA_enum trans, int M, int N, int NRHS, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *T, PLASMA_Complex64_t *B, int LDB)
int PLASMA_zgemm (PLASMA_enum transA, PLASMA_enum transB, int M, int N, int K, PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB, PLASMA_Complex64_t beta, PLASMA_Complex64_t *C, int LDC)
int PLASMA_zgeqrf (int M, int N, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *T)
int PLASMA_zgeqrs (int M, int N, int NRHS, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *T, PLASMA_Complex64_t *B, int LDB)
int PLASMA_zgesv (int N, int NRHS, PLASMA_Complex64_t *A, int LDA, int *IPIV, PLASMA_Complex64_t *B, int LDB)
int PLASMA_zgesv_incpiv (int N, int NRHS, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *L, int *IPIV, PLASMA_Complex64_t *B, int LDB)
int PLASMA_zgesvd (PLASMA_enum jobu, PLASMA_enum jobvt, int M, int N, PLASMA_Complex64_t *A, int LDA, double *S, PLASMA_Complex64_t *U, int LDU, PLASMA_Complex64_t *VT, int LDVT, PLASMA_desc *descT)
int PLASMA_zgetrf (int M, int N, PLASMA_Complex64_t *A, int LDA, int *IPIV)
int PLASMA_zgetrf_incpiv (int M, int N, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *L, int *IPIV)
int PLASMA_zgetri (int N, PLASMA_Complex64_t *A, int LDA, int *IPIV)
int PLASMA_zgetrs (PLASMA_enum trans, int N, int NRHS, PLASMA_Complex64_t *A, int LDA, int *IPIV, PLASMA_Complex64_t *B, int LDB)
int PLASMA_zgetrs_incpiv (PLASMA_enum trans, int N, int NRHS, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *L, int *IPIV, PLASMA_Complex64_t *B, int LDB)
int PLASMA_zheev (PLASMA_enum jobz, PLASMA_enum uplo, int N, PLASMA_Complex64_t *A, int LDA, double *W, PLASMA_desc *descT, PLASMA_Complex64_t *Q, int LDQ)
int PLASMA_zheevd (PLASMA_enum jobz, PLASMA_enum uplo, int N, PLASMA_Complex64_t *A, int LDA, double *W, PLASMA_desc *T, PLASMA_Complex64_t *Q, int LDQ)
int PLASMA_zhegst (PLASMA_enum itype, PLASMA_enum uplo, int N, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB)
int PLASMA_zhegv (PLASMA_enum itype, PLASMA_enum jobz, PLASMA_enum uplo, int N, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB, double *W, PLASMA_desc *descT, PLASMA_Complex64_t *Q, int LDQ)
int PLASMA_zhemm (PLASMA_enum side, PLASMA_enum uplo, int M, int N, PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB, PLASMA_Complex64_t beta, PLASMA_Complex64_t *C, int LDC)
int PLASMA_zher2k (PLASMA_enum uplo, PLASMA_enum trans, int N, int K, PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB, double beta, PLASMA_Complex64_t *C, int LDC)
int PLASMA_zherk (PLASMA_enum uplo, PLASMA_enum trans, int N, int K, double alpha, PLASMA_Complex64_t *A, int LDA, double beta, PLASMA_Complex64_t *C, int LDC)
int PLASMA_zhetrd (PLASMA_enum jobz, PLASMA_enum uplo, int N, PLASMA_Complex64_t *A, int LDA, double *D, double *E, PLASMA_desc *descT, PLASMA_Complex64_t *Q, int LDQ)
int PLASMA_zlacpy (PLASMA_enum uplo, int M, int N, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB)
double PLASMA_zlange (PLASMA_enum norm, int M, int N, PLASMA_Complex64_t *A, int LDA, double *work)
double PLASMA_zlanhe (PLASMA_enum norm, PLASMA_enum uplo, int N, PLASMA_Complex64_t *A, int LDA, double *work)
double PLASMA_zlansy (PLASMA_enum norm, PLASMA_enum uplo, int N, PLASMA_Complex64_t *A, int LDA, double *work)
int PLASMA_zlaset (PLASMA_enum uplo, int M, int N, PLASMA_Complex64_t alpha, PLASMA_Complex64_t beta, PLASMA_Complex64_t *A, int LDA)
int PLASMA_zlaswp (int N, PLASMA_Complex64_t *A, int LDA, int K1, int K2, int *IPIV, int INCX)
int PLASMA_zlaswpc (int N, PLASMA_Complex64_t *A, int LDA, int K1, int K2, int *IPIV, int INCX)
int PLASMA_zlauum (PLASMA_enum uplo, int N, PLASMA_Complex64_t *A, int LDA)
int PLASMA_zplghe (double bump, int N, PLASMA_Complex64_t *A, int LDA, unsigned long long int seed)
int PLASMA_zplgsy (PLASMA_Complex64_t bump, int N, PLASMA_Complex64_t *A, int LDA, unsigned long long int seed)
int PLASMA_zplrnt (int M, int N, PLASMA_Complex64_t *A, int LDA, unsigned long long int seed)
int PLASMA_zposv (PLASMA_enum uplo, int N, int NRHS, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB)
int PLASMA_zpotrf (PLASMA_enum uplo, int N, PLASMA_Complex64_t *A, int LDA)
int PLASMA_zpotri (PLASMA_enum uplo, int N, PLASMA_Complex64_t *A, int LDA)
int PLASMA_zpotrs (PLASMA_enum uplo, int N, int NRHS, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB)
int PLASMA_zsymm (PLASMA_enum side, PLASMA_enum uplo, int M, int N, PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB, PLASMA_Complex64_t beta, PLASMA_Complex64_t *C, int LDC)
int PLASMA_zsyr2k (PLASMA_enum uplo, PLASMA_enum trans, int N, int K, PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB, PLASMA_Complex64_t beta, PLASMA_Complex64_t *C, int LDC)
int PLASMA_zsyrk (PLASMA_enum uplo, PLASMA_enum trans, int N, int K, PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t beta, PLASMA_Complex64_t *C, int LDC)
int PLASMA_ztrmm (PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum transA, PLASMA_enum diag, int N, int NRHS, PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB)
int PLASMA_ztrsm (PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum transA, PLASMA_enum diag, int N, int NRHS, PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB)
int PLASMA_ztrsmpl (int N, int NRHS, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *L, int *IPIV, PLASMA_Complex64_t *B, int LDB)
int PLASMA_ztrsmrv (PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum transA, PLASMA_enum diag, int N, int NRHS, PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB)
int PLASMA_ztrtri (PLASMA_enum uplo, PLASMA_enum diag, int N, PLASMA_Complex64_t *A, int LDA)
int PLASMA_zunglq (int M, int N, int K, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *T, PLASMA_Complex64_t *Q, int LDQ)
int PLASMA_zungqr (int M, int N, int K, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *T, PLASMA_Complex64_t *Q, int LDQ)
int PLASMA_zunmlq (PLASMA_enum side, PLASMA_enum trans, int M, int N, int K, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *T, PLASMA_Complex64_t *B, int LDB)
int PLASMA_zunmqr (PLASMA_enum side, PLASMA_enum trans, int M, int N, int K, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *T, PLASMA_Complex64_t *B, int LDB)
int PLASMA_zLapack_to_Tile (PLASMA_Complex64_t *Af77, int LDA, PLASMA_desc *A)
int PLASMA_zTile_to_Lapack (PLASMA_desc *A, PLASMA_Complex64_t *Af77, int LDA)

Detailed Description

This is the group of double complex functions using the simple user interface.


Function Documentation

int PLASMA_zcgels ( PLASMA_enum  trans,
int  M,
int  N,
int  NRHS,
PLASMA_Complex64_t *  A,
int  LDA,
PLASMA_Complex64_t *  B,
int  LDB,
PLASMA_Complex64_t *  X,
int  LDX,
int *  ITER 
)

PLASMA_zcgels - Solves overdetermined or underdetermined linear systems involving an M-by-N matrix A using the QR or the LQ factorization of A. It is assumed that A has full rank. The following options are provided:

trans = PlasmaNoTrans and M >= N: find the least squares solution of an overdetermined system, i.e., solve the least squares problem: minimize || B - A*X ||.

trans = PlasmaNoTrans and M < N: find the minimum norm solution of an underdetermined system A * X = B.

Several right hand side vectors B and solution vectors X can be handled in a single call; they are stored as the columns of the M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix X.

PLASMA_zcgels first attempts to factorize the matrix in COMPLEX and use this factorization within an iterative refinement procedure to produce a solution with COMPLEX*16 normwise backward error quality (see below). If the approach fails the method switches to a COMPLEX*16 factorization and solve.

The iterative refinement is not going to be a winning strategy if the ratio COMPLEX performance over COMPLEX*16 performance is too small. A reasonable strategy should take the number of right-hand sides and the size of the matrix into account. This might be done with a call to ILAENV in the future. Up to now, we always try iterative refinement.

The iterative refinement process is stopped if ITER > ITERMAX or for all the RHS we have: RNRM < N*XNRM*ANRM*EPS*BWDMAX where:

  • ITER is the number of the current iteration in the iterative refinement process
  • RNRM is the infinity-norm of the residual
  • XNRM is the infinity-norm of the solution
  • ANRM is the infinity-operator-norm of the matrix A
  • EPS is the machine epsilon returned by DLAMCH('Epsilon').

Actually, in its current state (PLASMA 2.1.0), the test is slightly relaxed.

The values ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 respectively.

We follow Bjorck's algorithm proposed in "Iterative Refinement of Linear Least Squares solutions I", BIT, 7:257-278, 1967.

Parameters:
[in]transIntended usage: = PlasmaNoTrans: the linear system involves A; = PlasmaConjTrans: the linear system involves A**H. Currently only PlasmaNoTrans is supported.
[in]MThe number of rows of the matrix A. M >= 0.
[in]NThe number of columns of the matrix A. N >= 0.
[in]NRHSThe number of right hand sides, i.e., the number of columns of the matrices B and X. NRHS >= 0.
[in]AThe M-by-N matrix A. This matrix is not modified.
[in]LDAThe leading dimension of the array A. LDA >= max(1,M).
[in]BThe M-by-NRHS matrix B of right hand side vectors, stored columnwise. Not modified.
[in]LDBThe leading dimension of the array B. LDB >= MAX(1,M,N).
[out]XIf return value = 0, the solution vectors, stored columnwise. If M >= N, rows 1 to N of X contain the least squares solution vectors; the residual sum of squares for the solution in each column is given by the sum of squares of the modulus of elements N+1 to M in that column. If M < N, rows 1 to N of X contain the minimum norm solution vectors.
[in]LDXThe leading dimension of the array X. LDX >= MAX(1,M,N).
[out]ITERThe number of the current iteration in the iterative refinement process
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
See also:
PLASMA_zcgels_Tile
PLASMA_zcgels_Tile_Async
PLASMA_dsgels
PLASMA_zgels

Definition at line 166 of file zcgels.c.

References max, min, PLASMA_Alloc_Workspace_zgels_Tile(), plasma_context_self(), PLASMA_Dealloc_Handle_Tile(), plasma_desc_init(), plasma_desc_mat_alloc(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_NOT_SUPPORTED, PLASMA_ERR_OUT_OF_RESOURCES, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZCGELS, PLASMA_NB, plasma_parallel_call_5, plasma_pzlapack_to_tile(), plasma_pztile_to_lapack(), PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, plasma_tune(), PLASMA_zcgels_Tile_Async(), PlasmaComplexDouble, and PlasmaNoTrans.

{
int i, j;
int NB, NBNB, MT, NT, NTRHS;
int status;
PLASMA_desc descA;
PLASMA_desc descB;
PLASMA_desc *descT;
PLASMA_desc descX;
PLASMA_sequence *sequence = NULL;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zcgels", "PLASMA not initialized");
}
/* Check input arguments */
if (trans != PlasmaNoTrans) {
plasma_error("PLASMA_zcgels", "only PlasmaNoTrans supported");
}
if (M < 0) {
plasma_error("PLASMA_zcgels", "illegal value of M");
return -2;
}
if (N < 0) {
plasma_error("PLASMA_zcgels", "illegal value of N");
return -3;
}
if (NRHS < 0) {
plasma_error("PLASMA_zcgels", "illegal value of NRHS");
return -4;
}
if (LDA < max(1, M)) {
plasma_error("PLASMA_zcgels", "illegal value of LDA");
return -6;
}
if (LDB < max(1, max(M, N))) {
plasma_error("PLASMA_zcgels", "illegal value of LDB");
return -9;
}
if (LDX < max(1, max(M, N))) {
plasma_error("PLASMA_zcgels", "illegal value of LDX");
return -10;
}
/* Quick return */
if (min(M, min(N, NRHS)) == 0) {
for (i = 0; i < max(M, N); i++)
for (j = 0; j < NRHS; j++)
B[j*LDB+i] = 0.0;
}
/* Tune NB & IB depending on M, N & NRHS; Set NBNB */
status = plasma_tune(PLASMA_FUNC_ZCGELS, M, N, NRHS);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_zcgels", "plasma_tune() failed");
return status;
}
/* Set MT, NT & NTRHS */
NB = PLASMA_NB;
NBNB = NB*NB;
NT = (N%NB==0) ? (N/NB) : (N/NB+1);
MT = (M%NB==0) ? (M/NB) : (M/NB+1);
NTRHS = (NRHS%NB==0) ? (NRHS/NB) : (NRHS/NB+1);
printf("M %d, N %d, NRHS %d, NB %d, MT %d, NT %d, NTRHS %d\n", M, N, NRHS, NB, MT, NT, NTRHS);
plasma_sequence_create(plasma, &sequence);
NB, NB, NBNB,
M, N, 0, 0, M, N);
if (M >= N) {
NB, NB, NBNB,
M, NRHS, 0, 0, M, NRHS);
NB, NB, NBNB,
M, NRHS, 0, 0, M, NRHS);
}
else {
NB, NB, NBNB,
N, NRHS, 0, 0, N, NRHS);
NB, NB, NBNB,
N, NRHS, 0, 0, N, NRHS);
}
/* DOUBLE PRECISION INITIALIZATION */
/* Allocate memory for matrices in block layout */
plasma_error("PLASMA_zcgels", "plasma_shared_alloc() failed");
}
int, LDA,
PLASMA_desc, descA,
PLASMA_sequence*, sequence,
PLASMA_request*, &request);
int, LDB,
PLASMA_desc, descB,
PLASMA_sequence*, sequence,
PLASMA_request*, &request);
/* Allocate workspace */
/* Call the native interface */
status = PLASMA_zcgels_Tile_Async(PlasmaNoTrans, &descA, descT, &descB, &descX, ITER,
sequence, &request);
if (status == PLASMA_SUCCESS) {
PLASMA_desc, descX,
int, LDX,
PLASMA_sequence*, sequence,
PLASMA_request*, &request);
}
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zcgesv ( int  N,
int  NRHS,
PLASMA_Complex64_t *  A,
int  LDA,
int *  IPIV,
PLASMA_Complex64_t *  B,
int  LDB,
PLASMA_Complex64_t *  X,
int  LDX,
int *  ITER 
)

PLASMA_zcgesv - Computes the solution to a system of linear equations A * X = B, where A is an N-by-N matrix and X and B are N-by-NRHS matrices.

PLASMA_zcgesv first attempts to factorize the matrix in COMPLEX and use this factorization within an iterative refinement procedure to produce a solution with COMPLEX*16 normwise backward error quality (see below). If the approach fails the method switches to a COMPLEX*16 factorization and solve.

The iterative refinement is not going to be a winning strategy if the ratio COMPLEX performance over COMPLEX*16 performance is too small. A reasonable strategy should take the number of right-hand sides and the size of the matrix into account. This might be done with a call to ILAENV in the future. Up to now, we always try iterative refinement.

The iterative refinement process is stopped if ITER > ITERMAX or for all the RHS we have: RNRM < N*XNRM*ANRM*EPS*BWDMAX where:

  • ITER is the number of the current iteration in the iterative refinement process
  • RNRM is the infinity-norm of the residual
  • XNRM is the infinity-norm of the solution
  • ANRM is the infinity-operator-norm of the matrix A
  • EPS is the machine epsilon returned by DLAMCH('Epsilon').

Actually, in its current state (PLASMA 2.1.0), the test is slightly relaxed.

The values ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 respectively.

Parameters:
[in]NThe number of linear equations, i.e., the order of the matrix A. N >= 0.
[in]NRHSThe number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]AThe N-by-N coefficient matrix A. This matrix is not modified.
[in]LDAThe leading dimension of the array A. LDA >= max(1,N).
[out]IPIVOn exit, the pivot indices that define the permutations.
[in]BThe N-by-NRHS right hand side matrix B.
[in]LDBThe leading dimension of the array B. LDB >= max(1,N).
[out]XIf return value = 0, the N-by-NRHS solution matrix X.
[in]LDXThe leading dimension of the array X. LDX >= max(1,N).
[out]ITERThe number of the current iteration in the iterative refinement process
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
>0if i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, so the solution could not be computed.
See also:
PLASMA_zcgesv_Tile
PLASMA_zcgesv_Tile_Async
PLASMA_dsgesv
PLASMA_zgesv

Definition at line 227 of file zcgesv.c.

References plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZCGESV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zcgesv_Tile_Async(), plasma_zdesc_alloc, plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaComplexDouble, and plasma_sequence_t::status.

{
int NB;
int status;
PLASMA_desc descA;
PLASMA_desc descB;
PLASMA_desc descX;
PLASMA_sequence *sequence = NULL;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zcgesv", "PLASMA not initialized");
}
/* Check input arguments */
if (N < 0) {
plasma_error("PLASMA_zcgesv", "illegal value of N");
return -1;
}
if (NRHS < 0) {
plasma_error("PLASMA_zcgesv", "illegal value of NRHS");
return -2;
}
if (LDA < max(1, N)) {
plasma_error("PLASMA_zcgesv", "illegal value of LDA");
return -4;
}
if (LDB < max(1, N)) {
plasma_error("PLASMA_zcgesv", "illegal value of LDB");
return -8;
}
if (LDX < max(1, N)) {
plasma_error("PLASMA_zcgesv", "illegal value of LDX");
return -10;
}
/* Quick return */
if (min(N, NRHS) == 0)
/* Tune NB & IB depending on M, N & NRHS; Set NBNB */
status = plasma_tune(PLASMA_FUNC_ZCGESV, N, N, NRHS);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_zcgesv", "plasma_tune() failed");
return status;
}
NB = PLASMA_NB;
plasma_sequence_create(plasma, &sequence);
/* DOUBLE PRECISION INITIALIZATION */
plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N , plasma_desc_mat_free(&(descA)) );
plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)) );
plasma_zdesc_alloc( descX, NB, NB, N, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descX)) );
} else {
plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
PlasmaComplexDouble, NB, NB, (NB*NB),
LDX, NRHS, 0, 0, N, NRHS);
descX.mat = X;
}
/* Call the native interface */
status = PLASMA_zcgesv_Tile_Async(&descA, IPIV, &descB, &descX, ITER, sequence, &request);
if (status == PLASMA_SUCCESS) {
plasma_zooptile2lap( descX, X, NB, NB, LDX, NRHS );
} else {
plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
plasma_ziptile2lap( descX, X, NB, NB, LDX, NRHS );
}
}
status = sequence->status;
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zcposv ( PLASMA_enum  uplo,
int  N,
int  NRHS,
PLASMA_Complex64_t *  A,
int  LDA,
PLASMA_Complex64_t *  B,
int  LDB,
PLASMA_Complex64_t *  X,
int  LDX,
int *  ITER 
)

PLASMA_zcposv - Computes the solution to a system of linear equations A * X = B, where A is an N-by-N symmetric positive definite (or Hermitian positive definite in the complex case) matrix and X and B are N-by-NRHS matrices. The Cholesky decomposition is used to factor A as

A = U**H * U, if uplo = PlasmaUpper, or A = L * L**H, if uplo = PlasmaLower,

where U is an upper triangular matrix and L is a lower triangular matrix. The factored form of A is then used to solve the system of equations A * X = B.

PLASMA_zcposv first attempts to factorize the matrix in COMPLEX and use this factorization within an iterative refinement procedure to produce a solution with COMPLEX*16 normwise backward error quality (see below). If the approach fails the method switches to a COMPLEX*16 factorization and solve.

The iterative refinement is not going to be a winning strategy if the ratio COMPLEX performance over COMPLEX*16 performance is too small. A reasonable strategy should take the number of right-hand sides and the size of the matrix into account. This might be done with a call to ILAENV in the future. Up to now, we always try iterative refinement.

The iterative refinement process is stopped if ITER > ITERMAX or for all the RHS we have: RNRM < N*XNRM*ANRM*EPS*BWDMAX where:

  • ITER is the number of the current iteration in the iterative refinement process
  • RNRM is the infinity-norm of the residual
  • XNRM is the infinity-norm of the solution
  • ANRM is the infinity-operator-norm of the matrix A
  • EPS is the machine epsilon returned by DLAMCH('Epsilon').

Actually, in its current state (PLASMA 2.1.0), the test is slightly relaxed.

The values ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 respectively.

Parameters:
[in]uploSpecifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]NThe number of linear equations, i.e., the order of the matrix A. N >= 0.
[in]NRHSThe number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]AThe N-by-N symmetric positive definite (or Hermitian) coefficient matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. This matrix is not modified.
[in]LDAThe leading dimension of the array A. LDA >= max(1,N).
[in]BThe N-by-NRHS right hand side matrix B.
[in]LDBThe leading dimension of the array B. LDB >= max(1,N).
[out]XIf return value = 0, the N-by-NRHS solution matrix X.
[in]LDXThe leading dimension of the array X. LDX >= max(1,N).
[out]ITERThe number of the current iteration in the iterative refinement process
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
>0if i, the leading minor of order i of A is not positive definite, so the factorization could not be completed, and the solution has not been computed.
See also:
PLASMA_zcposv_Tile
PLASMA_zcposv_Tile_Async
PLASMA_dsposv
PLASMA_zposv

Definition at line 171 of file zcposv.c.

References plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZCPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zcposv_Tile_Async(), plasma_zdesc_alloc, plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaComplexDouble, PlasmaLower, PlasmaUpper, and plasma_sequence_t::status.

{
int NB;
int status;
PLASMA_desc descA;
PLASMA_desc descB;
PLASMA_desc descX;
PLASMA_sequence *sequence = NULL;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zcposv", "PLASMA not initialized");
}
/* Check input arguments */
if (uplo != PlasmaUpper && uplo != PlasmaLower) {
plasma_error("PLASMA_zcposv", "illegal value of uplo");
return -1;
}
if (N < 0) {
plasma_error("PLASMA_zcposv", "illegal value of N");
return -2;
}
if (NRHS < 0) {
plasma_error("PLASMA_zcposv", "illegal value of NRHS");
return -3;
}
if (LDA < max(1, N)) {
plasma_error("PLASMA_zcposv", "illegal value of LDA");
return -5;
}
if (LDB < max(1, N)) {
plasma_error("PLASMA_zcposv", "illegal value of LDB");
return -7;
}
if (LDX < max(1, N)) {
plasma_error("PLASMA_zcposv", "illegal value of LDX");
return -10;
}
/* Quick return - currently NOT equivalent to LAPACK's
* LAPACK does not have such check for ZCPOSV */
if (min(N, NRHS) == 0)
/* Tune NB depending on M, N & NRHS; Set NBNBSIZE */
status = plasma_tune(PLASMA_FUNC_ZCPOSV, N, N, NRHS);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_zcposv", "plasma_tune() failed");
return status;
}
NB = PLASMA_NB;
plasma_sequence_create(plasma, &sequence);
/* DOUBLE PRECISION INITIALIZATION */
plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N , plasma_desc_mat_free(&(descA)) );
plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)) );
plasma_zdesc_alloc( descX, NB, NB, N, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descX)) );
} else {
plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
PlasmaComplexDouble, NB, NB, (NB*NB),
LDX, NRHS, 0, 0, N, NRHS);
descX.mat = X;
}
/* Call the native interface */
status = PLASMA_zcposv_Tile_Async(uplo, &descA, &descB, &descX, ITER, sequence, &request);
if (status == PLASMA_SUCCESS) {
plasma_zooptile2lap( descX, X, NB, NB, LDX, NRHS );
} else {
plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
plasma_ziptile2lap( descX, X, NB, NB, LDX, NRHS );
}
}
status = sequence->status;
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zcungesv ( PLASMA_enum  trans,
int  N,
int  NRHS,
PLASMA_Complex64_t *  A,
int  LDA,
PLASMA_Complex64_t *  B,
int  LDB,
PLASMA_Complex64_t *  X,
int  LDX,
int *  ITER 
)

PLASMA_zcungesv - Solves overdetermined or underdetermined linear systems involving an M-by-N matrix A using the QR or the LQ factorization of A. It is assumed that A has full rank. The following options are provided:

trans = PlasmaNoTrans and M >= N: find the least squares solution of an overdetermined system, i.e., solve the least squares problem: minimize || B - A*X ||.

trans = PlasmaNoTrans and M < N: find the minimum norm solution of an underdetermined system A * X = B.

Several right hand side vectors B and solution vectors X can be handled in a single call; they are stored as the columns of the M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix X.

PLASMA_zcungesv first attempts to factorize the matrix in COMPLEX and use this factorization within an iterative refinement procedure to produce a solution with COMPLEX*16 normwise backward error quality (see below). If the approach fails the method switches to a COMPLEX*16 factorization and solve.

The iterative refinement is not going to be a winning strategy if the ratio COMPLEX performance over COMPLEX*16 performance is too small. A reasonable strategy should take the number of right-hand sides and the size of the matrix into account. This might be done with a call to ILAENV in the future. Up to now, we always try iterative refinement.

The iterative refinement process is stopped if ITER > ITERMAX or for all the RHS we have: RNRM < N*XNRM*ANRM*EPS*BWDMAX where:

  • ITER is the number of the current iteration in the iterative refinement process
  • RNRM is the infinity-norm of the residual
  • XNRM is the infinity-norm of the solution
  • ANRM is the infinity-operator-norm of the matrix A
  • EPS is the machine epsilon returned by DLAMCH('Epsilon').

Actually, in its current state (PLASMA 2.1.0), the test is slightly relaxed.

The values ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 respectively.

We follow Bjorck's algorithm proposed in "Iterative Refinement of Linear Least Squares solutions I", BIT, 7:257-278, 1967.

Parameters:
[in]transIntended usage: = PlasmaNoTrans: the linear system involves A; = PlasmaConjTrans: the linear system involves A**H. Currently only PlasmaNoTrans is supported.
[in]NThe number of columns of the matrix A. N >= 0.
[in]NRHSThe number of right hand sides, i.e., the number of columns of the matrices B and X. NRHS >= 0.
[in]AThe N-by-N matrix A. This matrix is not modified.
[in]LDAThe leading dimension of the array A. LDA >= max(1,N).
[in]BThe N-by-NRHS matrix B of right hand side vectors, stored columnwise. Not modified.
[in]LDBThe leading dimension of the array B. LDB >= max(1,N).
[out]XIf return value = 0, the solution vectors, stored columnwise. if M >= N, rows 1 to N of X contain the least squares solution vectors; the residual sum of squares for the solution in each column is given by the sum of squares of the modulus of elements N+1 to M in that column; if M < N, rows 1 to N of X contain the minimum norm solution vectors;
[in]LDXThe leading dimension of the array X. LDX >= MAX(1,M,N).
[out]ITERThe number of the current iteration in the iterative refinement process
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
See also:
PLASMA_zcungesv_Tile
PLASMA_zcungesv_Tile_Async
PLASMA_dsungesv
PLASMA_zgels

Definition at line 163 of file zcungesv.c.

References plasma_desc_t::mat, max, PLASMA_Alloc_Workspace_zgels_Tile(), plasma_context_self(), PLASMA_Dealloc_Handle_Tile(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_NOT_SUPPORTED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZCGELS, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zcungesv_Tile_Async(), plasma_zdesc_alloc, plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaComplexDouble, and PlasmaNoTrans.

{
int NB;
int status;
PLASMA_desc descA;
PLASMA_desc descB;
PLASMA_desc *descT;
PLASMA_desc descX;
PLASMA_sequence *sequence = NULL;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zcungesv", "PLASMA not initialized");
}
/* Check input arguments */
if (trans != PlasmaNoTrans) {
plasma_error("PLASMA_zcungesv", "only PlasmaNoTrans supported");
}
if (N < 0) {
plasma_error("PLASMA_zcungesv", "illegal value of N");
return -2;
}
if (NRHS < 0) {
plasma_error("PLASMA_zcungesv", "illegal value of NRHS");
return -3;
}
if (LDA < max(1, N)) {
plasma_error("PLASMA_zcungesv", "illegal value of LDA");
return -5;
}
if (LDB < max(1, N)) {
plasma_error("PLASMA_zcungesv", "illegal value of LDB");
return -8;
}
if (LDX < max(1, N)) {
plasma_error("PLASMA_zcungesv", "illegal value of LDX");
return -9;
}
/* Quick return */
if ( N == 0 )
/* Tune NB & IB depending on M, N & NRHS; Set NBNB */
status = plasma_tune(PLASMA_FUNC_ZCGELS, N, N, NRHS);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_zcungesv", "plasma_tune() failed");
return status;
}
NB = PLASMA_NB;
plasma_sequence_create(plasma, &sequence);
/* DOUBLE PRECISION INITIALIZATION */
plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N , plasma_desc_mat_free(&(descA)) );
plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)) );
plasma_zdesc_alloc( descX, NB, NB, N, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descX)) );
} else {
plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
PlasmaComplexDouble, NB, NB, (NB*NB),
LDX, NRHS, 0, 0, N, NRHS);
descX.mat = X;
}
/* Allocate workspace */
/* Call the native interface */
status = PLASMA_zcungesv_Tile_Async(PlasmaNoTrans, &descA, descT, &descB, &descX, ITER,
sequence, &request);
if (status == PLASMA_SUCCESS) {
plasma_zooptile2lap( descX, X, NB, NB, LDX, NRHS );
} else {
plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
plasma_ziptile2lap( descX, X, NB, NB, LDX, NRHS );
}
}
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgebrd ( PLASMA_enum  jobu,
PLASMA_enum  jobvt,
int  M,
int  N,
PLASMA_Complex64_t *  A,
int  LDA,
double *  D,
double *  E,
PLASMA_Complex64_t *  U,
int  LDU,
PLASMA_Complex64_t *  VT,
int  LDVT,
PLASMA_desc *  descT
)

PLASMA_zgebrd - computes the singular value decomposition (SVD) of a complex M-by-N matrix A, optionally computing the left and/or right singular vectors. The SVD is written

 A = U * SIGMA * conjugate-transpose(V)

where SIGMA is an M-by-N matrix which is zero except for its min(m,n) diagonal elements, U is an M-by-M unitary matrix, and V is an N-by-N unitary matrix. The diagonal elements of SIGMA are the singular values of A; they are real and non-negative, and are returned in descending order. The first min(m,n) columns of U and V are the left and right singular vectors of A.

Note that the routine returns V**H, not V. Not LAPACK compliant for now! Note: Only PlasmaNoVec supported!

Parameters:
[in]jobuSpecifies options for computing all or part of the matrix U. Intended usage: = PlasmaVec: all M columns of U are returned in array U; = PlasmaNoVec: no columns of U (no left singular vectors) are computed. Note: Only PlasmaNoVec supported!
[in]jobvtSpecifies options for computing all or part of the matrix V**H. Intended usage: = PlasmaVec: all N rows of V**H are returned in the array VT; = PlasmaNoVec: no rows of V**H (no right singular vectors) are computed. Note: Only PlasmaNoVec supported!
[in]MThe number of rows of the matrix A. M >= 0.
[in]NThe number of columns of the matrix A. N >= 0.
[in,out]AOn entry, the M-by-N matrix A. On exit, if JOBU = 'O', A is overwritten with the first min(m,n) columns of U (the left singular vectors, stored columnwise); if JOBVT = 'O', A is overwritten with the first min(m,n) rows of V**H (the right singular vectors, stored rowwise); if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A are destroyed.
[in]LDAThe leading dimension of the array A. LDA >= max(1,M).
[out]SThe double precision singular values of A, sorted so that S(i) >= S(i+1).
[out]U(LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. If JOBU = 'A', U contains the M-by-M unitary matrix U; if JOBU = 'S', U contains the first min(m,n) columns of U (the left singular vectors, stored columnwise); if JOBU = 'N' or 'O', U is not referenced.
[in]LDUThe leading dimension of the array U. LDU >= 1; if JOBU = 'S' or 'A', LDU >= M.
[out]VTIf JOBVT = 'A', VT contains the N-by-N unitary matrix V**H; if JOBVT = 'S', VT contains the first min(m,n) rows of V**H (the right singular vectors, stored rowwise); if JOBVT = 'N' or 'O', VT is not referenced.
[in]LDVTThe leading dimension of the array VT. LDVT >= 1; if JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
[in,out]descTOn entry, descriptor as returned by PLASMA_Alloc_Workspace_zgesvd. On exit, contains auxiliary factorization data.
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
See also:
PLASMA_zgebrd_Tile
PLASMA_zgebrd_Tile_Async
PLASMA_cgebrd
PLASMA_dgebrd
PLASMA_sgebrd

Definition at line 122 of file zgebrd.c.

References plasma_desc_t::m, max, min, plasma_desc_t::n, plasma_context_self(), plasma_desc_check(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGEBRD, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zgebrd_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaNoVec, PlasmaVec, and plasma_sequence_t::status.

{
/*
 * Body of PLASMA_zgebrd (LAPACK-layout entry point).
 *
 * Tunes the tile sizes, validates the arguments, passes A (and U / VT when
 * PlasmaVec is requested) through the plasma_z*lap2tile macros, calls
 * PLASMA_zgebrd_Tile_Async, then passes the matrices through the
 * plasma_z*tile2lap macros.
 *
 * Returns sequence->status on completion, the plasma_tune() status when
 * tuning fails, or -i when the i-th argument is illegal (argument order:
 * jobu, jobvt, M, N, A, LDA, D, E, U, LDU, VT, LDVT, descT).
 *
 * NOTE(review): this listing looks incomplete. Symbols that this page lists
 * as referenced by the function (PLASMA_TRANSLATION, PLASMA_OUTOFPLACE,
 * plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED,
 * PLASMA_REQUEST_INITIALIZER) never appear below, `plasma` and `request`
 * are used without a visible declaration, and two `} else {` lines have no
 * visible opening `if`. Check zgebrd.c before relying on the control flow
 * shown here.
 */
int NB, IB, MT, NT;
int status;
PLASMA_sequence *sequence = NULL;
PLASMA_desc descA, descU, descVT;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zgebrd", "PLASMA not initialized");
/* NOTE(review): no return is visible here; presumably the
 * PLASMA_ERR_NOT_INITIALIZED return was lost from the listing. */
}
/* Tune NB & IB depending on M & N; Set NBNB.
 * This runs before M and N are range-checked below. */
status = plasma_tune(PLASMA_FUNC_ZGEBRD, M, N, 0);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_zgebrd", "plasma_tune() failed");
return status;
}
/* Set MT, NT: M/NB and N/NB rounded up */
NB = PLASMA_NB;
IB = PLASMA_IB;
MT = (M%NB==0) ? (M/NB) : (M/NB+1);
NT = (N%NB==0) ? (N/NB) : (N/NB+1);
/* Check input arguments */
if (jobu != PlasmaNoVec && jobu !=PlasmaVec) {
plasma_error("PLASMA_zgebrd", "illegal value of jobu");
return -1;
}
if (jobvt != PlasmaNoVec && jobvt != PlasmaVec) {
plasma_error("PLASMA_zgebrd", "illegal value of jobvt");
return -2;
}
if (M < 0) {
plasma_error("PLASMA_zgebrd", "illegal value of M");
return -3;
}
if (N < 0) {
plasma_error("PLASMA_zgebrd", "illegal value of N");
return -4;
}
if (LDA < max(1, M)) {
plasma_error("PLASMA_zgebrd", "illegal value of LDA");
return -6;
}
/* LDU is the 10th argument (D and E occupy positions 7 and 8). */
if (LDU < 1) {
plasma_error("PLASMA_zgebrd", "illegal value of LDU");
return -10;
}
/* LDVT is the 12th argument. */
if (LDVT < 1) {
plasma_error("PLASMA_zgebrd", "illegal value of LDVT");
return -12;
}
/* descT is the 13th argument; it must be MT*IB by NT*NB. */
if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) ||
( descT->m != MT*IB ) || (descT->n != NT*NB) ) {
plasma_error("PLASMA_zgebrd", "invalid T descriptor");
return -13;
}
/* Quick return */
/* NOTE(review): the body of this test is empty in the listing; the
 * early return it announces is not visible. */
if (min(M, N) == 0) {
}
/* PlasmaVec is rejected for both jobu and jobvt, so as shown the later
 * `== PlasmaVec` branches cannot be reached. */
if (jobu == PlasmaVec) {
plasma_error("PLASMA_zgebrd", "computing the singular vectors is not supported in this version");
return -1;
}
if (jobvt == PlasmaVec) {
plasma_error("PLASMA_zgebrd", "computing the singular vectors is not supported in this version");
return -2;
}
plasma_sequence_create(plasma, &sequence);
/* NOTE(review): the `if` that selects between the "oop" and "ip" macro
 * families is not visible; presumably it is the
 * PLASMA_TRANSLATION == PLASMA_OUTOFPLACE test named in the references. */
plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
if (jobu == PlasmaVec){
plasma_zooplap2tile( descU, U, NB, NB, LDU, M, 0, 0, M, M, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descU)));
}
if (jobvt == PlasmaVec){
plasma_zooplap2tile( descVT, VT, NB, NB, LDVT, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descU)); plasma_desc_mat_free(&(descVT)));
}
} else {
plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N);
if (jobu == PlasmaVec){
plasma_ziplap2tile( descU, U, NB, NB, LDU, M, 0, 0, M, M);
}
if (jobvt == PlasmaVec){
plasma_ziplap2tile( descVT, VT, NB, NB, LDVT, N, 0, 0, N, N);
}
}
/* Call the tile interface */
PLASMA_zgebrd_Tile_Async(jobu, jobvt, &descA, D, E, &descU, &descVT, descT, sequence, &request);
/* NOTE(review): the opening `if` of the branch below is missing from the
 * listing, as is whatever the two empty PlasmaVec blocks contained. */
plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
if (jobu == PlasmaVec){
plasma_zooptile2lap( descU, U, NB, NB, LDU, M );
}
if (jobvt == PlasmaVec){
plasma_zooptile2lap( descVT, VT, NB, NB, LDVT, N );
}
if (jobu == PlasmaVec){
}
if (jobvt == PlasmaVec){
}
} else {
plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
if (jobu == PlasmaVec){
plasma_ziptile2lap( descU, U, NB, NB, LDU, M );
}
if (jobvt == PlasmaVec){
plasma_ziptile2lap( descVT, VT, NB, NB, LDVT, N );
}
}
/* The function result is whatever status the sequence ended with. */
status = sequence->status;
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgelqf ( int  M,
int  N,
PLASMA_Complex64_t *  A,
int  LDA,
PLASMA_Complex64_t *  T
)

PLASMA_zgelqf - Computes the tile LQ factorization of a complex M-by-N matrix A: A = L * Q.

Parameters:
[in]MThe number of rows of the matrix A. M >= 0.
[in]NThe number of columns of the matrix A. N >= 0.
[in,out]AOn entry, the M-by-N matrix A. On exit, the elements on and below the diagonal of the array contain the m-by-min(M,N) lower trapezoidal matrix L (L is lower triangular if M <= N); the elements above the diagonal represent the unitary matrix Q as a product of elementary reflectors, stored by tiles.
[in]LDAThe leading dimension of the array A. LDA >= max(1,M).
[out]TOn exit, auxiliary factorization data, required by PLASMA_zgelqs to solve the system of equations.
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
See also:
PLASMA_zgelqf_Tile
PLASMA_zgelqf_Tile_Async
PLASMA_cgelqf
PLASMA_dgelqf
PLASMA_sgelqf
PLASMA_zgelqs

Definition at line 62 of file zgelqf.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_ZGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zgelqf_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaComplexDouble, plasma_sequence_t::status, and T.

{
/*
 * Body of PLASMA_zgelqf (LAPACK-layout entry point).
 *
 * Validates M, N and LDA, tunes the tile sizes, sets up the descriptor for
 * T, passes A through a plasma_z*lap2tile macro, calls
 * PLASMA_zgelqf_Tile_Async and passes A through a plasma_z*tile2lap macro.
 *
 * Returns sequence->status on completion, the plasma_tune() status when
 * tuning fails, or -i when the i-th argument is illegal.
 *
 * NOTE(review): this listing looks incomplete. Symbols that this page lists
 * as referenced by the function (plasma_desc_init, PLASMA_FLAT_HOUSEHOLDER,
 * PlasmaComplexDouble, PLASMA_TRANSLATION, PLASMA_OUTOFPLACE,
 * plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED) never appear below, and
 * several `else` lines have no visible opening `if`. Check zgelqf.c before
 * relying on the control flow shown here.
 */
int NB, IB, IBNB, MT, NT;
int status;
PLASMA_sequence *sequence = NULL;
PLASMA_desc descA, descT;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zgelqf", "PLASMA not initialized");
/* NOTE(review): no return is visible here; presumably lost from the listing. */
}
/* Check input arguments */
if (M < 0) {
plasma_error("PLASMA_zgelqf", "illegal value of M");
return -1;
}
if (N < 0) {
plasma_error("PLASMA_zgelqf", "illegal value of N");
return -2;
}
if (LDA < max(1, M)) {
plasma_error("PLASMA_zgelqf", "illegal value of LDA");
return -4;
}
/* Quick return */
/* NOTE(review): the statement guarded by this `if` is not visible; as
 * printed, the test appears to guard the plasma_tune() call, which is
 * presumably not the intent. */
if (min(M, N) == 0)
/* Tune NB & IB depending on M & N (NRHS is passed as 0); Set NBNBSIZE */
status = plasma_tune(PLASMA_FUNC_ZGELS, M, N, 0);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_zgelqf", "plasma_tune() failed");
return status;
}
/* Set MT & NT: M/NB and N/NB rounded up */
NB = PLASMA_NB;
IB = PLASMA_IB;
IBNB = IB*NB;
MT = (M%NB==0) ? (M/NB) : (M/NB+1);
NT = (N%NB==0) ? (N/NB) : (N/NB+1);
plasma_sequence_create(plasma, &sequence);
/* NOTE(review): the start of the next statement is missing; the two
 * argument lists presumably belong to plasma_desc_init calls that size
 * descT as MT*IB by NT*NB, or by 2*NT*NB in the `else` case. */
IB, NB, IBNB,
MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);
}
else {
/* Double the size of T to accommodate the tree reduction phase */
IB, NB, IBNB,
MT*IB, 2*NT*NB, 0, 0, MT*IB, 2*NT*NB);
}
/* The descriptor uses the caller's T buffer as its storage. */
descT.mat = T;
/* NOTE(review): the `if` selecting between the "oop" and "ip" macros is
 * not visible here or after the tile call below. */
plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
} else {
plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N);
}
/* Call the tile interface */
PLASMA_zgelqf_Tile_Async(&descA, &descT, sequence, &request);
plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
} else {
plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
}
/* The function result is whatever status the sequence ended with. */
status = sequence->status;
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgelqs ( int  M,
int  N,
int  NRHS,
PLASMA_Complex64_t *  A,
int  LDA,
PLASMA_Complex64_t *  T,
PLASMA_Complex64_t *  B,
int  LDB
)

PLASMA_zgelqs - Compute a minimum-norm solution min || A*X - B || using the LQ factorization A = L*Q computed by PLASMA_zgelqf.

Parameters:
[in]MThe number of rows of the matrix A. M >= 0.
[in]NThe number of columns of the matrix A. N >= M >= 0.
[in]NRHSThe number of columns of B. NRHS >= 0.
[in]ADetails of the LQ factorization of the original matrix A as returned by PLASMA_zgelqf.
[in]LDAThe leading dimension of the array A. LDA >= M.
[in]TAuxiliary factorization data, computed by PLASMA_zgelqf.
[in,out]BOn entry, the M-by-NRHS right hand side matrix B. On exit, the N-by-NRHS solution matrix X.
[in]LDBThe leading dimension of the array B. LDB >= N.
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
See also:
PLASMA_zgelqs_Tile
PLASMA_zgelqs_Tile_Async
PLASMA_cgelqs
PLASMA_dgelqs
PLASMA_sgelqs
PLASMA_zgelqf

Definition at line 67 of file zgelqs.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_ZGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zgelqs_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaComplexDouble, plasma_sequence_t::status, and T.

{
/*
 * Body of PLASMA_zgelqs (LAPACK-layout entry point).
 *
 * Validates M, N (N >= M is required), NRHS, LDA and LDB, tunes the tile
 * sizes, sets up the descriptor for T, passes A and B through the
 * plasma_z*lap2tile macros, calls PLASMA_zgelqs_Tile_Async and passes A and
 * B through the plasma_z*tile2lap macros.
 *
 * Returns sequence->status on completion, the plasma_tune() status when
 * tuning fails, or -i when the i-th argument is illegal.
 *
 * NOTE(review): this listing looks incomplete. Symbols that this page lists
 * as referenced by the function (plasma_desc_init, PLASMA_FLAT_HOUSEHOLDER,
 * PlasmaComplexDouble, PLASMA_TRANSLATION, PLASMA_OUTOFPLACE,
 * plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED) never appear below, and
 * several `else` lines have no visible opening `if`. Check zgelqs.c before
 * relying on the control flow shown here.
 */
int NB, IB, IBNB, MT, NT;
int status;
PLASMA_sequence *sequence = NULL;
PLASMA_desc descA, descB, descT;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zgelqs", "PLASMA not initialized");
/* NOTE(review): no return is visible here; presumably lost from the listing. */
}
/* Check input arguments */
if (M < 0) {
plasma_error("PLASMA_zgelqs", "illegal value of M");
return -1;
}
if (N < 0 || M > N) {
plasma_error("PLASMA_zgelqs", "illegal value of N");
return -2;
}
if (NRHS < 0) {
/* The diagnostic names the argument actually being checked. */
plasma_error("PLASMA_zgelqs", "illegal value of NRHS");
return -3;
}
if (LDA < max(1, M)) {
plasma_error("PLASMA_zgelqs", "illegal value of LDA");
return -5;
}
if (LDB < max(1, N)) {
plasma_error("PLASMA_zgelqs", "illegal value of LDB");
return -8;
}
/* Quick return */
/* NOTE(review): the body of this test is empty in the listing; the
 * early return it announces is not visible. */
if (min(M, min(N, NRHS)) == 0) {
}
/* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
status = plasma_tune(PLASMA_FUNC_ZGELS, M, N, NRHS);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_zgelqs", "plasma_tune() failed");
return status;
}
/* Set MT & NT: M/NB and N/NB rounded up */
NB = PLASMA_NB;
IB = PLASMA_IB;
IBNB = IB*NB;
MT = (M%NB==0) ? (M/NB) : (M/NB+1);
NT = (N%NB==0) ? (N/NB) : (N/NB+1);
plasma_sequence_create(plasma, &sequence);
/* NOTE(review): the start of the next statement is missing; the two
 * argument lists presumably belong to plasma_desc_init calls that size
 * descT as MT*IB by NT*NB, or by 2*NT*NB in the `else` case. */
IB, NB, IBNB,
MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);
}
else {
/* Double the size of T to accommodate the tree reduction phase */
IB, NB, IBNB,
MT*IB, 2*NT*NB, 0, 0, MT*IB, 2*NT*NB);
}
/* The descriptor uses the caller's T buffer as its storage. */
descT.mat = T;
/* NOTE(review): the `if` selecting between the "oop" and "ip" macros is
 * not visible here or after the tile call below. B is described with N
 * rows in both variants. */
plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N , plasma_desc_mat_free(&(descA)) );
plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
} else {
plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N );
plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
}
/* Call the tile interface */
PLASMA_zgelqs_Tile_Async(&descA, &descT, &descB, sequence, &request);
plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
plasma_zooptile2lap( descB, B, NB, NB, LDB, NRHS );
} else {
plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
}
/* The function result is whatever status the sequence ended with. */
status = sequence->status;
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgels ( PLASMA_enum  trans,
int  M,
int  N,
int  NRHS,
PLASMA_Complex64_t *  A,
int  LDA,
PLASMA_Complex64_t *  T,
PLASMA_Complex64_t *  B,
int  LDB
)

PLASMA_zgels - solves overdetermined or underdetermined linear systems involving an M-by-N matrix A using the QR or the LQ factorization of A. It is assumed that A has full rank. The following options are provided:

trans = PlasmaNoTrans and M >= N: find the least squares solution of an overdetermined system, i.e., solve the least squares problem: minimize || B - A*X ||.

trans = PlasmaNoTrans and M < N: find the minimum norm solution of an underdetermined system A * X = B.

Several right hand side vectors B and solution vectors X can be handled in a single call; they are stored as the columns of the M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix X.

Parameters:
[in]transIntended usage: = PlasmaNoTrans: the linear system involves A; = PlasmaConjTrans: the linear system involves A**H. Currently only PlasmaNoTrans is supported.
[in]MThe number of rows of the matrix A. M >= 0.
[in]NThe number of columns of the matrix A. N >= 0.
[in]NRHSThe number of right hand sides, i.e., the number of columns of the matrices B and X. NRHS >= 0.
[in,out]AOn entry, the M-by-N matrix A. On exit, if M >= N, A is overwritten by details of its QR factorization as returned by PLASMA_zgeqrf; if M < N, A is overwritten by details of its LQ factorization as returned by PLASMA_zgelqf.
[in]LDAThe leading dimension of the array A. LDA >= max(1,M).
[out]TOn exit, auxiliary factorization data.
[in,out]BOn entry, the M-by-NRHS matrix B of right hand side vectors, stored columnwise; On exit, if return value = 0, B is overwritten by the solution vectors, stored columnwise: if M >= N, rows 1 to N of B contain the least squares solution vectors; the residual sum of squares for the solution in each column is given by the sum of squares of the modulus of elements N+1 to M in that column; if M < N, rows 1 to N of B contain the minimum norm solution vectors;
[in]LDBThe leading dimension of the array B. LDB >= MAX(1,M,N).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
See also:
PLASMA_zgels_Tile
PLASMA_zgels_Tile_Async
PLASMA_cgels
PLASMA_dgels
PLASMA_sgels

Definition at line 94 of file zgels.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_NOT_SUPPORTED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_ZGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zgels_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaComplexDouble, PlasmaNoTrans, plasma_sequence_t::status, and T.

{
/*
 * Body of PLASMA_zgels (LAPACK-layout entry point).
 *
 * Validates trans, M, N, NRHS, LDA and LDB, tunes the tile sizes, sets up
 * the descriptor for T, passes A and B through the plasma_z*lap2tile
 * macros (B is described with M rows when M >= N and with N rows
 * otherwise), calls PLASMA_zgels_Tile_Async with PlasmaNoTrans and passes A
 * and B through the plasma_z*tile2lap macros.
 *
 * Returns sequence->status on completion, the plasma_tune() status when
 * tuning fails, or -i when the i-th argument is illegal.
 *
 * NOTE(review): this listing looks incomplete. Symbols that this page lists
 * as referenced by the function (plasma_desc_init, PLASMA_FLAT_HOUSEHOLDER,
 * PlasmaComplexDouble, PLASMA_TRANSLATION, PLASMA_OUTOFPLACE,
 * plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED,
 * PLASMA_ERR_NOT_SUPPORTED) never appear below, and several `else` lines
 * have no visible opening `if`. Check zgels.c before relying on the control
 * flow shown here.
 */
int i, j;
int NB, IB, IBNB, MT, NT;
int status;
PLASMA_sequence *sequence = NULL;
PLASMA_desc descA, descB, descT;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zgels", "PLASMA not initialized");
/* NOTE(review): no return is visible here; presumably lost from the listing. */
}
/* Check input arguments */
if (trans != PlasmaNoTrans) {
plasma_error("PLASMA_zgels", "only PlasmaNoTrans supported");
/* NOTE(review): no return is visible here; presumably the
 * PLASMA_ERR_NOT_SUPPORTED return was lost from the listing. */
}
if (M < 0) {
plasma_error("PLASMA_zgels", "illegal value of M");
return -2;
}
if (N < 0) {
plasma_error("PLASMA_zgels", "illegal value of N");
return -3;
}
if (NRHS < 0) {
plasma_error("PLASMA_zgels", "illegal value of NRHS");
return -4;
}
if (LDA < max(1, M)) {
plasma_error("PLASMA_zgels", "illegal value of LDA");
return -6;
}
if (LDB < max(1, max(M, N))) {
plasma_error("PLASMA_zgels", "illegal value of LDB");
return -9;
}
/* Quick return */
/* When any of M, N, NRHS is zero, the leading max(M, N) rows of every one
 * of the NRHS columns of B are set to zero (B is indexed column by column
 * with stride LDB).
 * NOTE(review): the return that should follow the loops is not visible. */
if (min(M, min(N, NRHS)) == 0) {
for (i = 0; i < max(M, N); i++)
for (j = 0; j < NRHS; j++)
B[j*LDB+i] = 0.0;
}
/* Tune NB & IB depending on M, N & NRHS; Set NBNB */
status = plasma_tune(PLASMA_FUNC_ZGELS, M, N, NRHS);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_zgels", "plasma_tune() failed");
return status;
}
/* Set MT & NT: M/NB and N/NB rounded up */
NB = PLASMA_NB;
IB = PLASMA_IB;
IBNB = IB*NB;
NT = (N%NB==0) ? (N/NB) : (N/NB+1);
MT = (M%NB==0) ? (M/NB) : (M/NB+1);
plasma_sequence_create(plasma, &sequence);
/* NOTE(review): the start of the next statement is missing; the two
 * argument lists presumably belong to plasma_desc_init calls that size
 * descT as MT*IB by NT*NB, or by 2*NT*NB in the `else` case. */
IB, NB, IBNB,
MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);
}
else {
/* Double the size of T to accommodate the tree reduction phase */
IB, NB, IBNB,
MT*IB, 2*NT*NB, 0, 0, MT*IB, 2*NT*NB);
}
/* The descriptor uses the caller's T buffer as its storage. */
descT.mat = T;
/* M >= N: B is described with M rows. */
if ( M >= N ) {
/* NOTE(review): the inner `if` selecting between the "oop" and "ip"
 * macros is not visible in either branch, nor after the tile call. */
plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N , plasma_desc_mat_free(&(descA)) );
plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, M, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
} else {
plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N );
plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, M, NRHS);
}
} else {
/* M < N: B is described with N rows. */
plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N , plasma_desc_mat_free(&(descA)) );
plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
} else {
plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N );
plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
}
}
/* Call the tile interface */
PLASMA_zgels_Tile_Async(PlasmaNoTrans, &descA, &descT, &descB, sequence, &request);
plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
plasma_zooptile2lap( descB, B, NB, NB, LDB, NRHS );
} else {
plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
}
/* The function result is whatever status the sequence ended with. */
status = sequence->status;
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgemm ( PLASMA_enum  transA,
PLASMA_enum  transB,
int  M,
int  N,
int  K,
PLASMA_Complex64_t  alpha,
PLASMA_Complex64_t *  A,
int  LDA,
PLASMA_Complex64_t *  B,
int  LDB,
PLASMA_Complex64_t  beta,
PLASMA_Complex64_t *  C,
int  LDC
)

PLASMA_zgemm - Performs one of the matrix-matrix operations

\[ C = \alpha \, [\, op(A) \times op(B) \,] + \beta C, \]

where op( X ) is one of

op( X ) = X or op( X ) = X' or op( X ) = conjg( X' )

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters:
[in]transASpecifies whether the matrix A is transposed, not transposed or conjugate transposed: = PlasmaNoTrans: A is not transposed; = PlasmaTrans: A is transposed; = PlasmaConjTrans: A is conjugate transposed.
[in]transBSpecifies whether the matrix B is transposed, not transposed or conjugate transposed: = PlasmaNoTrans: B is not transposed; = PlasmaTrans: B is transposed; = PlasmaConjTrans: B is conjugate transposed.
[in]MM specifies the number of rows of the matrix op( A ) and of the matrix C. M >= 0.
[in]NN specifies the number of columns of the matrix op( B ) and of the matrix C. N >= 0.
[in]KK specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K >= 0.
[in]alphaalpha specifies the scalar alpha
[in]AA is a LDA-by-ka matrix, where ka is K when transA = PlasmaNoTrans, and is M otherwise.
[in]LDAThe leading dimension of the array A. LDA >= max(1,M) when transA = PlasmaNoTrans, and LDA >= max(1,K) otherwise.
[in]BB is a LDB-by-kb matrix, where kb is N when transB = PlasmaNoTrans, and is K otherwise.
[in]LDBThe leading dimension of the array B. LDB >= max(1,K) when transB = PlasmaNoTrans, and LDB >= max(1,N) otherwise.
[in]betabeta specifies the scalar beta
[in,out]CC is a LDC-by-N matrix. On exit, the array is overwritten by the M by N matrix ( alpha*op( A )*op( B ) + beta*C )
[in]LDCThe leading dimension of the array C. LDC >= max(1,M).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
See also:
PLASMA_zgemm_Tile
PLASMA_cgemm
PLASMA_dgemm
PLASMA_sgemm

Definition at line 96 of file zgemm.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGEMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zgemm_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaConjTrans, PlasmaNoTrans, PlasmaTrans, and plasma_sequence_t::status.

{
/*
 * Body of PLASMA_zgemm (LAPACK-layout entry point).
 *
 * Validates transA, transB, M, N, K and the three leading dimensions,
 * tunes the tile size, passes A, B and C through the plasma_z*lap2tile
 * macros, issues the tile-level call and passes the matrices through the
 * plasma_z*tile2lap macros.
 *
 * Am x An and Bm x Bn are the stored shapes of A and B: M x K (resp.
 * K x N) when the matrix is not transposed, K x M (resp. N x K) otherwise.
 *
 * Returns sequence->status on completion, the plasma_tune() status when
 * tuning fails, or -i when the i-th argument is illegal.
 *
 * NOTE(review): this listing looks incomplete. Symbols that this page lists
 * as referenced by the function (PLASMA_zgemm_Tile_Async,
 * PLASMA_TRANSLATION, PLASMA_OUTOFPLACE, plasma_dynamic_sync,
 * PLASMA_ERR_NOT_INITIALIZED) never appear below, and two `} else {` lines
 * have no visible opening `if`. Check zgemm.c before relying on the control
 * flow shown here.
 */
int NB;
int Am, An, Bm, Bn;
int status;
PLASMA_desc descA, descB, descC;
PLASMA_sequence *sequence = NULL;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zgemm", "PLASMA not initialized");
/* NOTE(review): no return is visible here; presumably lost from the listing. */
}
/* Check input arguments */
if ((transA != PlasmaNoTrans) && (transA != PlasmaTrans) && (transA != PlasmaConjTrans)) {
plasma_error("PLASMA_zgemm", "illegal value of transA");
return -1;
}
if ((transB != PlasmaNoTrans) && (transB != PlasmaTrans) && (transB != PlasmaConjTrans)) {
plasma_error("PLASMA_zgemm", "illegal value of transB");
return -2;
}
if ( transA == PlasmaNoTrans ) {
Am = M; An = K;
} else {
Am = K; An = M;
}
if ( transB == PlasmaNoTrans ) {
Bm = K; Bn = N;
} else {
Bm = N; Bn = K;
}
if (M < 0) {
plasma_error("PLASMA_zgemm", "illegal value of M");
return -3;
}
if (N < 0) {
plasma_error("PLASMA_zgemm", "illegal value of N");
return -4;
}
if (K < 0) {
/* The diagnostic names the argument actually being checked. */
plasma_error("PLASMA_zgemm", "illegal value of K");
return -5;
}
if (LDA < max(1, Am)) {
plasma_error("PLASMA_zgemm", "illegal value of LDA");
return -8;
}
if (LDB < max(1, Bm)) {
plasma_error("PLASMA_zgemm", "illegal value of LDB");
return -10;
}
if (LDC < max(1, M)) {
plasma_error("PLASMA_zgemm", "illegal value of LDC");
return -13;
}
/* Quick return */
/* The test holds when C is empty, or when alpha*op(A)*op(B) contributes
 * nothing (alpha == 0 or K == 0) and beta == 1.
 * NOTE(review): the statement guarded by this `if` is not visible; as
 * printed, the test appears to guard the plasma_tune() call, which is
 * presumably not the intent. */
if (M == 0 || N == 0 ||
((alpha == (PLASMA_Complex64_t)0.0 || K == 0) && beta == (PLASMA_Complex64_t)1.0))
/* Tune NB depending on M & N (the last argument is passed as 0); Set NBNBSIZE */
status = plasma_tune(PLASMA_FUNC_ZGEMM, M, N, 0);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_zgemm", "plasma_tune() failed");
return status;
}
/* Set the tile size */
NB = PLASMA_NB;
plasma_sequence_create(plasma, &sequence);
/* NOTE(review): the `if` selecting between the "oop" and "ip" macros is
 * not visible here or after the tile call below. */
plasma_zooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)) );
plasma_zooplap2tile( descB, B, NB, NB, LDB, Bn, 0, 0, Bm, Bn, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
plasma_zooplap2tile( descC, C, NB, NB, LDC, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descC)));
} else {
plasma_ziplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An );
plasma_ziplap2tile( descB, B, NB, NB, LDB, Bn, 0, 0, Bm, Bn );
plasma_ziplap2tile( descC, C, NB, NB, LDC, N, 0, 0, M, N );
}
/* Call the tile interface */
/* NOTE(review): the callee name is missing from the next line; presumably
 * it is the PLASMA_zgemm_Tile_Async call named in the references. */
transA, transB, alpha, &descA, &descB, beta, &descC, sequence, &request);
plasma_zooptile2lap( descC, C, NB, NB, LDC, N );
} else {
plasma_ziptile2lap( descA, A, NB, NB, LDA, An );
plasma_ziptile2lap( descB, B, NB, NB, LDB, Bn );
plasma_ziptile2lap( descC, C, NB, NB, LDC, N );
}
/* The function result is whatever status the sequence ended with. */
status = sequence->status;
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgeqrf ( int  M,
int  N,
PLASMA_Complex64_t *  A,
int  LDA,
PLASMA_Complex64_t *  T
)

PLASMA_zgeqrf - Computes the tile QR factorization of a complex M-by-N matrix A: A = Q * R.

Parameters:
[in]MThe number of rows of the matrix A. M >= 0.
[in]NThe number of columns of the matrix A. N >= 0.
[in,out]AOn entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if M >= N); the elements below the diagonal represent the unitary matrix Q as a product of elementary reflectors stored by tiles.
[in]LDAThe leading dimension of the array A. LDA >= max(1,M).
[out]TOn exit, auxiliary factorization data, required by PLASMA_zgeqrs to solve the system of equations.
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
See also:
PLASMA_zgeqrf_Tile
PLASMA_zgeqrf_Tile_Async
PLASMA_cgeqrf
PLASMA_dgeqrf
PLASMA_sgeqrf
PLASMA_zgeqrs

Definition at line 61 of file zgeqrf.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_ZGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zgeqrf_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaComplexDouble, plasma_sequence_t::status, and T.

{
/*
 * Body of PLASMA_zgeqrf (LAPACK-layout entry point).
 *
 * Validates M, N and LDA, tunes the tile sizes, sets up the descriptor for
 * T, passes A through a plasma_z*lap2tile macro, calls
 * PLASMA_zgeqrf_Tile_Async and passes A through a plasma_z*tile2lap macro.
 *
 * Returns sequence->status on completion, the plasma_tune() status when
 * tuning fails, or -i when the i-th argument is illegal.
 *
 * NOTE(review): this listing looks incomplete. Symbols that this page lists
 * as referenced by the function (plasma_desc_init, PLASMA_FLAT_HOUSEHOLDER,
 * PlasmaComplexDouble, PLASMA_TRANSLATION, PLASMA_OUTOFPLACE,
 * plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED) never appear below, and
 * several `else` lines have no visible opening `if`. Check zgeqrf.c before
 * relying on the control flow shown here.
 */
int NB, IB, IBNB, MT, NT;
int status;
PLASMA_sequence *sequence = NULL;
PLASMA_desc descA, descT;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zgeqrf", "PLASMA not initialized");
/* NOTE(review): no return is visible here; presumably lost from the listing. */
}
/* Check input arguments */
if (M < 0) {
plasma_error("PLASMA_zgeqrf", "illegal value of M");
return -1;
}
if (N < 0) {
plasma_error("PLASMA_zgeqrf", "illegal value of N");
return -2;
}
if (LDA < max(1, M)) {
plasma_error("PLASMA_zgeqrf", "illegal value of LDA");
return -4;
}
/* Quick return */
/* NOTE(review): the statement guarded by this `if` is not visible; as
 * printed, the test appears to guard the plasma_tune() call, which is
 * presumably not the intent. */
if (min(M, N) == 0)
/* Tune NB & IB depending on M & N (NRHS is passed as 0); Set NBNBSIZE */
status = plasma_tune(PLASMA_FUNC_ZGELS, M, N, 0);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_zgeqrf", "plasma_tune() failed");
return status;
}
/* Set MT & NT: M/NB and N/NB rounded up */
NB = PLASMA_NB;
IB = PLASMA_IB;
IBNB = IB*NB;
MT = (M%NB==0) ? (M/NB) : (M/NB+1);
NT = (N%NB==0) ? (N/NB) : (N/NB+1);
plasma_sequence_create(plasma, &sequence);
/* NOTE(review): the start of the next statement is missing; the two
 * argument lists presumably belong to plasma_desc_init calls that size
 * descT as MT*IB by NT*NB, or by 2*NT*NB in the `else` case. */
IB, NB, IBNB,
MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);
}
else {
/* Double the size of T to accommodate the tree reduction phase */
IB, NB, IBNB,
MT*IB, 2*NT*NB, 0, 0, MT*IB, 2*NT*NB);
}
/* The descriptor uses the caller's T buffer as its storage. */
descT.mat = T;
/* NOTE(review): the `if` selecting between the "oop" and "ip" macros is
 * not visible here or after the tile call below. */
plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
} else {
plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N);
}
/* Call the tile interface */
PLASMA_zgeqrf_Tile_Async(&descA, &descT, sequence, &request);
plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
} else {
plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
}
/* The function result is whatever status the sequence ended with. */
status = sequence->status;
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgeqrs ( int  M,
int  N,
int  NRHS,
PLASMA_Complex64_t *  A,
int  LDA,
PLASMA_Complex64_t *  T,
PLASMA_Complex64_t *  B,
int  LDB
)

PLASMA_zgeqrs - Computes a least-squares solution min || A*X - B || using the QR factorization A = Q*R computed by PLASMA_zgeqrf.

Parameters:
[in]MThe number of rows of the matrix A. M >= 0.
[in]NThe number of columns of the matrix A. M >= N >= 0.
[in]NRHSThe number of columns of B. NRHS >= 0.
[in,out]ADetails of the QR factorization of the original matrix A as returned by PLASMA_zgeqrf.
[in]LDAThe leading dimension of the array A. LDA >= M.
[in]TAuxiliary factorization data, computed by PLASMA_zgeqrf.
[in,out]BOn entry, the m-by-nrhs right hand side matrix B. On exit, the n-by-nrhs solution matrix X.
[in]LDBThe leading dimension of the array B. LDB >= max(1,M).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
See also:
PLASMA_zgeqrs_Tile
PLASMA_zgeqrs_Tile_Async
PLASMA_cgeqrs
PLASMA_dgeqrs
PLASMA_sgeqrs
PLASMA_zgeqrf

Definition at line 67 of file zgeqrs.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_ZGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zgeqrs_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaComplexDouble, plasma_sequence_t::status, and T.

{
/*
 * Body of PLASMA_zgeqrs (LAPACK-layout entry point).
 *
 * Validates M, N (M >= N is required), NRHS, LDA and LDB, tunes the tile
 * sizes, sets up the descriptor for T, passes A and B through the
 * plasma_z*lap2tile macros, calls PLASMA_zgeqrs_Tile_Async and passes A and
 * B through the plasma_z*tile2lap macros.
 *
 * Returns sequence->status on completion, the plasma_tune() status when
 * tuning fails, or -i when the i-th argument is illegal.
 *
 * NOTE(review): this listing looks incomplete. Symbols that this page lists
 * as referenced by the function (plasma_desc_init, PLASMA_FLAT_HOUSEHOLDER,
 * PlasmaComplexDouble, PLASMA_TRANSLATION, PLASMA_OUTOFPLACE,
 * plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED) never appear below, and
 * several `else` lines have no visible opening `if`. Check zgeqrs.c before
 * relying on the control flow shown here.
 */
int NB, IB, IBNB, MT, NT;
int status;
PLASMA_sequence *sequence = NULL;
PLASMA_desc descA, descB, descT;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zgeqrs", "PLASMA not initialized");
/* NOTE(review): no return is visible here; presumably lost from the listing. */
}
/* Check input arguments */
if (M < 0) {
plasma_error("PLASMA_zgeqrs", "illegal value of M");
return -1;
}
if (N < 0 || N > M) {
plasma_error("PLASMA_zgeqrs", "illegal value of N");
return -2;
}
if (NRHS < 0) {
/* The diagnostic names the argument actually being checked. */
plasma_error("PLASMA_zgeqrs", "illegal value of NRHS");
return -3;
}
if (LDA < max(1, M)) {
plasma_error("PLASMA_zgeqrs", "illegal value of LDA");
return -5;
}
if (LDB < max(1, M)) {
plasma_error("PLASMA_zgeqrs", "illegal value of LDB");
return -8;
}
/* Quick return */
/* NOTE(review): the body of this test is empty in the listing; the
 * early return it announces is not visible. */
if (min(M, min(N, NRHS)) == 0) {
}
/* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
status = plasma_tune(PLASMA_FUNC_ZGELS, M, N, NRHS);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_zgeqrs", "plasma_tune() failed");
return status;
}
/* Set MT & NT: M/NB and N/NB rounded up */
NB = PLASMA_NB;
IB = PLASMA_IB;
IBNB = IB*NB;
MT = (M%NB==0) ? (M/NB) : (M/NB+1);
NT = (N%NB==0) ? (N/NB) : (N/NB+1);
plasma_sequence_create(plasma, &sequence);
/* NOTE(review): the start of the next statement is missing; the two
 * argument lists presumably belong to plasma_desc_init calls that size
 * descT as MT*IB by NT*NB, or by 2*NT*NB in the `else` case. */
IB, NB, IBNB,
MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);
}
else {
/* Double the size of T to accommodate the tree reduction phase */
IB, NB, IBNB,
MT*IB, 2*NT*NB, 0, 0, MT*IB, 2*NT*NB);
}
/* The descriptor uses the caller's T buffer as its storage. */
descT.mat = T;
/* NOTE(review): the `if` selecting between the "oop" and "ip" macros is
 * not visible here or after the tile call below. B is described with M
 * rows in both variants. */
plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N , plasma_desc_mat_free(&(descA)) );
plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, M, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
} else {
plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N );
plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, M, NRHS);
}
/* Call the tile interface */
PLASMA_zgeqrs_Tile_Async(&descA, &descT, &descB, sequence, &request);
plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
plasma_zooptile2lap( descB, B, NB, NB, LDB, NRHS );
} else {
plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
}
/* The function result is whatever status the sequence ended with. */
status = sequence->status;
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgesv ( int  N,
int  NRHS,
PLASMA_Complex64_t *  A,
int  LDA,
int *  IPIV,
PLASMA_Complex64_t *  B,
int  LDB 
)

PLASMA_zgesv - Computes the solution to a system of linear equations A * X = B, where A is an N-by-N matrix and X and B are N-by-NRHS matrices. The tile LU decomposition with partial tile pivoting and row interchanges is used to factor A. The factored form of A is then used to solve the system of equations A * X = B.

Parameters:
[in]NThe number of linear equations, i.e., the order of the matrix A. N >= 0.
[in]NRHSThe number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in,out]AOn entry, the N-by-N coefficient matrix A. On exit, the tile L and U factors from the factorization.
[in]LDAThe leading dimension of the array A. LDA >= max(1,N).
[out]IPIVOn exit, the pivot indices that define the permutations.
[in,out]BOn entry, the N-by-NRHS right-hand-side matrix B. On exit, if return value = 0, the N-by-NRHS solution matrix X.
[in]LDBThe leading dimension of the array B. LDB >= max(1,N).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
>0if i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, so the solution could not be computed.
See also:
PLASMA_zgesv_Tile
PLASMA_zgesv_Tile_Async
PLASMA_cgesv
PLASMA_dgesv
PLASMA_sgesv

Definition at line 70 of file zgesv.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), PLASMA_FUNC_ZGESV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zgesv_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, and plasma_sequence_t::status.

{
/*
 * Body of PLASMA_zgesv as shown in the generated listing. Visible steps:
 * validate N, NRHS, LDA and LDB (negative return codes), call plasma_tune(),
 * read NB from PLASMA_NB, create a sequence, pass A and B through the
 * plasma_z*lap2tile macros, call PLASMA_zgesv_Tile_Async(), pass A and B
 * through the plasma_z*tile2lap macros, and return sequence->status.
 *
 * NOTE(review): this listing appears to have lost lines during extraction:
 * `plasma` and `request` are used but never declared here, and several
 * branches below have no visible `return` or opening `if`. Confirm every
 * such spot against zgesv.c before relying on this text.
 */
int NB;
int status;
PLASMA_sequence *sequence = NULL;
PLASMA_desc descA, descB;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_error("PLASMA_zgesv", "PLASMA not initialized");
/* NOTE(review): no return is visible here; the References list names
 * PLASMA_ERR_NOT_INITIALIZED, so a return of that code is presumably
 * missing from the listing -- confirm. */
}
/* Check input arguments */
if (N < 0) {
plasma_error("PLASMA_zgesv", "illegal value of N");
return -1;
}
if (NRHS < 0) {
plasma_error("PLASMA_zgesv", "illegal value of NRHS");
return -2;
}
if (LDA < max(1, N)) {
plasma_error("PLASMA_zgesv", "illegal value of LDA");
return -4;
}
if (LDB < max(1, N)) {
plasma_error("PLASMA_zgesv", "illegal value of LDB");
/* NOTE(review): LDB is the 7th parameter in the documented signature and
 * the return-value contract is "-i for the i-th argument", yet this
 * returns -8; confirm whether -7 was intended. */
return -8;
}
/* Quick return */
/* NOTE(review): as listed, this `if` has no statement of its own, so the
 * plasma_tune() assignment below would become its body; a
 * `return PLASMA_SUCCESS;` line is presumably missing -- confirm. */
if (min(N, NRHS) == 0)
/* Tune NB & IB depending on M, N & NRHS; Set NBNB */
status = plasma_tune(PLASMA_FUNC_ZGESV, N, N, NRHS);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_zgesv", "plasma_tune() failed");
return status;
}
/* Set NT & NTRHS */
NB = PLASMA_NB;
plasma_sequence_create(plasma, &sequence);
/* NOTE(review): the `} else {` below has no visible opening `if`; the
 * References list names PLASMA_TRANSLATION and PLASMA_OUTOFPLACE, so the
 * test is presumably on those -- confirm. The last macro argument of the
 * plasma_zooplap2tile calls is a plasma_desc_mat_free() statement list. */
plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N , plasma_desc_mat_free(&(descA)) );
plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
} else {
plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
}
/* Call the tile interface */
PLASMA_zgesv_Tile_Async(&descA, IPIV, &descB, sequence, &request);
/* NOTE(review): same situation as above -- the opening `if` of this
 * if/else is not visible, and the References list names
 * plasma_dynamic_sync although no call to it appears in the listing. */
plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
plasma_zooptile2lap( descB, B, NB, NB, LDB, NRHS );
} else {
plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
}
/* The function result is whatever status the sequence recorded. */
status = sequence->status;
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgesv_incpiv ( int  N,
int  NRHS,
PLASMA_Complex64_t *  A,
int  LDA,
PLASMA_Complex64_t *  L,
int *  IPIV,
PLASMA_Complex64_t *  B,
int  LDB 
)

PLASMA_zgesv_incpiv - Computes the solution to a system of linear equations A * X = B, where A is an N-by-N matrix and X and B are N-by-NRHS matrices. The tile LU decomposition with partial tile pivoting and row interchanges is used to factor A. The factored form of A is then used to solve the system of equations A * X = B.

Parameters:
[in]NThe number of linear equations, i.e., the order of the matrix A. N >= 0.
[in]NRHSThe number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in,out]AOn entry, the N-by-N coefficient matrix A. On exit, the tile L and U factors from the factorization (not equivalent to LAPACK).
[in]LDAThe leading dimension of the array A. LDA >= max(1,N).
[out]LOn exit, auxiliary factorization data, related to the tile L factor, necessary to solve the system of equations.
[out]IPIVOn exit, the pivot indices that define the permutations (not equivalent to LAPACK).
[in,out]BOn entry, the N-by-NRHS right-hand-side matrix B. On exit, if return value = 0, the N-by-NRHS solution matrix X.
[in]LDBThe leading dimension of the array B. LDB >= max(1,N).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
>0if i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, so the solution could not be computed.
See also:
PLASMA_zgesv_incpiv_Tile
PLASMA_zgesv_incpiv_Tile_Async
PLASMA_cgesv_incpiv
PLASMA_dgesv_incpiv
PLASMA_sgesv_incpiv

Definition at line 73 of file zgesv_incpiv.c.

References L, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), PLASMA_FUNC_ZGESV, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zgesv_incpiv_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaComplexDouble, and plasma_sequence_t::status.

{
/*
 * Body of PLASMA_zgesv_incpiv as shown in the generated listing. Visible
 * steps: validate N, NRHS, LDA and LDB, call plasma_tune(), derive NB, IB,
 * IBNB and NT, create a sequence, point descL.mat at the caller's L buffer,
 * pass A and B through the plasma_z*lap2tile macros, call
 * PLASMA_zgesv_incpiv_Tile_Async(), pass A and B through the
 * plasma_z*tile2lap macros, and return sequence->status.
 *
 * NOTE(review): this listing appears to have lost lines during extraction:
 * `plasma` and `request` are used but never declared here, and several
 * statements below are visibly incomplete. Confirm against zgesv_incpiv.c.
 */
int NB, IB, IBNB, NT;
int status;
PLASMA_sequence *sequence = NULL;
PLASMA_desc descA, descB, descL;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_error("PLASMA_zgesv_incpiv", "PLASMA not initialized");
/* NOTE(review): no return is visible here; presumably a return of
 * PLASMA_ERR_NOT_INITIALIZED (named in References) is missing -- confirm. */
}
/* Check input arguments */
if (N < 0) {
plasma_error("PLASMA_zgesv_incpiv", "illegal value of N");
return -1;
}
if (NRHS < 0) {
plasma_error("PLASMA_zgesv_incpiv", "illegal value of NRHS");
return -2;
}
if (LDA < max(1, N)) {
plasma_error("PLASMA_zgesv_incpiv", "illegal value of LDA");
return -4;
}
if (LDB < max(1, N)) {
plasma_error("PLASMA_zgesv_incpiv", "illegal value of LDB");
return -8;
}
/* Quick return */
/* NOTE(review): as listed, this `if` has no statement of its own, so the
 * plasma_tune() assignment below would become its body; a
 * `return PLASMA_SUCCESS;` line is presumably missing -- confirm. */
if (min(N, NRHS) == 0)
/* Tune NB & IB depending on M, N & NRHS; Set NBNB */
status = plasma_tune(PLASMA_FUNC_ZGESV, N, N, NRHS);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_zgesv_incpiv", "plasma_tune() failed");
return status;
}
/* Set NT & NTRHS */
NB = PLASMA_NB;
IB = PLASMA_IB;
IBNB = IB*NB;
/* NT = ceil(N / NB): number of NB-wide tiles needed to cover N. */
NT = (N%NB==0) ? (N/NB) : (N/NB+1);
plasma_sequence_create(plasma, &sequence);
/* NOTE(review): the next two lines are the tail of an argument list whose
 * opening line is not visible; References names plasma_desc_init and
 * PlasmaComplexDouble, so this is presumably the initialisation of descL
 * as an (NT*IB)-by-(NT*NB) descriptor -- confirm. */
IB, NB, IBNB,
NT*IB, NT*NB, 0, 0, NT*IB, NT*NB);
/* descL uses the caller-supplied L buffer as its storage. */
descL.mat = L;
/* NOTE(review): the `} else {` below has no visible opening `if`;
 * presumably a PLASMA_TRANSLATION == PLASMA_OUTOFPLACE test -- confirm. */
plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N , plasma_desc_mat_free(&(descA)) );
plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
} else {
plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
}
/* Call the tile interface */
PLASMA_zgesv_incpiv_Tile_Async(&descA, &descL, IPIV, &descB, sequence, &request);
/* NOTE(review): the opening `if` of this if/else is not visible either, and
 * no plasma_dynamic_sync call appears although References names it. */
plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
plasma_zooptile2lap( descB, B, NB, NB, LDB, NRHS );
} else {
plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
}
/* The function result is whatever status the sequence recorded. */
status = sequence->status;
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgesvd ( PLASMA_enum  jobu,
PLASMA_enum  jobvt,
int  M,
int  N,
PLASMA_Complex64_t *  A,
int  LDA,
double *  S,
PLASMA_Complex64_t *  U,
int  LDU,
PLASMA_Complex64_t *  VT,
int  LDVT,
PLASMA_desc *  descT 
)

PLASMA_zgesvd - computes the singular value decomposition (SVD) of a complex M-by-N matrix A, optionally computing the left and/or right singular vectors. The SVD is written

 A = U * SIGMA * conjugate-transpose(V)

where SIGMA is an M-by-N matrix which is zero except for its min(m,n) diagonal elements, U is an M-by-M unitary matrix, and V is an N-by-N unitary matrix. The diagonal elements of SIGMA are the singular values of A; they are real and non-negative, and are returned in descending order. The first min(m,n) columns of U and V are the left and right singular vectors of A.

Note that the routine returns V**H, not V. Not LAPACK compliant for now! Note: Only PlasmaNoVec supported!

Parameters:
[in]jobuSpecifies options for computing all or part of the matrix U. Intended usage: = PlasmaVec: all M columns of U are returned in array U; = PlasmaNoVec: no columns of U (no left singular vectors) are computed. Note: Only PlasmaNoVec supported!
[in]jobvtSpecifies options for computing all or part of the matrix V**H. Intended usage: = PlasmaVec: all N rows of V**H are returned in array VT; = PlasmaNoVec: no rows of V**H (no right singular vectors) are computed. Note: Only PlasmaNoVec supported!
[in]MThe number of rows of the matrix A. M >= 0.
[in]NThe number of columns of the matrix A. N >= 0.
[in,out]AOn entry, the M-by-N matrix A. On exit, if JOBU = 'O', A is overwritten with the first min(m,n) columns of U (the left singular vectors, stored columnwise); if JOBVT = 'O', A is overwritten with the first min(m,n) rows of V**H (the right singular vectors, stored rowwise); if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A are destroyed.
[in]LDAThe leading dimension of the array A. LDA >= max(1,M).
[out]SThe double precision singular values of A, sorted so that S(i) >= S(i+1).
[out]U(LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. If JOBU = 'A', U contains the M-by-M unitary matrix U; if JOBU = 'S', U contains the first min(m,n) columns of U (the left singular vectors, stored columnwise); if JOBU = 'N' or 'O', U is not referenced.
[in]LDUThe leading dimension of the array U. LDU >= 1; if JOBU = 'S' or 'A', LDU >= M.
[out]VTIf JOBVT = 'A', VT contains the N-by-N unitary matrix V**H; if JOBVT = 'S', VT contains the first min(m,n) rows of V**H (the right singular vectors, stored rowwise); if JOBVT = 'N' or 'O', VT is not referenced.
[in]LDVTThe leading dimension of the array VT. LDVT >= 1; if JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
[in,out]descTOn entry, descriptor as returned by PLASMA_Alloc_Workspace_zgesvd. On exit, contains auxiliary factorization data.
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
See also:
PLASMA_zgesvd_Tile
PLASMA_zgesvd_Tile_Async
PLASMA_cgesvd
PLASMA_dgesvd
PLASMA_sgesvd

Definition at line 123 of file zgesvd.c.

References plasma_desc_t::m, max, min, plasma_desc_t::n, plasma_context_self(), plasma_desc_check(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGESVD, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zgesvd_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaNoVec, PlasmaVec, and plasma_sequence_t::status.

{
/*
 * Body of PLASMA_zgesvd as shown in the generated listing. Visible steps:
 * call plasma_tune(), derive NB, IB, MT and NT, validate the arguments
 * (including the shape of descT), reject PlasmaVec for jobu/jobvt as
 * unsupported, create a sequence, pass A (and, under PlasmaVec, U and VT)
 * through the plasma_z*lap2tile macros, call PLASMA_zgesvd_Tile_Async(),
 * pass the matrices through the plasma_z*tile2lap macros, and return
 * sequence->status.
 *
 * NOTE(review): this listing appears to have lost lines during extraction:
 * `plasma` and `request` are used but never declared here, and several
 * branches below are empty or lack their opening `if`. Confirm against
 * zgesvd.c.
 */
int NB, IB, MT, NT;
int status;
PLASMA_sequence *sequence = NULL;
PLASMA_desc descA, descU, descVT;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zgesvd", "PLASMA not initialized");
/* NOTE(review): no return is visible here; presumably a return of
 * PLASMA_ERR_NOT_INITIALIZED (named in References) is missing -- confirm. */
}
/* Tune NB & IB depending on M & N; Set NBNB */
/* NOTE(review): plasma_tune() is called before M and N are validated
 * below, so it can receive negative dimensions; confirm this ordering is
 * intended. It has to precede the descT check, which needs NB and IB. */
status = plasma_tune(PLASMA_FUNC_ZGESVD, M, N, 0);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_zgesvd", "plasma_tune() failed");
return status;
}
/* Set MT, NT */
NB = PLASMA_NB;
IB = PLASMA_IB;
/* MT = ceil(M / NB) and NT = ceil(N / NB): tile counts along rows/columns. */
MT = (M%NB==0) ? (M/NB) : (M/NB+1);
NT = (N%NB==0) ? (N/NB) : (N/NB+1);
/* Check input arguments */
if (jobu != PlasmaNoVec && jobu !=PlasmaVec) {
plasma_error("PLASMA_zgesvd", "illegal value of jobu");
return -1;
}
if (jobvt != PlasmaNoVec && jobvt != PlasmaVec) {
plasma_error("PLASMA_zgesvd", "illegal value of jobvt");
return -2;
}
if (M < 0) {
plasma_error("PLASMA_zgesvd", "illegal value of M");
return -3;
}
if (N < 0) {
plasma_error("PLASMA_zgesvd", "illegal value of N");
return -4;
}
if (LDA < max(1, M)) {
plasma_error("PLASMA_zgesvd", "illegal value of LDA");
return -6;
}
if (LDU < 1) {
plasma_error("PLASMA_zgesvd", "illegal value of LDU");
return -9;
}
if (LDVT < 1) {
plasma_error("PLASMA_zgesvd", "illegal value of LDVT");
return -11;
}
/* descT must pass plasma_desc_check() and be exactly (MT*IB)-by-(NT*NB). */
if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) ||
( descT->m != MT*IB ) || (descT->n != NT*NB) ) {
plasma_error("PLASMA_zgesvd", "invalid T descriptor");
return -12;
}
/* Quick return */
/* NOTE(review): the body of this branch is empty in the listing; a
 * `return PLASMA_SUCCESS;` line is presumably missing -- confirm. */
if (min(M, N) == 0) {
}
/* Singular vectors are not implemented: PlasmaVec is rejected with the
 * argument index of jobu (-1) or jobvt (-2). As listed, this makes every
 * later `== PlasmaVec` branch in this function unreachable. */
if (jobu == PlasmaVec) {
plasma_error("PLASMA_zgesvd", "computing the singular vectors is not supported in this version");
return -1;
}
if (jobvt == PlasmaVec) {
plasma_error("PLASMA_zgesvd", "computing the singular vectors is not supported in this version");
return -2;
}
plasma_sequence_create(plasma, &sequence);
/* NOTE(review): the `} else {` further down has no visible opening `if`;
 * presumably a PLASMA_TRANSLATION == PLASMA_OUTOFPLACE test -- confirm. */
plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
if (jobu == PlasmaVec){
plasma_zooplap2tile( descU, U, NB, NB, LDU, M, 0, 0, M, M, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descU)));
}
if (jobvt == PlasmaVec){
plasma_zooplap2tile( descVT, VT, NB, NB, LDVT, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descU)); plasma_desc_mat_free(&(descVT)));
}
} else {
plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N);
if (jobu == PlasmaVec){
plasma_ziplap2tile( descU, U, NB, NB, LDU, M, 0, 0, M, M);
}
if (jobvt == PlasmaVec){
plasma_ziplap2tile( descVT, VT, NB, NB, LDVT, N, 0, 0, N, N);
}
}
/* Call the tile interface */
/* descU and descVT are passed by address even when the PlasmaVec branches
 * above did not run, i.e. without having been set up in this function. */
PLASMA_zgesvd_Tile_Async(jobu, jobvt, &descA, S, &descU, &descVT, descT, sequence, &request);
/* NOTE(review): the opening `if` of this if/else is not visible either. */
plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
if (jobu == PlasmaVec){
plasma_zooptile2lap( descU, U, NB, NB, LDU, M );
}
if (jobvt == PlasmaVec){
plasma_zooptile2lap( descVT, VT, NB, NB, LDVT, N );
}
/* NOTE(review): the two branches below are empty in the listing;
 * presumably plasma_desc_mat_free() calls for descU / descVT were lost
 * (References names plasma_desc_mat_free) -- confirm. */
if (jobu == PlasmaVec){
}
if (jobvt == PlasmaVec){
}
} else {
plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
if (jobu == PlasmaVec){
plasma_ziptile2lap( descU, U, NB, NB, LDU, M );
}
if (jobvt == PlasmaVec){
plasma_ziptile2lap( descVT, VT, NB, NB, LDVT, N );
}
}
/* The function result is whatever status the sequence recorded. */
status = sequence->status;
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgetrf ( int  M,
int  N,
PLASMA_Complex64_t *  A,
int  LDA,
int *  IPIV 
)

PLASMA_zgetrf - Computes an LU factorization of a general M-by-N matrix A using the tile LU algorithm with partial tile pivoting with row interchanges.

Parameters:
[in]MThe number of rows of the matrix A. M >= 0.
[in]NThe number of columns of the matrix A. N >= 0.
[in,out]AOn entry, the M-by-N matrix to be factored. On exit, the tile factors L and U from the factorization.
[in]LDAThe leading dimension of the array A. LDA >= max(1,M).
[out]IPIVThe pivot indices that define the permutations.
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
>0if i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
See also:
PLASMA_zgetrf_Tile
PLASMA_zgetrf_Tile_Async
PLASMA_cgetrf
PLASMA_dgetrf
PLASMA_sgetrf

Definition at line 62 of file zgetrf.c.

References A, plasma_desc_t::mat, max, plasma_desc_t::mb, min, plasma_desc_t::mt, plasma_context_self(), plasma_desc_init(), plasma_dynamic_call_4, plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGESV, PLASMA_NB, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, plasma_tune(), PlasmaComplexDouble, and plasma_sequence_t::status.

{
/*
 * Body of PLASMA_zgetrf as shown in the generated listing. Visible steps:
 * validate M, N and LDA, call plasma_tune(), derive NB and NBNB, create a
 * sequence, point descA.mat directly at the caller's A, zero the first
 * min(M, N) entries of IPIV, dispatch plasma_pzgetrf_reclap through
 * plasma_dynamic_call_4, post-process IPIV, and return sequence->status.
 *
 * NOTE(review): this listing appears to have lost lines during extraction:
 * `plasma` and `request` are used but never declared here, and some
 * statements below are visibly incomplete. Confirm against zgetrf.c.
 */
int NB, NBNB, minMN;
int status;
PLASMA_desc descA ;
PLASMA_sequence *sequence = NULL;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zgetrf", "PLASMA not initialized");
/* NOTE(review): no return is visible here; presumably a return of
 * PLASMA_ERR_NOT_INITIALIZED (named in References) is missing -- confirm. */
}
/* Check input arguments */
if (M < 0) {
plasma_error("PLASMA_zgetrf", "illegal value of M");
return -1;
}
if (N < 0) {
plasma_error("PLASMA_zgetrf", "illegal value of N");
return -2;
}
if (LDA < max(1, M)) {
plasma_error("PLASMA_zgetrf", "illegal value of LDA");
return -4;
}
/* Quick return */
/* NOTE(review): as listed, this `if` has no statement of its own, so the
 * plasma_tune() assignment below would become its body; a
 * `return PLASMA_SUCCESS;` line is presumably missing -- confirm. */
if (min(M, N) == 0)
/* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
status = plasma_tune(PLASMA_FUNC_ZGESV, M, N, 0);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_zgetrf", "plasma_tune() failed");
return status;
}
/* Set NT & NTRHS */
NB = PLASMA_NB;
NBNB = NB*NB;
plasma_sequence_create(plasma, &sequence);
/* NOTE(review): the next two lines are the tail of an argument list whose
 * opening line is not visible; References names plasma_desc_init and
 * PlasmaComplexDouble, so this is presumably the initialisation of descA
 * with leading dimension LDA -- confirm. */
NB, NB, NBNB,
LDA, N, 0, 0, M, N);
/* descA uses the caller's A array as its storage; no lap2tile conversion
 * macro is invoked in this function. */
descA.mat = A;
minMN = min(M, N);
/* Zero the first min(M, N) pivot entries before the factorization runs. */
memset(IPIV, 0, minMN*sizeof(int));
/* Call the tile interface */
plasma_dynamic_call_4(plasma_pzgetrf_reclap,
PLASMA_desc, descA,
int*, IPIV,
PLASMA_sequence*, sequence,
PLASMA_request*, &request);
/*
* Generate the correct IPIV (Has to be moved in a task)
*/
{
/* For every tile row i >= 1, add the row offset inc = i*descA.mb to each
 * pivot entry belonging to that tile row (presumably turning tile-local
 * pivot indices into global ones -- confirm). tmp caps the count at the
 * entries that remain below minMN; the loop stops once none remain.
 * NOTE(review): no synchronisation call is visible between the dispatch
 * above and this loop, although References names plasma_dynamic_sync. */
int i, inc, tmp, j;
for(i=1; i<descA.mt; i++) {
inc = i*descA.mb;
tmp = min( minMN - inc, descA.mb);
if ( tmp < 1 )
break;
for (j=0; j<tmp; j++)
IPIV[inc+j] = IPIV[inc+j] + inc;
}
}
/* The function result is whatever status the sequence recorded. */
status = sequence->status;
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgetrf_incpiv ( int  M,
int  N,
PLASMA_Complex64_t *  A,
int  LDA,
PLASMA_Complex64_t *  L,
int *  IPIV 
)

PLASMA_zgetrf_incpiv - Computes an LU factorization of a general M-by-N matrix A using the tile LU algorithm with partial tile pivoting with row interchanges.

Parameters:
[in]MThe number of rows of the matrix A. M >= 0.
[in]NThe number of columns of the matrix A. N >= 0.
[in,out]AOn entry, the M-by-N matrix to be factored. On exit, the tile factors L and U from the factorization.
[in]LDAThe leading dimension of the array A. LDA >= max(1,M).
[out]LOn exit, auxiliary factorization data, related to the tile L factor, required by PLASMA_zgetrs_incpiv to solve the system of equations.
[out]IPIVThe pivot indices that define the permutations (not equivalent to LAPACK).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
>0if i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
See also:
PLASMA_zgetrf_incpiv_Tile
PLASMA_zgetrf_incpiv_Tile_Async
PLASMA_cgetrf_incpiv
PLASMA_dgetrf_incpiv
PLASMA_sgetrf_incpiv
PLASMA_zgetrs_incpiv

Definition at line 65 of file zgetrf_incpiv.c.

References L, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGESV, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zgetrf_incpiv_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaComplexDouble, and plasma_sequence_t::status.

{
/*
 * Body of PLASMA_zgetrf_incpiv as shown in the generated listing. Visible
 * steps: validate M, N and LDA, call plasma_tune(), derive NB, IB, IBNB,
 * MT and NT, create a sequence, point descL.mat at the caller's L buffer,
 * pass A through a plasma_z*lap2tile macro, call
 * PLASMA_zgetrf_incpiv_Tile_Async(), pass A through a plasma_z*tile2lap
 * macro, and return sequence->status.
 *
 * NOTE(review): this listing appears to have lost lines during extraction:
 * `plasma` and `request` are used but never declared here, and several
 * statements below are visibly incomplete. Confirm against zgetrf_incpiv.c.
 */
int NB, IB, IBNB, MT, NT;
int status;
PLASMA_sequence *sequence = NULL;
PLASMA_desc descA, descL;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zgetrf_incpiv", "PLASMA not initialized");
/* NOTE(review): no return is visible here; presumably a return of
 * PLASMA_ERR_NOT_INITIALIZED (named in References) is missing -- confirm. */
}
/* Check input arguments */
if (M < 0) {
plasma_error("PLASMA_zgetrf_incpiv", "illegal value of M");
return -1;
}
if (N < 0) {
plasma_error("PLASMA_zgetrf_incpiv", "illegal value of N");
return -2;
}
if (LDA < max(1, M)) {
plasma_error("PLASMA_zgetrf_incpiv", "illegal value of LDA");
return -4;
}
/* Quick return */
/* NOTE(review): as listed, this `if` has no statement of its own, so the
 * plasma_tune() assignment below would become its body; a
 * `return PLASMA_SUCCESS;` line is presumably missing -- confirm. */
if (min(M, N) == 0)
/* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
status = plasma_tune(PLASMA_FUNC_ZGESV, M, N, 0);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_zgetrf_incpiv", "plasma_tune() failed");
return status;
}
/* Set NT & NTRHS */
NB = PLASMA_NB;
IB = PLASMA_IB;
IBNB = IB*NB;
/* MT = ceil(M / NB) and NT = ceil(N / NB): tile counts along rows/columns. */
MT = (M%NB==0) ? (M/NB) : (M/NB+1);
NT = (N%NB==0) ? (N/NB) : (N/NB+1);
plasma_sequence_create(plasma, &sequence);
/* NOTE(review): the next two lines are the tail of an argument list whose
 * opening line is not visible; References names plasma_desc_init and
 * PlasmaComplexDouble, so this is presumably the initialisation of descL
 * as an (MT*IB)-by-(NT*NB) descriptor -- confirm. */
IB, NB, IBNB,
MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);
/* descL uses the caller-supplied L buffer as its storage. */
descL.mat = L;
/* NOTE(review): the `} else {` below has no visible opening `if`;
 * presumably a PLASMA_TRANSLATION == PLASMA_OUTOFPLACE test -- confirm. */
plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
} else {
plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N);
}
/* Call the tile interface */
PLASMA_zgetrf_incpiv_Tile_Async(&descA, &descL, IPIV, sequence, &request);
/* NOTE(review): the opening `if` of this if/else is not visible either, and
 * no plasma_dynamic_sync call appears although References names it. */
plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
} else {
plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
}
/* The function result is whatever status the sequence recorded. */
status = sequence->status;
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgetri ( int  N,
PLASMA_Complex64_t *  A,
int  LDA,
int *  IPIV 
)

PLASMA_zgetri - Computes the inverse of a matrix using the LU factorization computed by PLASMA_zgetrf. This method inverts U and then computes inv(A) by solving the system inv(A)*L = inv(U) for inv(A).

Parameters:
[in]NThe order of the matrix A. N >= 0.
[in,out]AOn entry, the factors L and U from the factorization A = P*L*U as computed by PLASMA_zgetrf. On exit, if return value = 0, the inverse of the original matrix A.
[in]LDAThe leading dimension of the array A. LDA >= max(1,N).
[in]IPIVThe pivot indices that define the permutations as returned by PLASMA_zgetrf.
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
>0if i, the (i,i) element of the factor U is exactly zero; The matrix is singular and its inverse could not be computed.
See also:
PLASMA_zgetri_Tile
PLASMA_zgetri_Tile_Async
PLASMA_cgetri
PLASMA_dgetri
PLASMA_sgetri
PLASMA_zgetrf

Definition at line 63 of file zgetri.c.

References max, PLASMA_Alloc_Workspace_zgetri_Tile_Async(), plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGESV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zgetri_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, and plasma_sequence_t::status.

{
/*
 * Body of PLASMA_zgetri as shown in the generated listing. Visible steps:
 * validate N and LDA, call plasma_tune(), read NB, create a sequence, pass
 * A through a plasma_z*lap2tile macro, call PLASMA_zgetri_Tile_Async() with
 * a workspace descriptor descW, pass A through a plasma_z*tile2lap macro,
 * and return sequence->status.
 *
 * NOTE(review): this listing appears to have lost lines during extraction:
 * `plasma` and `request` are used but never declared here, and several
 * statements below are visibly incomplete. Confirm against zgetri.c.
 */
int NB;
int status;
PLASMA_sequence *sequence = NULL;
PLASMA_desc descA;
PLASMA_desc descW;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zgetri", "PLASMA not initialized");
/* NOTE(review): no return is visible here; presumably a return of
 * PLASMA_ERR_NOT_INITIALIZED (named in References) is missing -- confirm. */
}
/* Check input arguments */
if (N < 0) {
plasma_error("PLASMA_zgetri", "illegal value of N");
return -1;
}
if (LDA < max(1, N)) {
plasma_error("PLASMA_zgetri", "illegal value of LDA");
return -3;
}
/* Quick return */
/* Negative N was rejected above, so max(N, 0) == 0 holds exactly when
 * N == 0.
 * NOTE(review): as listed, this `if` has no statement of its own, so the
 * plasma_tune() assignment below would become its body; a return
 * statement is presumably missing -- confirm. */
if (max(N, 0) == 0)
/* Tune NB depending on M, N & NRHS; Set NBNB */
status = plasma_tune(PLASMA_FUNC_ZGESV, N, N, 0);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_zgetri", "plasma_tune() failed");
return status;
}
/* Set NT */
NB = PLASMA_NB;
plasma_sequence_create(plasma, &sequence);
/* NOTE(review): the `} else {` below has no visible opening `if`;
 * presumably a PLASMA_TRANSLATION == PLASMA_OUTOFPLACE test -- confirm. */
plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
} else {
plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N);
}
/* Allocate workspace */
/* NOTE(review): no statement follows the comment above and descW is never
 * initialised in this listing; References names
 * PLASMA_Alloc_Workspace_zgetri_Tile_Async, so that call is presumably
 * missing here -- confirm. */
/* Call the tile interface */
PLASMA_zgetri_Tile_Async(&descA, IPIV, &descW, sequence, &request);
/* NOTE(review): the opening `if` of this if/else is not visible either, and
 * no release of descW appears in the listing. */
plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
} else {
plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
}
/* The function result is whatever status the sequence recorded. */
status = sequence->status;
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgetrs ( PLASMA_enum  trans,
int  N,
int  NRHS,
PLASMA_Complex64_t *  A,
int  LDA,
int *  IPIV,
PLASMA_Complex64_t *  B,
int  LDB 
)

PLASMA_zgetrs - Solves a system of linear equations A * X = B, with a general N-by-N matrix A using the tile LU factorization computed by PLASMA_zgetrf.

Parameters:
[in]transIntended to specify the form of the system of equations: = PlasmaNoTrans: A * X = B (No transpose) = PlasmaTrans: A**T * X = B (Transpose) = PlasmaConjTrans: A**H * X = B (Conjugate transpose)
[in]NThe order of the matrix A. N >= 0.
[in]NRHSThe number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]AThe tile factors L and U from the factorization, computed by PLASMA_zgetrf.
[in]LDAThe leading dimension of the array A. LDA >= max(1,N).
[in]IPIVThe pivot indices from PLASMA_zgetrf.
[in,out]BOn entry, the N-by-NRHS right-hand-side matrix B. On exit, the solution matrix X.
[in]LDBThe leading dimension of the array B. LDB >= max(1,N).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
Returns:
<0 if -i, the i-th argument had an illegal value
See also:
PLASMA_zgetrs_Tile
PLASMA_zgetrs_Tile_Async
PLASMA_cgetrs
PLASMA_dgetrs
PLASMA_sgetrs
PLASMA_zgetrf

Definition at line 72 of file zgetrs.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGESV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zgetrs_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaConjTrans, PlasmaNoTrans, PlasmaTrans, and plasma_sequence_t::status.

{
/*
 * Body of PLASMA_zgetrs as shown in the generated listing. Visible steps:
 * validate trans, N, NRHS, LDA and LDB, call plasma_tune(), read NB, create
 * a sequence, pass A and B through the plasma_z*lap2tile macros, call
 * PLASMA_zgetrs_Tile_Async(), pass the result through the
 * plasma_z*tile2lap macros, and return sequence->status.
 *
 * NOTE(review): this listing appears to have lost lines during extraction:
 * `plasma` and `request` are used but never declared here, and several
 * statements below are visibly incomplete. Confirm against zgetrs.c.
 */
int NB;
int status;
PLASMA_sequence *sequence = NULL;
PLASMA_desc descA, descB;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zgetrs", "PLASMA not initialized");
/* NOTE(review): no return is visible here; presumably a return of
 * PLASMA_ERR_NOT_INITIALIZED (named in References) is missing -- confirm. */
}
/* Check input arguments */
/* NOTE(review): the condition below is cut off after `&&`; References
 * names PlasmaTrans and PlasmaConjTrans, so the missing lines presumably
 * compare trans against those two values -- confirm. */
if ( (trans != PlasmaNoTrans) &&
plasma_error("PLASMA_zgetrs", "illegal value of trans");
return -1;
}
if (N < 0) {
plasma_error("PLASMA_zgetrs", "illegal value of N");
return -2;
}
if (NRHS < 0) {
plasma_error("PLASMA_zgetrs", "illegal value of NRHS");
return -3;
}
if (LDA < max(1, N)) {
plasma_error("PLASMA_zgetrs", "illegal value of LDA");
return -5;
}
if (LDB < max(1, N)) {
plasma_error("PLASMA_zgetrs", "illegal value of LDB");
return -8;
}
/* Quick return */
/* NOTE(review): as listed, this `if` has no statement of its own, so the
 * plasma_tune() assignment below would become its body; a
 * `return PLASMA_SUCCESS;` line is presumably missing -- confirm. */
if (min(N, NRHS) == 0)
/* Tune NB & IB depending on N & NRHS; Set NBNBSIZE */
status = plasma_tune(PLASMA_FUNC_ZGESV, N, N, NRHS);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_zgetrs", "plasma_tune() failed");
return status;
}
/* Set NT & NTRHS */
NB = PLASMA_NB;
plasma_sequence_create(plasma, &sequence);
/* NOTE(review): the `} else {` below has no visible opening `if`;
 * presumably a PLASMA_TRANSLATION == PLASMA_OUTOFPLACE test -- confirm. */
plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N , plasma_desc_mat_free(&(descA)) );
plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
} else {
plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
}
/* Call the tile interface */
PLASMA_zgetrs_Tile_Async(trans, &descA, IPIV, &descB, sequence, &request);
/* In the first branch only B goes through plasma_zooptile2lap, while the
 * `else` branch passes both A and B through plasma_ziptile2lap.
 * NOTE(review): the opening `if` of this if/else is not visible. */
plasma_zooptile2lap( descB, B, NB, NB, LDB, NRHS );
} else {
plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
}
/* The function result is whatever status the sequence recorded. */
status = sequence->status;
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

int PLASMA_zgetrs_incpiv ( PLASMA_enum  trans,
int  N,
int  NRHS,
PLASMA_Complex64_t *  A,
int  LDA,
PLASMA_Complex64_t *  L,
int *  IPIV,
PLASMA_Complex64_t *  B,
int  LDB 
)

PLASMA_zgetrs_incpiv - Solves a system of linear equations A * X = B, with a general N-by-N matrix A using the tile LU factorization computed by PLASMA_zgetrf_incpiv.

Parameters:
[in]transIntended to specify the form of the system of equations: = PlasmaNoTrans: A * X = B (No transpose) = PlasmaTrans: A**T * X = B (Transpose) = PlasmaConjTrans: A**H * X = B (Conjugate transpose) Currently only PlasmaNoTrans is supported.
[in]NThe order of the matrix A. N >= 0.
[in]NRHSThe number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]AThe tile factors L and U from the factorization, computed by PLASMA_zgetrf_incpiv.
[in]LDAThe leading dimension of the array A. LDA >= max(1,N).
[in]LAuxiliary factorization data, related to the tile L factor, computed by PLASMA_zgetrf_incpiv.
[in]IPIVThe pivot indices from PLASMA_zgetrf_incpiv (not equivalent to LAPACK).
[in,out]BOn entry, the N-by-NRHS right-hand-side matrix B. On exit, the solution matrix X.
[in]LDBThe leading dimension of the array B. LDB >= max(1,N).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
Returns:
<0 if -i, the i-th argument had an illegal value
See also:
PLASMA_zgetrs_incpiv_Tile
PLASMA_zgetrs_incpiv_Tile_Async
PLASMA_cgetrs_incpiv
PLASMA_dgetrs_incpiv
PLASMA_sgetrs_incpiv
PLASMA_zgetrf_incpiv

Definition at line 75 of file zgetrs_incpiv.c.

References L, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_NOT_SUPPORTED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGESV, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zgetrs_incpiv_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaComplexDouble, PlasmaNoTrans, and plasma_sequence_t::status.

{
/*
 * Body of PLASMA_zgetrs_incpiv as shown in the generated listing. Visible
 * steps: check trans, N, NRHS, LDA and LDB, call plasma_tune(), derive NB,
 * IB, IBNB and NT, create a sequence, point descL.mat at the caller's L
 * buffer, pass A and B through the plasma_z*lap2tile macros, call
 * PLASMA_zgetrs_incpiv_Tile_Async(), pass the result through the
 * plasma_z*tile2lap macros, and return sequence->status.
 *
 * NOTE(review): this listing appears to have lost lines during extraction:
 * `plasma` and `request` are used but never declared here, and several
 * statements below are visibly incomplete. Confirm against zgetrs_incpiv.c.
 */
int NB, IB, IBNB, NT;
int status;
PLASMA_sequence *sequence = NULL;
PLASMA_desc descA, descB, descL;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zgetrs_incpiv", "PLASMA not initialized");
/* NOTE(review): no return is visible here; presumably a return of
 * PLASMA_ERR_NOT_INITIALIZED (named in References) is missing -- confirm. */
}
/* Check input arguments */
if (trans != PlasmaNoTrans) {
plasma_error("PLASMA_zgetrs_incpiv", "only PlasmaNoTrans supported");
/* NOTE(review): no return is visible here, so as listed execution would
 * continue; References names PLASMA_ERR_NOT_SUPPORTED, so a return of
 * that code is presumably missing -- confirm. */
}
if (N < 0) {
plasma_error("PLASMA_zgetrs_incpiv", "illegal value of N");
return -2;
}
if (NRHS < 0) {
plasma_error("PLASMA_zgetrs_incpiv", "illegal value of NRHS");
return -3;
}
if (LDA < max(1, N)) {
plasma_error("PLASMA_zgetrs_incpiv", "illegal value of LDA");
return -5;
}
if (LDB < max(1, N)) {
plasma_error("PLASMA_zgetrs_incpiv", "illegal value of LDB");
return -9;
}
/* Quick return */
/* NOTE(review): as listed, this `if` has no statement of its own, so the
 * plasma_tune() assignment below would become its body; a
 * `return PLASMA_SUCCESS;` line is presumably missing -- confirm. */
if (min(N, NRHS) == 0)
/* Tune NB & IB depending on N & NRHS; Set NBNBSIZE */
status = plasma_tune(PLASMA_FUNC_ZGESV, N, N, NRHS);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_zgetrs_incpiv", "plasma_tune() failed");
return status;
}
/* Set NT & NTRHS */
NB = PLASMA_NB;
IB = PLASMA_IB;
IBNB = IB*NB;
/* NT = ceil(N / NB): number of NB-wide tiles needed to cover N. */
NT = (N%NB==0) ? (N/NB) : (N/NB+1);
plasma_sequence_create(plasma, &sequence);
/* NOTE(review): the next two lines are the tail of an argument list whose
 * opening line is not visible; References names plasma_desc_init and
 * PlasmaComplexDouble, so this is presumably the initialisation of descL
 * as an (NT*IB)-by-(NT*NB) descriptor -- confirm. */
IB, NB, IBNB,
NT*IB, NT*NB, 0, 0, NT*IB, NT*NB);
/* descL uses the caller-supplied L buffer as its storage. */
descL.mat = L;
/* NOTE(review): the `} else {` below has no visible opening `if`;
 * presumably a PLASMA_TRANSLATION == PLASMA_OUTOFPLACE test -- confirm. */
plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N , plasma_desc_mat_free(&(descA)) );
plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
} else {
plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
}
/* Call the tile interface */
PLASMA_zgetrs_incpiv_Tile_Async(&descA, &descL, IPIV, &descB, sequence, &request);
/* In the first branch only B goes through plasma_zooptile2lap, while the
 * `else` branch passes both A and B through plasma_ziptile2lap.
 * NOTE(review): the opening `if` of this if/else is not visible. */
plasma_zooptile2lap( descB, B, NB, NB, LDB, NRHS );
} else {
plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
}
/* The function result is whatever status the sequence recorded. */
status = sequence->status;
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zheev ( PLASMA_enum  jobz,
PLASMA_enum  uplo,
int  N,
PLASMA_Complex64_t *  A,
int  LDA,
double *  W,
PLASMA_desc *  descT,
PLASMA_Complex64_t *  Q,
int  LDQ 
)

PLASMA_zheev - Computes all eigenvalues and, optionally, eigenvectors of a complex Hermitian matrix A. The matrix A is first reduced to tridiagonal form using a two-stage approach: First stage: reduction to band tridiagonal form; Second stage: reduction from band to tridiagonal form. Note: Only PlasmaNoVec supported!

Parameters:
[in]jobzIntended usage: = PlasmaNoVec: computes eigenvalues only; = PlasmaVec: computes eigenvalues and eigenvectors. Note: Only PlasmaNoVec supported!
[in]uploSpecifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]NThe order of the matrix A. N >= 0.
[in,out]AOn entry, the symmetric (or Hermitian) matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, the lower triangle (if uplo = PlasmaLower) or the upper triangle (if uplo = PlasmaUpper) of A, including the diagonal, is destroyed.
[in]LDAThe leading dimension of the array A. LDA >= max(1,N).
[out]WOn exit, if info = 0, the eigenvalues.
[in,out]descTOn entry, descriptor as returned by PLASMA_Alloc_Workspace_zheev. On exit, contains auxiliary factorization data.
[out]QOn exit, if jobz = PlasmaVec and info = 0, the eigenvectors.
[in]LDQThe leading dimension of the array Q. LDQ >= max(1,N).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
>0if INFO = i, the algorithm failed to converge; i off-diagonal elements of an intermediate tridiagonal form did not converge to zero.
See also:
PLASMA_zheev_Tile
PLASMA_zheev_Tile_Async
PLASMA_cheev
PLASMA_dsyev
PLASMA_ssyev

Definition at line 96 of file zheev.c.

References plasma_desc_t::m, plasma_desc_t::mat, max, plasma_desc_t::n, plasma_context_self(), plasma_desc_check(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), PLASMA_FUNC_ZHEEV, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_zdesc_alloc, PLASMA_zheev_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaComplexDouble, PlasmaLower, PlasmaNoVec, PlasmaUpper, PlasmaVec, Q, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the Hermitian eigenvalue solver.
     *
     * Validates the arguments, converts A (and Q, when eigenvectors are
     * requested) to tile layout, calls PLASMA_zheev_Tile_Async() and converts
     * the results back.
     *
     * Returns PLASMA_ERR_NOT_INITIALIZED when no PLASMA context exists, the
     * plasma_tune() status when tuning fails, -i when the i-th argument is
     * illegal, PLASMA_SUCCESS on the N == 0 quick return, and otherwise the
     * status recorded in the sequence.
     */
    int NB, IB, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descQ;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_error("PLASMA_zheev", "PLASMA not initialized");
        /* Nothing below can run without a context. */
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Tune NB & IB depending on N; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZHEEV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zheev", "plasma_tune() failed");
        return status;
    }

    /* Set NT: number of NB-wide tiles needed to cover N (rounded up) */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);

    /* Check input arguments; the return value is minus the argument position */
    if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
        plasma_error("PLASMA_zheev", "illegal value of jobz");
        return -1;
    }
    if (uplo != PlasmaLower && uplo != PlasmaUpper) {
        plasma_error("PLASMA_zheev", "illegal value of uplo");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_zheev", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zheev", "illegal value of LDA");
        return -5;
    }
    /* The workspace descriptor must be valid and sized (NT*IB)-by-(NT*NB). */
    if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) ||
         ( descT->m != NT*IB ) || (descT->n != NT*NB) ) {
        plasma_error("PLASMA_zheev", "invalid T descriptor");
        return -7;
    }
    if (LDQ < max(1, N)) {
        plasma_error("PLASMA_zheev", "illegal value of LDQ");
        return -9;
    }

    /* Quick return: an empty problem has nothing to compute. */
    if (N == 0) {
        return PLASMA_SUCCESS;
    }

    if (jobz == PlasmaVec) {
        plasma_error("PLASMA_zheev", "computing the eigenvectors is not supported in this version");
        return -1;
    }

    plasma_sequence_create(plasma, &sequence);

    if (PLASMA_TRANSLATION == PLASMA_OUTOFPLACE) {
        /*
         * Out-of-place: work on tile copies. The trailing macro argument is
         * the cleanup code (NOTE(review): presumably executed when the tile
         * allocation fails - confirm against the macro definition).
         */
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
        if (jobz == PlasmaVec) {
            /* No need for conversion, it's just output */
            plasma_zdesc_alloc( descQ, NB, NB, LDQ, N, 0, 0, N, N,
                                plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descQ)) );
        }
    } else {
        /* In-place: A is converted to tile layout within the caller's buffer. */
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
        if (jobz == PlasmaVec) {
            /* No need for conversion, it's just output */
            descQ = plasma_desc_init(
                PlasmaComplexDouble, NB, NB, NB*NB,
                LDQ, N, 0, 0, N, N);
            descQ.mat = Q;
        }
    }

    /* Call the tile interface */
    PLASMA_zheev_Tile_Async(jobz, uplo, &descA, W, descT, &descQ, sequence, &request);

    if (PLASMA_TRANSLATION == PLASMA_OUTOFPLACE) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
        if (jobz == PlasmaVec) {
            plasma_zooptile2lap( descQ, Q, NB, NB, LDQ, N );
        }
        /* Synchronise before releasing the tile copies. */
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        if (jobz == PlasmaVec)
            plasma_desc_mat_free(&descQ);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        if (jobz == PlasmaVec)
            plasma_ziptile2lap( descQ, Q, NB, NB, LDQ, N );
        plasma_dynamic_sync();
    }

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zheevd ( PLASMA_enum  jobz,
PLASMA_enum  uplo,
int  N,
PLASMA_Complex64_t *  A,
int  LDA,
double *  W,
PLASMA_desc *  T,
PLASMA_Complex64_t *  Q,
int  LDQ 
)

PLASMA_zheevd - Computes all eigenvalues and, optionally, eigenvectors of a complex Hermitian matrix A. The matrix A is first reduced to tridiagonal form using a two-stage approach: First stage: reduction to band tridiagonal form; Second stage: reduction from band to tridiagonal form. Note: Only PlasmaNoVec supported!

Parameters:
[in]jobzIntended usage: = PlasmaNoVec: computes eigenvalues only; = PlasmaVec: computes eigenvalues and eigenvectors. Note: Only PlasmaNoVec supported!
[in]uploSpecifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]NThe order of the matrix A. N >= 0.
[in,out]AOn entry, the symmetric (or Hermitian) matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, the lower triangle (if uplo = PlasmaLower) or the upper triangle (if uplo = PlasmaUpper) of A, including the diagonal, is destroyed.
[in]LDAThe leading dimension of the array A. LDA >= max(1,N).
[out]WOn exit, if info = 0, the eigenvalues.
[in,out]TOn entry, descriptor as returned by PLASMA_Alloc_Workspace_zheev. On exit, contains auxiliary factorization data.
[out]QOn exit, if jobz = PlasmaVec and info = 0, the eigenvectors.
[in]LDQThe leading dimension of the array Q. LDQ >= max(1,N).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
>0if INFO = i, the algorithm failed to converge; i off-diagonal elements of an intermediate tridiagonal form did not converge to zero.
See also:
PLASMA_zheevd_Tile
PLASMA_zheevd_Tile_Async
PLASMA_cheevd
PLASMA_dsyevd
PLASMA_ssyevd

Definition at line 97 of file zheevd.c.

References plasma_desc_t::mat, max, plasma_context_self(), plasma_desc_check(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), PLASMA_FUNC_ZHEEV, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_zdesc_alloc, PLASMA_zheevd_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaComplexDouble, PlasmaLower, PlasmaNoVec, PlasmaUpper, PlasmaVec, Q, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the Hermitian eigenvalue solver (zheevd).
     *
     * Validates the arguments, converts A (and Q, when eigenvectors are
     * requested) to tile layout, calls PLASMA_zheevd_Tile_Async() and
     * converts the results back.
     *
     * Returns PLASMA_ERR_NOT_INITIALIZED when no PLASMA context exists, the
     * plasma_tune() status when tuning fails, -i when the i-th argument is
     * illegal, PLASMA_SUCCESS on the N == 0 quick return, and otherwise the
     * status recorded in the sequence.
     *
     * NOTE(review): the rendered signature names the workspace descriptor
     * "T" while this body refers to it as descT - confirm which name the
     * definition uses.
     */
    int NB, IB, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descQ;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_error("PLASMA_zheevd", "PLASMA not initialized");
        /* Nothing below can run without a context. */
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /*
     * Tune NB & IB depending on N; Set NBNB.
     * This has to happen before PLASMA_NB / PLASMA_IB are read, otherwise NT
     * and the descriptor size check below use tile sizes left over from an
     * earlier call.
     */
    status = plasma_tune(PLASMA_FUNC_ZHEEV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zheevd", "plasma_tune() failed");
        return status;
    }

    /* Set NT: number of NB-wide tiles needed to cover N (rounded up) */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);

    /* Check input arguments; the return value is minus the argument position */
    if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
        plasma_error("PLASMA_zheevd", "illegal value of jobz");
        return -1;
    }
    if (uplo != PlasmaLower && uplo != PlasmaUpper) {
        plasma_error("PLASMA_zheevd", "illegal value of uplo");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_zheevd", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zheevd", "illegal value of LDA");
        return -5;
    }
    /* The workspace descriptor must be valid and sized (NT*IB)-by-(NT*NB). */
    if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) ||
         ( descT->m != NT*IB ) || (descT->n != NT*NB) ) {
        /* Report under this routine's own name. */
        plasma_error("PLASMA_zheevd", "invalid T descriptor");
        return -7;
    }
    if (LDQ < max(1, N)) {
        plasma_error("PLASMA_zheevd", "illegal value of LDQ");
        return -9;
    }

    /* Quick return: an empty problem has nothing to compute. */
    if (N == 0) {
        return PLASMA_SUCCESS;
    }

    if (jobz == PlasmaVec) {
        plasma_error("PLASMA_zheevd", "computing the eigenvectors is not supported in this version");
        return -1;
    }

    plasma_sequence_create(plasma, &sequence);

    if (PLASMA_TRANSLATION == PLASMA_OUTOFPLACE) {
        /*
         * Out-of-place: work on tile copies. The trailing macro argument is
         * the cleanup code (NOTE(review): presumably executed when the tile
         * allocation fails - confirm against the macro definition).
         */
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
        if (jobz == PlasmaVec) {
            /* No need for conversion, it's just output */
            plasma_zdesc_alloc( descQ, NB, NB, LDQ, N, 0, 0, N, N,
                                plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descQ)) );
        }
    } else {
        /* In-place: A is converted to tile layout within the caller's buffer. */
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
        if (jobz == PlasmaVec) {
            /* No need for conversion, it's just output */
            descQ = plasma_desc_init(
                PlasmaComplexDouble, NB, NB, NB*NB,
                LDQ, N, 0, 0, N, N);
            descQ.mat = Q;
        }
    }

    /*
     * Call the tile interface.
     * NOTE(review): descT is dereferenced with -> above, so &descT is the
     * address of a pointer; the argument order also differs from the zheev
     * driver. Confirm against the PLASMA_zheevd_Tile_Async() prototype.
     */
    PLASMA_zheevd_Tile_Async(jobz, uplo, &descA, &descT, W, &descQ, sequence, &request);

    if (PLASMA_TRANSLATION == PLASMA_OUTOFPLACE) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
        if (jobz == PlasmaVec) {
            plasma_zooptile2lap( descQ, Q, NB, NB, LDQ, N );
        }
        /* Synchronise before releasing the tile copies. */
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        if (jobz == PlasmaVec)
            plasma_desc_mat_free(&descQ);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        if (jobz == PlasmaVec)
            plasma_ziptile2lap( descQ, Q, NB, NB, LDQ, N );
        plasma_dynamic_sync();
    }

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zhegst ( PLASMA_enum  itype,
PLASMA_enum  uplo,
int  N,
PLASMA_Complex64_t *  A,
int  LDA,
PLASMA_Complex64_t *  B,
int  LDB 
)

PLASMA_zhegst - reduces a complex Hermitian-definite generalized eigenproblem to standard form. If itype == 1, the problem is A*x = lambda*B*x, and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H). If itype == 2 or 3, the problem is A*B*x = lambda*x or B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L. B must have been previously factorized as U**H*U or L*L**H by PLASMA_ZPOTRF.

Parameters:
[in]itypeIntended usage: = 1: A*x=(lambda)*B*x; = 2: A*B*x=(lambda)*x; = 3: B*A*x=(lambda)*x
[in]uploSpecifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]NThe order of the matrices A and B. N >= 0.
[in,out]AOn entry, the symmetric (or Hermitian) matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if return value == 0, the transformed matrix, stored in the same format as A.
[in]LDAThe leading dimension of the array A. LDA >= max(1,N).
[in,out]BOn entry, the triangular factor from the Cholesky factorization of B, as returned by PLASMA_ZPOTRF.
[in]LDBThe leading dimension of the array B. LDB >= max(1,N).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
See also:
PLASMA_zhegst_Tile
PLASMA_zhegst_Tile_Async
PLASMA_chegst
PLASMA_dsygst
PLASMA_ssygst

Definition at line 85 of file zhegst.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZHEGST, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zhegst_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaLower, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver that reduces a Hermitian-definite generalized
     * eigenproblem to standard form.
     *
     * Validates the arguments, converts A and B to tile layout, calls
     * PLASMA_zhegst_Tile_Async() and converts both matrices back.
     *
     * Returns PLASMA_ERR_NOT_INITIALIZED when no PLASMA context exists, -i
     * when the i-th argument is illegal, the plasma_tune() status when tuning
     * fails, PLASMA_SUCCESS on the N == 0 quick return, and otherwise the
     * status recorded in the sequence.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zhegst", "PLASMA not initialized");
        /* Nothing below can run without a context. */
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments; the return value is minus the argument position */
    if (itype != 1 && itype != 2 && itype != 3) {
        plasma_error("PLASMA_zhegst", "Illegal value of itype");
        return -1;
    }
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_zhegst", "Illegal value of uplo");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_zhegst", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zhegst", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_zhegst", "illegal value of LDB");
        return -7;
    }

    /* Quick return: an empty problem has nothing to compute. */
    if (N == 0) {
        return PLASMA_SUCCESS;
    }

    /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_ZHEGST, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zhegst", "plasma_tune() failed");
        return status;
    }

    /* Set NT */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    if (PLASMA_TRANSLATION == PLASMA_OUTOFPLACE) {
        /*
         * Out-of-place: work on tile copies. The trailing macro argument is
         * the cleanup code (NOTE(review): presumably executed when the tile
         * allocation fails - confirm against the macro definition). The
         * cleanup for B also releases A, which is already allocated by then.
         */
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)) );
    } else {
        /* In-place: both matrices are converted within the caller's buffers. */
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N);
        plasma_ziplap2tile( descB, B, NB, NB, LDB, N, 0, 0, N, N);
    }

    /* Call the tile interface */
    PLASMA_zhegst_Tile_Async(itype, uplo, &descA, &descB, sequence, &request);

    if (PLASMA_TRANSLATION == PLASMA_OUTOFPLACE) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_zooptile2lap( descB, B, NB, NB, LDB, N );
        /* Synchronise before releasing the tile copies. */
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, N );
        plasma_dynamic_sync();
    }

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zhegv ( PLASMA_enum  itype,
PLASMA_enum  jobz,
PLASMA_enum  uplo,
int  N,
PLASMA_Complex64_t *  A,
int  LDA,
PLASMA_Complex64_t *  B,
int  LDB,
double *  W,
PLASMA_desc *  descT,
PLASMA_Complex64_t *  Q,
int  LDQ 
)

PLASMA_zhegv - Computes all eigenvalues and, optionally, eigenvectors of a complex generalized Hermitian-definite eigenproblem of the form: A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and B are assumed to be Hermitian and B is also positive definite. Note: Only PlasmaNoVec supported!

Parameters:
[in]itypeIntended usage: = 1: A*x=(lambda)*B*x; = 2: A*B*x=(lambda)*x; = 3: B*A*x=(lambda)*x
[in]jobzIntended usage: = PlasmaNoVec: computes eigenvalues only; = PlasmaVec: computes eigenvalues and eigenvectors. Note: Only PlasmaNoVec supported!
[in]uploSpecifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A and B are stored; = PlasmaLower: Lower triangle of A and B are stored.
[in]NThe order of the matrix A. N >= 0.
[in,out]AOn entry, the symmetric (or Hermitian) matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if jobz = PlasmaVec, then if return value = 0, A contains the matrix Z of eigenvectors. The eigenvectors are normalized as follows: if ITYPE = 1 or 2, Z**H*B*Z = I; if ITYPE = 3, Z**H*inv(B)*Z = I. If jobz = PlasmaNoVec, then on exit the lower triangle (if uplo = PlasmaLower) or the upper triangle (if uplo = PlasmaUpper) of A, including the diagonal, is destroyed.
[in]LDAThe leading dimension of the array A. LDA >= max(1,N).
[in,out]BOn entry, the symmetric (or Hermitian) positive definite matrix B. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of B contains the upper triangular part of the matrix B, and the strictly lower triangular part of B is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of B contains the lower triangular part of the matrix B, and the strictly upper triangular part of B is not referenced. On exit, if return value <= N, the part of B containing the matrix is overwritten by the triangular factor U or L from the Cholesky factorization B = U**H*U or B = L*L**H.
[in]LDBThe leading dimension of the array B. LDB >= max(1,N).
[out]WOn exit, if info = 0, the eigenvalues.
[in,out]descTOn entry, descriptor as returned by PLASMA_Alloc_Workspace_zhegv. On exit, contains auxiliary factorization data.
[out]QOn exit, if jobz = PlasmaVec and info = 0, the eigenvectors.
[in]LDQThe leading dimension of Q.
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
<=Nif INFO = i, plasma_zhegv failed to converge; i off-diagonal elements of an intermediate tridiagonal form did not converge to zero.
>Nif INFO = N + i, for 1 <= i <= N, then the leading minor of order i of B is not positive definite. The factorization of B could not be completed and no eigenvalues or eigenvectors were computed.
See also:
PLASMA_zhegv_Tile
PLASMA_zhegv_Tile_Async
PLASMA_chegv
PLASMA_dsygv
PLASMA_ssygv

Definition at line 128 of file zhegv.c.

References plasma_desc_t::m, plasma_desc_t::mat, max, plasma_desc_t::n, plasma_context_self(), plasma_desc_check(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), PLASMA_FUNC_ZHEGV, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_zdesc_alloc, PLASMA_zhegv_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaComplexDouble, PlasmaLower, PlasmaNoVec, PlasmaUpper, PlasmaVec, Q, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the generalized Hermitian-definite eigenvalue
     * solver.
     *
     * Validates the arguments, converts A and B (and Q, when eigenvectors are
     * requested) to tile layout, calls PLASMA_zhegv_Tile_Async() and converts
     * the results back.
     *
     * Returns PLASMA_ERR_NOT_INITIALIZED when no PLASMA context exists, the
     * plasma_tune() status when tuning fails, -i when the i-th argument is
     * illegal, PLASMA_SUCCESS on the N == 0 quick return, and otherwise the
     * status recorded in the sequence.
     */
    int NB, IB, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB, descQ;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_error("PLASMA_zhegv", "PLASMA not initialized");
        /* Nothing below can run without a context. */
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Tune NB & IB depending on N; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZHEGV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zhegv", "plasma_tune() failed");
        return status;
    }

    /* Set NT: number of NB-wide tiles needed to cover N (rounded up) */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);

    /* Check input arguments; the return value is minus the argument position */
    if (itype != 1 && itype != 2 && itype != 3) {
        plasma_error("PLASMA_zhegv", "Illegal value of itype");
        return -1;
    }
    if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
        plasma_error("PLASMA_zhegv", "illegal value of jobz");
        return -2;
    }
    /* Both triangles are accepted, so report a generic illegal-value error. */
    if (uplo != PlasmaLower && uplo != PlasmaUpper) {
        plasma_error("PLASMA_zhegv", "illegal value of uplo");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_zhegv", "illegal value of N");
        return -4;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zhegv", "illegal value of LDA");
        return -6;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_zhegv", "illegal value of LDB");
        return -8;
    }
    /* The workspace descriptor must be valid and sized (NT*IB)-by-(NT*NB). */
    if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) ||
         ( descT->m != NT*IB ) || (descT->n != NT*NB) ) {
        plasma_error("PLASMA_zhegv", "invalid T descriptor");
        return -10;
    }
    if (LDQ < max(1, N)) {
        plasma_error("PLASMA_zhegv", "illegal value of LDQ");
        return -12;
    }

    /* Quick return: an empty problem has nothing to compute. */
    if (N == 0) {
        return PLASMA_SUCCESS;
    }

    if (jobz == PlasmaVec) {
        plasma_error("PLASMA_zhegv", "computing the eigenvectors is not supported in this version");
        /* jobz is the second argument of this routine. */
        return -2;
    }

    plasma_sequence_create(plasma, &sequence);

    if (PLASMA_TRANSLATION == PLASMA_OUTOFPLACE) {
        /*
         * Out-of-place: work on tile copies. The trailing macro argument is
         * the cleanup code (NOTE(review): presumably executed when the tile
         * allocation fails - confirm against the macro definition); each call
         * releases everything allocated so far.
         */
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N,
                             plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, N, N,
                             plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)) );
        if (jobz == PlasmaVec) {
            /* No need for conversion, it's just output */
            plasma_zdesc_alloc( descQ, NB, NB, LDQ, N, 0, 0, N, N,
                                plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descQ)) );
        }
    } else {
        /* In-place: matrices are converted within the caller's buffers. */
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
        plasma_ziplap2tile( descB, B, NB, NB, LDB, N, 0, 0, N, N );
        if (jobz == PlasmaVec) {
            /* No need for conversion, it's just output */
            descQ = plasma_desc_init(
                PlasmaComplexDouble, NB, NB, NB*NB,
                LDQ, N, 0, 0, N, N);
            descQ.mat = Q;
        }
    }

    /* Call the tile interface */
    PLASMA_zhegv_Tile_Async(
        itype, jobz, uplo,
        &descA, &descB, W,
        descT, &descQ,
        sequence, &request);

    if (PLASMA_TRANSLATION == PLASMA_OUTOFPLACE) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_zooptile2lap( descB, B, NB, NB, LDB, N );
        if (jobz == PlasmaVec) {
            plasma_zooptile2lap( descQ, Q, NB, NB, LDQ, N );
        }
        /* Synchronise before releasing the tile copies. */
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
        if (jobz == PlasmaVec)
            plasma_desc_mat_free(&descQ);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, N );
        if (jobz == PlasmaVec)
            plasma_ziptile2lap( descQ, Q, NB, NB, LDQ, N );
        plasma_dynamic_sync();
    }

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zhemm ( PLASMA_enum  side,
PLASMA_enum  uplo,
int  M,
int  N,
PLASMA_Complex64_t  alpha,
PLASMA_Complex64_t *  A,
int  LDA,
PLASMA_Complex64_t *  B,
int  LDB,
PLASMA_Complex64_t  beta,
PLASMA_Complex64_t *  C,
int  LDC 
)

PLASMA_zhemm - Performs one of the matrix-matrix operations

\[ C = \alpha \times A \times B + \beta \times C \]

or

\[ C = \alpha \times B \times A + \beta \times C \]

where alpha and beta are scalars, A is an hermitian matrix and B and C are m by n matrices.

Parameters:
[in]sideSpecifies whether the hermitian matrix A appears on the left or right in the operation as follows: = PlasmaLeft:

\[ C = \alpha \times A \times B + \beta \times C \]

= PlasmaRight:

\[ C = \alpha \times B \times A + \beta \times C \]

[in]uploSpecifies whether the upper or lower triangular part of the hermitian matrix A is to be referenced as follows: = PlasmaLower: Only the lower triangular part of the hermitian matrix A is to be referenced. = PlasmaUpper: Only the upper triangular part of the hermitian matrix A is to be referenced.
[in]MSpecifies the number of rows of the matrix C. M >= 0.
[in]NSpecifies the number of columns of the matrix C. N >= 0.
[in]alphaSpecifies the scalar alpha.
[in]AA is a LDA-by-ka matrix, where ka is M when side = PlasmaLeft, and is N otherwise. Only the uplo triangular part is referenced.
[in]LDAThe leading dimension of the array A. LDA >= max(1,ka).
[in]BB is a LDB-by-N matrix, where the leading M-by-N part of the array B must contain the matrix B.
[in]LDBThe leading dimension of the array B. LDB >= max(1,M).
[in]betaSpecifies the scalar beta.
[in,out]CC is a LDC-by-N matrix. On exit, the array is overwritten by the M by N updated matrix.
[in]LDCThe leading dimension of the array C. LDC >= max(1,M).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
See also:
PLASMA_zhemm_Tile
PLASMA_chemm
PLASMA_dhemm
PLASMA_shemm

Definition at line 94 of file zhemm.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZHEMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zhemm_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaLeft, PlasmaLower, PlasmaRight, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the Hermitian matrix-matrix product
     * C = alpha*A*B + beta*C (side = PlasmaLeft) or
     * C = alpha*B*A + beta*C (side = PlasmaRight).
     *
     * Validates the arguments, converts A, B and C to tile layout, calls
     * PLASMA_zhemm_Tile_Async() and converts C back.
     *
     * Returns PLASMA_ERR_NOT_INITIALIZED when no PLASMA context exists, -i
     * when the i-th argument is illegal, the plasma_tune() status when tuning
     * fails, PLASMA_SUCCESS on the quick return, and otherwise the status
     * recorded in the sequence.
     */
    int NB;
    int Am;
    int status;
    PLASMA_desc descA, descB, descC;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zhemm", "PLASMA not initialized");
        /* Nothing below can run without a context. */
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments; the return value is minus the argument position */
    if ( (side != PlasmaLeft) && (side != PlasmaRight) ){
        plasma_error("PLASMA_zhemm", "illegal value of side");
        return -1;
    }
    if ((uplo != PlasmaLower) && (uplo != PlasmaUpper)) {
        plasma_error("PLASMA_zhemm", "illegal value of uplo");
        return -2;
    }
    /* A is square of order M when applied from the left, N otherwise. */
    Am = ( side == PlasmaLeft ) ? M : N;
    if (M < 0) {
        plasma_error("PLASMA_zhemm", "illegal value of M");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_zhemm", "illegal value of N");
        return -4;
    }
    if (LDA < max(1, Am)) {
        plasma_error("PLASMA_zhemm", "illegal value of LDA");
        return -7;
    }
    if (LDB < max(1, M)) {
        plasma_error("PLASMA_zhemm", "illegal value of LDB");
        return -9;
    }
    if (LDC < max(1, M)) {
        plasma_error("PLASMA_zhemm", "illegal value of LDC");
        return -12;
    }

    /* Quick return: empty C, or alpha == 0 with beta == 1 leaves C unchanged. */
    if (M == 0 || N == 0 ||
        ((alpha == (PLASMA_Complex64_t)0.0) && beta == (PLASMA_Complex64_t)1.0)) {
        return PLASMA_SUCCESS;
    }

    /* Tune NB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_ZHEMM, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zhemm", "plasma_tune() failed");
        return status;
    }

    /* Set MT & NT & KT */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    if (PLASMA_TRANSLATION == PLASMA_OUTOFPLACE) {
        /*
         * Out-of-place: work on tile copies. The trailing macro argument is
         * the cleanup code (NOTE(review): presumably executed when the tile
         * allocation fails - confirm against the macro definition); each call
         * releases everything allocated so far.
         */
        plasma_zooplap2tile( descA, A, NB, NB, LDA, Am, 0, 0, Am, Am,
                             plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N,
                             plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)) );
        plasma_zooplap2tile( descC, C, NB, NB, LDC, N, 0, 0, M, N,
                             plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descC)) );
    } else {
        /* In-place: matrices are converted within the caller's buffers. */
        plasma_ziplap2tile( descA, A, NB, NB, LDA, Am, 0, 0, Am, Am );
        plasma_ziplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N );
        plasma_ziplap2tile( descC, C, NB, NB, LDC, N, 0, 0, M, N );
    }

    /* Call the tile interface */
    PLASMA_zhemm_Tile_Async(
        side, uplo, alpha, &descA, &descB, beta, &descC, sequence, &request);

    if (PLASMA_TRANSLATION == PLASMA_OUTOFPLACE) {
        /* Only C is an output, so only C is copied back. */
        plasma_zooptile2lap( descC, C, NB, NB, LDC, N );
        /* Synchronise before releasing the tile copies. */
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
        plasma_desc_mat_free(&descC);
    } else {
        /* The inputs were converted in place too, so restore all three. */
        plasma_ziptile2lap( descA, A, NB, NB, LDA, Am );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, N );
        plasma_ziptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
    }

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zher2k ( PLASMA_enum  uplo,
PLASMA_enum  trans,
int  N,
int  K,
PLASMA_Complex64_t  alpha,
PLASMA_Complex64_t *  A,
int  LDA,
PLASMA_Complex64_t *  B,
int  LDB,
double  beta,
PLASMA_Complex64_t *  C,
int  LDC 
)

PLASMA_zher2k - Performs one of the hermitian rank 2k operations

\[ C = \alpha [ op( A ) \times conjg( op( B )' )] + conjg( \alpha ) [ op( B ) \times conjg( op( A )' )] + \beta C \]

, or

\[ C = \alpha [ conjg( op( A )' ) \times op( B ) ] + conjg( \alpha ) [ conjg( op( B )' ) \times op( A ) ] + \beta C \]

,

where op( X ) is one of

op( X ) = X or op( X ) = conjg( X' )

where alpha is a complex scalar and beta is a real scalar, C is an n-by-n hermitian matrix and A and B are n-by-k matrices in the first case and k-by-n matrices in the second case.

Parameters:
[in]uplo= PlasmaUpper: Upper triangle of C is stored; = PlasmaLower: Lower triangle of C is stored.
[in]transSpecifies whether the matrix A is transposed or conjugate transposed: = PlasmaNoTrans:

\[ C = \alpha [ op( A ) \times conjg( op( B )' )] + conjg( \alpha ) [ op( B ) \times conjg( op( A )' )] + \beta C \]

= PlasmaConjTrans:

\[ C = \alpha [ conjg( op( A )' ) \times op( B ) ] + conjg( \alpha ) [ conjg( op( B )' ) \times op( A ) ] + \beta C \]

[in]NN specifies the order of the matrix C. N must be at least zero.
[in]KK specifies the number of columns of the A and B matrices with trans = PlasmaNoTrans. K specifies the number of rows of the A and B matrices with trans = PlasmaConjTrans.
[in]alphaalpha specifies the scalar alpha.
[in]AA is a LDA-by-ka matrix, where ka is K when trans = PlasmaNoTrans, and is N otherwise.
[in]LDAThe leading dimension of the array A. If trans = PlasmaNoTrans, LDA must be at least max( 1, N ); otherwise LDA must be at least max( 1, K ).
[in]BB is a LDB-by-kb matrix, where kb is K when trans = PlasmaNoTrans, and is N otherwise.
[in]LDBThe leading dimension of the array B. If trans = PlasmaNoTrans, LDB must be at least max( 1, N ); otherwise LDB must be at least max( 1, K ).
[in]betabeta specifies the scalar beta.
[in,out]CC is a LDC-by-N matrix. On exit, the array uplo part of the matrix is overwritten by the uplo part of the updated matrix.
[in]LDCThe leading dimension of the array C. LDC >= max( 1, N ).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
See also:
PLASMA_zher2k_Tile
PLASMA_cher2k
PLASMA_dher2k
PLASMA_sher2k

Definition at line 96 of file zher2k.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZHERK, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zher2k_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaConjTrans, PlasmaLower, PlasmaNoTrans, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the Hermitian rank-2k update of C.
     *
     * Validates the arguments, converts A, B and C to tile layout, calls
     * PLASMA_zher2k_Tile_Async() and converts C back.
     *
     * Returns PLASMA_ERR_NOT_INITIALIZED when no PLASMA context exists, -i
     * when the i-th argument is illegal, the plasma_tune() status when tuning
     * fails, PLASMA_SUCCESS on the quick return, and otherwise the status
     * recorded in the sequence.
     */
    int NB;
    int Am, An;
    int status;
    PLASMA_desc descA, descB, descC;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zher2k", "PLASMA not initialized");
        /* Nothing below can run without a context. */
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments; the return value is minus the argument position */
    if ((uplo != PlasmaUpper) && (uplo != PlasmaLower)) {
        plasma_error("PLASMA_zher2k", "illegal value of uplo");
        return -1;
    }
    if ((trans != PlasmaNoTrans) && (trans != PlasmaConjTrans)) {
        plasma_error("PLASMA_zher2k", "illegal value of trans");
        return -2;
    }
    /* A and B are N-by-K when not transposed, K-by-N otherwise. */
    if ( trans == PlasmaNoTrans ) {
        Am = N; An = K;
    } else {
        Am = K; An = N;
    }
    if (N < 0) {
        plasma_error("PLASMA_zher2k", "illegal value of N");
        return -3;
    }
    if (K < 0) {
        plasma_error("PLASMA_zher2k", "illegal value of K");
        return -4;
    }
    if (LDA < max(1, Am)) {
        plasma_error("PLASMA_zher2k", "illegal value of LDA");
        return -7;
    }
    if (LDB < max(1, Am)) {
        plasma_error("PLASMA_zher2k", "illegal value of LDB");
        return -9;
    }
    if (LDC < max(1, N)) {
        plasma_error("PLASMA_zher2k", "illegal value of LDC");
        return -12;
    }

    /* Quick return: empty C, or a zero update with beta == 1 leaves C unchanged. */
    if (N == 0 ||
        ((alpha == (PLASMA_Complex64_t)0.0 || K == 0) && beta == (double)1.0)) {
        return PLASMA_SUCCESS;
    }

    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZHERK, N, K, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zher2k", "plasma_tune() failed");
        return status;
    }

    /* Set MT & NT & KT */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    if (PLASMA_TRANSLATION == PLASMA_OUTOFPLACE) {
        /*
         * Out-of-place: work on tile copies. The trailing macro argument is
         * the cleanup code (NOTE(review): presumably executed when the tile
         * allocation fails - confirm against the macro definition); each call
         * releases everything allocated so far.
         */
        plasma_zooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
        plasma_zooplap2tile( descC, C, NB, NB, LDC, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descC)));
    } else {
        /* In-place: matrices are converted within the caller's buffers. */
        plasma_ziplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An );
        plasma_ziplap2tile( descB, B, NB, NB, LDB, An, 0, 0, Am, An );
        plasma_ziplap2tile( descC, C, NB, NB, LDC, N, 0, 0, N, N );
    }

    /* Call the tile interface */
    PLASMA_zher2k_Tile_Async(uplo, trans, alpha, &descA, &descB, beta, &descC, sequence, &request);

    if (PLASMA_TRANSLATION == PLASMA_OUTOFPLACE) {
        /* Only C is an output, so only C is copied back. */
        plasma_zooptile2lap( descC, C, NB, NB, LDC, N );
        /* Synchronise before releasing the tile copies. */
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
        plasma_desc_mat_free(&descC);
    } else {
        /* The inputs were converted in place too, so restore all three. */
        plasma_ziptile2lap( descA, A, NB, NB, LDA, An );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, An );
        plasma_ziptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
    }

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zherk ( PLASMA_enum  uplo,
PLASMA_enum  trans,
int  N,
int  K,
double  alpha,
PLASMA_Complex64_t *  A,
int  LDA,
double  beta,
PLASMA_Complex64_t *  C,
int  LDC 
)

PLASMA_zherk - Performs one of the hermitian rank k operations

\[ C = \alpha [ op( A ) \times conjg( op( A )' )] + \beta C \]

,

where op( X ) is one of

op( X ) = X or op( X ) = conjg( X' )

where alpha and beta are real scalars, C is an n-by-n hermitian matrix and A is an n-by-k matrix in the first case and a k-by-n matrix in the second case.

Parameters:
[in]uplo= PlasmaUpper: Upper triangle of C is stored; = PlasmaLower: Lower triangle of C is stored.
[in]transSpecifies whether the matrix A is transposed or conjugate transposed: = PlasmaNoTrans: A is not transposed; = PlasmaConjTrans: A is conjugate transposed.
[in]NN specifies the order of the matrix C. N must be at least zero.
[in]KK specifies the number of columns of the matrix op( A ).
[in]alphaalpha specifies the scalar alpha.
[in]AA is a LDA-by-ka matrix, where ka is K when trans = PlasmaNoTrans, and is N otherwise.
[in]LDAThe leading dimension of the array A. If trans = PlasmaNoTrans, LDA must be at least max( 1, N ); otherwise LDA must be at least max( 1, K ).
[in]betabeta specifies the scalar beta
[in,out]CC is a LDC-by-N matrix. On exit, the array uplo part of the matrix is overwritten by the uplo part of the updated matrix.
[in]LDCThe leading dimension of the array C. LDC >= max( 1, N ).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
See also:
PLASMA_zherk_Tile
PLASMA_cherk
PLASMA_dherk
PLASMA_sherk

Definition at line 85 of file zherk.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZHERK, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zherk_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaConjTrans, PlasmaLower, PlasmaNoTrans, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the Hermitian rank-k update of C.
     *
     * Validates the arguments, converts A and C to tile layout, calls
     * PLASMA_zherk_Tile_Async() and converts C back.
     *
     * Returns PLASMA_ERR_NOT_INITIALIZED when no PLASMA context exists, -i
     * when the i-th argument is illegal, the plasma_tune() status when tuning
     * fails, PLASMA_SUCCESS on the quick return, and otherwise the status
     * recorded in the sequence.
     */
    int NB;
    int Am, An;
    int status;
    PLASMA_desc descA, descC;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zherk", "PLASMA not initialized");
        /* Nothing below can run without a context. */
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments; the return value is minus the argument position */
    if ((uplo != PlasmaUpper) && (uplo != PlasmaLower)) {
        plasma_error("PLASMA_zherk", "illegal value of uplo");
        return -1;
    }
    if ((trans != PlasmaNoTrans) && (trans != PlasmaConjTrans)) {
        plasma_error("PLASMA_zherk", "illegal value of trans");
        return -2;
    }
    /* A is N-by-K when not transposed, K-by-N otherwise. */
    if ( trans == PlasmaNoTrans ) {
        Am = N; An = K;
    } else {
        Am = K; An = N;
    }
    if (N < 0) {
        plasma_error("PLASMA_zherk", "illegal value of N");
        return -3;
    }
    if (K < 0) {
        plasma_error("PLASMA_zherk", "illegal value of K");
        return -4;
    }
    if (LDA < max(1, Am)) {
        plasma_error("PLASMA_zherk", "illegal value of LDA");
        return -7;
    }
    if (LDC < max(1, N)) {
        plasma_error("PLASMA_zherk", "illegal value of LDC");
        return -10;
    }

    /* Quick return: empty C, or a zero update with beta == 1 leaves C unchanged. */
    if (N == 0 ||
        ((alpha == (double)0.0 || K == 0) && beta == (double)1.0)) {
        return PLASMA_SUCCESS;
    }

    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZHERK, N, K, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zherk", "plasma_tune() failed");
        return status;
    }

    /* Set MT & NT & KT */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    if (PLASMA_TRANSLATION == PLASMA_OUTOFPLACE) {
        /*
         * Out-of-place: work on tile copies. The trailing macro argument is
         * the cleanup code (NOTE(review): presumably executed when the tile
         * allocation fails - confirm against the macro definition); each call
         * releases everything allocated so far.
         */
        plasma_zooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descC, C, NB, NB, LDC, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descC)));
    } else {
        /* In-place: matrices are converted within the caller's buffers. */
        plasma_ziplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An );
        plasma_ziplap2tile( descC, C, NB, NB, LDC, N, 0, 0, N, N );
    }

    /* Call the tile interface */
    PLASMA_zherk_Tile_Async(uplo, trans, alpha, &descA, beta, &descC, sequence, &request);

    if (PLASMA_TRANSLATION == PLASMA_OUTOFPLACE) {
        /* Only C is an output, so only C is copied back. */
        plasma_zooptile2lap( descC, C, NB, NB, LDC, N );
        /* Synchronise before releasing the tile copies. */
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descC);
    } else {
        /* The input was converted in place too, so restore both. */
        plasma_ziptile2lap( descA, A, NB, NB, LDA, An );
        plasma_ziptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
    }

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zhetrd ( PLASMA_enum  jobz,
PLASMA_enum  uplo,
int  N,
PLASMA_Complex64_t A,
int  LDA,
double *  D,
double *  E,
PLASMA_desc descT,
PLASMA_Complex64_t Q,
int  LDQ 
)

PLASMA_zhetrd - reduces a complex Hermitian matrix A to real symmetric tridiagonal form S using a two-stage approach. First stage: reduction to band tridiagonal form (unitary Q1). Second stage: reduction from band to tridiagonal form (unitary Q2). Let Q = Q1 * Q2 be the global unitary transformation; then Q**H * A * Q = S. Not LAPACK compliant, as A does not contain the T elements. Note: Only PlasmaNoVec supported!

Parameters:
[in]jobzIntended usage: = PlasmaNoVec: computes eigenvalues only; = PlasmaVec: computes eigenvalues and eigenvectors. Note: Only PlasmaNoVec supported!
[in]uploSpecifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]NThe order of the matrix A. N >= 0.
[in,out]AOn entry, the symmetric (or Hermitian) matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, the lower triangle (if uplo = PlasmaLower) or the upper triangle (if uplo = PlasmaUpper) of A, including the diagonal, is destroyed.
[in]LDAThe leading dimension of the array A. LDA >= max(1,N).
[out]DOn exit, the diagonal elements of the tridiagonal matrix: D(i) = A(i,i).
[out]EOn exit, the off-diagonal elements of the tridiagonal matrix: E(i) = A(i,i+1) if uplo = PlasmaUpper, E(i) = A(i+1,i) if uplo = PlasmaLower.
[in,out]descTOn entry, descriptor as returned by PLASMA_Alloc_Workspace_zheev. On exit, contains auxiliary factorization data.
[out]QOn exit, if jobz = PlasmaVec and info = 0, the eigenvectors.
[in]LDQThe leading dimension of the array Q. LDQ >= max(1,N).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
>0if INFO = i, the algorithm failed to converge; i off-diagonal elements of an intermediate tridiagonal form did not converge to zero.
See also:
PLASMA_zhetrd_Tile
PLASMA_zhetrd_Tile_Async
PLASMA_chetrd
PLASMA_dsytrd
PLASMA_ssytrd

Definition at line 100 of file zhetrd.c.

References plasma_desc_t::m, max, plasma_desc_t::n, plasma_context_self(), plasma_desc_check(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), PLASMA_FUNC_ZHETRD, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zhetrd_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaLower, PlasmaNoVec, PlasmaUpper, PlasmaVec, and plasma_sequence_t::status.

{
    /* LAPACK-layout driver for the reduction to tridiagonal form: tunes the
     * tile sizes, validates the arguments, converts A (and Q) to tile layout,
     * runs the asynchronous tile routine and converts back. */
    int NB, IB, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descQ;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_error("PLASMA_zhetrd", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Tune NB & IB depending on N; Set NBNB.
     * Done before the argument checks because the descT check needs NB/IB. */
    status = plasma_tune(PLASMA_FUNC_ZHETRD, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zhetrd", "plasma_tune() failed");
        return status;
    }

    /* Set NT */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);

    /* Check input arguments */
    if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
        plasma_error("PLASMA_zhetrd", "illegal value of jobz");
        return -1;
    }
    if (uplo != PlasmaLower && uplo != PlasmaUpper) {
        plasma_error("PLASMA_zhetrd", "illegal value of uplo");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_zhetrd", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zhetrd", "illegal value of LDA");
        return -5;
    }
    if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) ||
         ( descT->m != NT*IB ) || (descT->n != NT*NB) ) {
        plasma_error("PLASMA_zhetrd", "invalid T descriptor");
        return -8;
    }
    if (LDQ < max(1, N)) {
        plasma_error("PLASMA_zhetrd", "illegal value of LDQ");
        return -10;
    }

    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;

    /* Eigenvector computation is rejected up front, so the PlasmaVec
     * branches below are currently never taken. */
    if (jobz == PlasmaVec) {
        plasma_error("PLASMA_zhetrd", "computing the eigenvectors is not supported in this version");
        return -1;
    }

    plasma_sequence_create(plasma, &sequence);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N , plasma_desc_mat_free(&(descA)) );
        if (jobz == PlasmaVec) {
            plasma_zooplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, N, N , plasma_desc_mat_free(&(descQ)) );
        }
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
        if (jobz == PlasmaVec)
            plasma_ziplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, N, N );
    }

    /* Call the tile interface */
    PLASMA_zhetrd_Tile_Async(jobz, uplo, &descA, D, E, descT, &descQ, sequence, &request);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
        if (jobz == PlasmaVec) {
            plasma_zooptile2lap( descQ, Q, NB, NB, LDQ, N );
        }
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        if (jobz == PlasmaVec)
            plasma_desc_mat_free(&descQ);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        if (jobz == PlasmaVec)
            plasma_ziptile2lap( descQ, Q, NB, NB, LDQ, N );
        plasma_dynamic_sync();
    }

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zlacpy ( PLASMA_enum  uplo,
int  M,
int  N,
PLASMA_Complex64_t A,
int  LDA,
PLASMA_Complex64_t B,
int  LDB 
)

PLASMA_zlacpy copies all or part of a two-dimensional matrix A to another matrix B

Parameters:
[in]uploSpecifies the part of the matrix A to be copied to B. = PlasmaUpperLower: All the matrix A = PlasmaUpper: Upper triangular part = PlasmaLower: Lower triangular part
[in]MThe number of rows of the matrix A. M >= 0.
[in]NThe number of columns of the matrix A. N >= 0.
[in]AThe M-by-N matrix A. If uplo = PlasmaUpper, only the upper trapezium is accessed; if UPLO = PlasmaLower, only the lower trapezium is accessed.
[in]LDAThe leading dimension of the array A. LDA >= max(1,M).
[out]BThe M-by-N matrix B. On exit, B = A in the locations specified by UPLO.
[in]LDBThe leading dimension of the array B. LDB >= max(1,M).
See also:
PLASMA_zlacpy_Tile
PLASMA_zlacpy_Tile_Async
PLASMA_clacpy
PLASMA_dlacpy
PLASMA_slacpy

Definition at line 62 of file zlacpy.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGEMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, PLASMA_zlacpy_Tile_Async(), plasma_zooplap2tile, plasma_zooptile2lap, PlasmaLower, PlasmaUpper, and PlasmaUpperLower.

{
    /* LAPACK-layout driver for the matrix copy: validates the arguments,
     * converts A and B to tile layout, runs the asynchronous tile routine
     * and converts B back. */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zlacpy", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments */
    if ( (uplo != PlasmaUpperLower) &&
         (uplo != PlasmaUpper) &&
         (uplo != PlasmaLower) ) {
        plasma_error("PLASMA_zlacpy", "illegal value of uplo");
        return -1;
    }
    if (M < 0) {
        plasma_error("PLASMA_zlacpy", "illegal value of M");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_zlacpy", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_zlacpy", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, M)) {
        plasma_error("PLASMA_zlacpy", "illegal value of LDB");
        return -7;
    }

    /* Quick return (this function returns int, so use the status code
     * rather than a double zero) */
    if (min(N, M) == 0)
        return PLASMA_SUCCESS;

    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZGEMM, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zlacpy", "plasma_tune() failed");
        return status;
    }

    /* Set NT */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)) );
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N);
        /* B has its own leading dimension; LDB, not LDA */
        plasma_ziplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N);
    }

    /* Call the tile interface */
    PLASMA_zlacpy_Tile_Async(uplo, &descA, &descB, sequence, &request);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* Only B is an output, so only B is copied back */
        plasma_zooptile2lap( descB, B, NB, NB, LDB, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        /* A was converted in place above, so it must be converted back too */
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, N );
        plasma_dynamic_sync();
    }

    plasma_sequence_destroy(plasma, sequence);
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

double PLASMA_zlange ( PLASMA_enum  norm,
int  M,
int  N,
PLASMA_Complex64_t A,
int  LDA,
double *  work 
)

PLASMA_zlange returns the value

\[ zlange = \begin{cases} \max(|A(i,j)|), & NORM = PlasmaMaxNorm \\ norm1(A), & NORM = PlasmaOneNorm \\ normI(A), & NORM = PlasmaInfNorm \\ normF(A), & NORM = PlasmaFrobeniusNorm \end{cases} \]

where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of squares). Note that max(abs(A(i,j))) is not a consistent matrix norm.

Parameters:
[in]norm= PlasmaMaxNorm: Max norm = PlasmaOneNorm: One norm = PlasmaInfNorm: Infinity norm = PlasmaFrobeniusNorm: Frobenius norm
[in]MThe number of rows of the matrix A. M >= 0. When M = 0, the returned value is set to zero.
[in]NThe number of columns of the matrix A. N >= 0. When N = 0, the returned value is set to zero.
[in]AThe M-by-N matrix A.
[in]LDAThe leading dimension of the array A. LDA >= max(1,M).
[in]workdouble precision array of dimension (MAX(1,LWORK)), where LWORK >= M when NORM = PlasmaInfNorm; otherwise, WORK is not referenced.
Returns:
Return values:
The norm described above.
See also:
PLASMA_zlange_Tile
PLASMA_zlange_Tile_Async
PLASMA_clange
PLASMA_dlange
PLASMA_slange

Definition at line 78 of file zlange.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGEMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, PLASMA_zlange_Tile_Async(), plasma_zooplap2tile, PlasmaFrobeniusNorm, PlasmaInfNorm, PlasmaMaxNorm, and PlasmaOneNorm.

{
    /* LAPACK-layout driver for the general matrix norm: validates the
     * arguments, converts A to tile layout, runs the asynchronous tile
     * routine and returns the computed value. */
    int NB;
    int status;
    double value;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zlange", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments.
     * NOTE(review): error codes are returned through the double return value. */
    if ( (norm != PlasmaMaxNorm) && (norm != PlasmaOneNorm)
         && (norm != PlasmaInfNorm) && (norm != PlasmaFrobeniusNorm) ) {
        plasma_error("PLASMA_zlange", "illegal value of norm");
        return -1;
    }
    if (M < 0) {
        plasma_error("PLASMA_zlange", "illegal value of M");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_zlange", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_zlange", "illegal value of LDA");
        return -5;
    }

    /* Quick return: the norm of an empty matrix is zero */
    if (min(N, M) == 0)
        return (double)0.0;

    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZGEMM, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zlange", "plasma_tune() failed");
        return status;
    }

    /* Set NT */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N);
    }

    /* Call the tile interface */
    PLASMA_zlange_Tile_Async(norm, &descA, work, &value, sequence, &request);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* A is input-only: nothing to copy back, just release the tile copy */
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }

    plasma_sequence_destroy(plasma, sequence);
    return value;
}

Here is the call graph for this function:

double PLASMA_zlanhe ( PLASMA_enum  norm,
PLASMA_enum  uplo,
int  N,
PLASMA_Complex64_t A,
int  LDA,
double *  work 
)

PLASMA_zlanhe returns the value

\[ zlanhe = \begin{cases} \max(|A(i,j)|), & NORM = PlasmaMaxNorm \\ norm1(A), & NORM = PlasmaOneNorm \\ normI(A), & NORM = PlasmaInfNorm \\ normF(A), & NORM = PlasmaFrobeniusNorm \end{cases} \]

where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of squares). Note that max(abs(A(i,j))) is not a consistent matrix norm.

Parameters:
[in]norm= PlasmaMaxNorm: Max norm = PlasmaOneNorm: One norm = PlasmaInfNorm: Infinity norm = PlasmaFrobeniusNorm: Frobenius norm
[in]uplo= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]NThe number of columns/rows of the matrix A. N >= 0. When N = 0, the returned value is set to zero.
[in]AThe N-by-N matrix A.
[in]LDAThe leading dimension of the array A. LDA >= max(1,N).
[in]workdouble precision array of dimension PLASMA_SIZE if PLASMA_STATIC_SCHEDULING is used, and NULL otherwise.
Returns:
Return values:
The norm described above.
See also:
PLASMA_zlanhe_Tile
PLASMA_zlanhe_Tile_Async
PLASMA_clanhe
PLASMA_dlanhe
PLASMA_slanhe

Definition at line 77 of file zlanhe.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGEMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, PLASMA_zlanhe_Tile_Async(), plasma_zooplap2tile, PlasmaFrobeniusNorm, PlasmaInfNorm, PlasmaLower, PlasmaMaxNorm, PlasmaOneNorm, and PlasmaUpper.

{
    /* LAPACK-layout driver for the Hermitian matrix norm: validates the
     * arguments, converts A to tile layout, runs the asynchronous tile
     * routine and returns the computed value. */
    int NB;
    int status;
    double value;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zlanhe", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments.
     * NOTE(review): error codes are returned through the double return value. */
    if ( (norm != PlasmaMaxNorm) && (norm != PlasmaOneNorm)
         && (norm != PlasmaInfNorm) && (norm != PlasmaFrobeniusNorm) ) {
        plasma_error("PLASMA_zlanhe", "illegal value of norm");
        return -1;
    }
    if ( (uplo != PlasmaUpper) && (uplo != PlasmaLower) ) {
        plasma_error("PLASMA_zlanhe", "illegal value of uplo");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_zlanhe", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zlanhe", "illegal value of LDA");
        return -5;
    }

    /* Quick return: the norm of an empty matrix is zero */
    if ( N == 0)
        return (double)0.0;

    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZGEMM, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zlanhe", "plasma_tune() failed");
        return status;
    }

    /* Set NT */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N);
    }

    /* Call the tile interface */
    PLASMA_zlanhe_Tile_Async(norm, uplo, &descA, work, &value, sequence, &request);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* A is input-only: nothing to copy back, just release the tile copy */
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }

    plasma_sequence_destroy(plasma, sequence);
    return value;
}

Here is the call graph for this function:

double PLASMA_zlansy ( PLASMA_enum  norm,
PLASMA_enum  uplo,
int  N,
PLASMA_Complex64_t A,
int  LDA,
double *  work 
)

PLASMA_zlansy returns the value

\[ zlansy = \begin{cases} \max(|A(i,j)|), & NORM = PlasmaMaxNorm \\ norm1(A), & NORM = PlasmaOneNorm \\ normI(A), & NORM = PlasmaInfNorm \\ normF(A), & NORM = PlasmaFrobeniusNorm \end{cases} \]

where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of squares). Note that max(abs(A(i,j))) is not a consistent matrix norm.

Parameters:
[in]norm= PlasmaMaxNorm: Max norm = PlasmaOneNorm: One norm = PlasmaInfNorm: Infinity norm = PlasmaFrobeniusNorm: Frobenius norm
[in]uplo= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]NThe number of columns/rows of the matrix A. N >= 0. When N = 0, the returned value is set to zero.
[in]AThe N-by-N matrix A.
[in]LDAThe leading dimension of the array A. LDA >= max(1,N).
[in]workdouble precision array of dimension PLASMA_SIZE if PLASMA_STATIC_SCHEDULING is used, and NULL otherwise.
Returns:
Return values:
The norm described above.
See also:
PLASMA_zlansy_Tile
PLASMA_zlansy_Tile_Async
PLASMA_clansy
PLASMA_dlansy
PLASMA_slansy

Definition at line 77 of file zlansy.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGEMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, PLASMA_zlansy_Tile_Async(), plasma_zooplap2tile, PlasmaFrobeniusNorm, PlasmaInfNorm, PlasmaLower, PlasmaMaxNorm, PlasmaOneNorm, and PlasmaUpper.

{
    /* LAPACK-layout driver for the symmetric matrix norm: validates the
     * arguments, converts A to tile layout, runs the asynchronous tile
     * routine and returns the computed value. */
    int NB;
    int status;
    double value;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zlansy", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments.
     * NOTE(review): error codes are returned through the double return value. */
    if ( (norm != PlasmaMaxNorm) && (norm != PlasmaOneNorm)
         && (norm != PlasmaInfNorm) && (norm != PlasmaFrobeniusNorm) ) {
        plasma_error("PLASMA_zlansy", "illegal value of norm");
        return -1;
    }
    if ( (uplo != PlasmaUpper) && (uplo != PlasmaLower) ) {
        plasma_error("PLASMA_zlansy", "illegal value of uplo");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_zlansy", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zlansy", "illegal value of LDA");
        return -5;
    }

    /* Quick return: the norm of an empty matrix is zero */
    if ( N == 0)
        return (double)0.0;

    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZGEMM, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zlansy", "plasma_tune() failed");
        return status;
    }

    /* Set NT */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N);
    }

    /* Call the tile interface */
    PLASMA_zlansy_Tile_Async(norm, uplo, &descA, work, &value, sequence, &request);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* A is input-only: nothing to copy back, just release the tile copy */
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }

    plasma_sequence_destroy(plasma, sequence);
    return value;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zLapack_to_Tile ( PLASMA_Complex64_t Af77,
int  LDA,
PLASMA_desc A 
)

PLASMA_zLapack_to_Tile - Conversion from LAPACK layout to tile layout.

Parameters:
[in]Af77LAPACK matrix.
[in]LDAThe leading dimension of the matrix Af77.
[in,out]ADescriptor of the PLASMA matrix in tile layout. If PLASMA_TRANSLATION_MODE is set to PLASMA_INPLACE, A->mat is not used and set to Af77 when returns, else if PLASMA_TRANSLATION_MODE is set to PLASMA_OUTOFPLACE, A->mat has to be allocated before.
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
See also:
PLASMA_zLapack_to_Tile_Async
PLASMA_zTile_to_Lapack
PLASMA_cLapack_to_Tile
PLASMA_dLapack_to_Tile
PLASMA_sLapack_to_Tile

Definition at line 55 of file ztile.c.

References A, plasma_context_self(), plasma_desc_check(), plasma_dynamic_sync, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_5, plasma_pzlapack_to_tile(), plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, and plasma_sequence_t::status.

{
    /* Synchronous conversion of a LAPACK-layout matrix into the tile layout
     * described by A: checks the descriptor, launches the parallel
     * conversion and waits for it to finish. */
    PLASMA_desc descA = *A;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request;
    int status;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zLapack_to_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check descriptor for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zLapack_to_Tile", "invalid descriptor");
        return PLASMA_ERR_ILLEGAL_VALUE;
    }

    plasma_sequence_create(plasma, &sequence);

    /* Arguments are passed as (type, value) pairs */
    plasma_parallel_call_5(
        plasma_pzlapack_to_tile,
        PLASMA_Complex64_t*, Af77,
        int, LDA,
        PLASMA_desc, descA,
        PLASMA_sequence*, sequence,
        PLASMA_request*, &request);

    plasma_dynamic_sync();

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zlaset ( PLASMA_enum  uplo,
int  M,
int  N,
PLASMA_Complex64_t  alpha,
PLASMA_Complex64_t  beta,
PLASMA_Complex64_t A,
int  LDA 
)

PLASMA_zlaset sets the off-diagonal elements of all or part of a two-dimensional matrix A to alpha and its diagonal elements to beta.

Parameters:
[in]uploSpecifies the part of the matrix A to be set. = PlasmaUpperLower: All the matrix A = PlasmaUpper: Upper triangular part is set. The lower triangle is unchanged. = PlasmaLower: Lower triangular part is set. The upper triangle is unchanged.
[in]MThe number of rows of the matrix A. M >= 0.
[in]NThe number of columns of the matrix A. N >= 0.
[in]alphaAll the offdiagonal array elements are set to alpha.
[in]betaAll the diagonal array elements are set to beta.
[in,out]AOn entry, the m by n matrix A. On exit, A(i,j) = ALPHA, 1 <= i <= m, 1 <= j <= n, i.ne.j; A(i,i) = BETA , 1 <= i <= min(m,n)
[in]LDAThe leading dimension of the array A. LDA >= max(1,M).
See also:
PLASMA_zlaset_Tile
PLASMA_zlaset_Tile_Async
PLASMA_claset
PLASMA_dlaset
PLASMA_slaset

Definition at line 63 of file zlaset.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGEMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, PLASMA_zlaset_Tile_Async(), plasma_zooplap2tile, PlasmaLower, PlasmaUpper, and PlasmaUpperLower.

{
    /* LAPACK-layout driver for matrix initialisation: validates the
     * arguments, converts A to tile layout, runs the asynchronous tile
     * routine and converts A back. */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zlaset", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments */
    if ( (uplo != PlasmaUpperLower) &&
         (uplo != PlasmaUpper) &&
         (uplo != PlasmaLower) ) {
        plasma_error("PLASMA_zlaset", "illegal value of uplo");
        return -1;
    }
    if (M < 0) {
        plasma_error("PLASMA_zlaset", "illegal value of M");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_zlaset", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_zlaset", "illegal value of LDA");
        return -5;
    }

    /* Quick return (this function returns int, so use the status code
     * rather than a double zero) */
    if (min(N, M) == 0)
        return PLASMA_SUCCESS;

    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZGEMM, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zlaset", "plasma_tune() failed");
        return status;
    }

    /* Set NT */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N);
    }

    /* Call the tile interface */
    PLASMA_zlaset_Tile_Async(uplo, alpha, beta, &descA, sequence, &request);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* A is the output: the tile copy must be written back to the
         * caller's array before it is released, otherwise the result is lost */
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }

    plasma_sequence_destroy(plasma, sequence);
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

int PLASMA_zlaswp ( int  N,
PLASMA_Complex64_t A,
int  LDA,
int  K1,
int  K2,
int *  IPIV,
int  INCX 
)

PLASMA_zlaswp - performs a series of row interchanges on the matrix A. One row interchange is initiated for each of rows K1 through K2 of A.

Parameters:
[in]NThe order of the matrix A. N >= 0.
[in]AThe tile factors L and U from the factorization, computed by PLASMA_zgetrf.
[in]LDAThe leading dimension of the array A. LDA >= max(1,N).
[in]K1The first element of IPIV for which a row interchange will be done.
[in]K2The last element of IPIV for which a row interchange will be done.
[in]IPIVThe pivot indices from PLASMA_zgetrf.
[in]INCXThe increment between successive values of IPIV. If INCX is negative, the pivots are applied in reverse order.
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
Returns:
<0 if -i, the i-th argument had an illegal value
See also:
PLASMA_zlaswp_Tile
PLASMA_zlaswp_Tile_Async
PLASMA_claswp
PLASMA_dlaswp
PLASMA_slaswp
PLASMA_zgetrf

Definition at line 66 of file zlaswp.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGESV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, PLASMA_zlaswp_Tile_Async(), plasma_zooplap2tile, plasma_zooptile2lap, and plasma_sequence_t::status.

{
    /* LAPACK-layout driver for the row interchanges: validates the
     * arguments, converts A to tile layout, runs the asynchronous tile
     * routine and converts A back. */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zlaswp", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments */
    if (N < 0) {
        plasma_error("PLASMA_zlaswp", "illegal value of N");
        return -1;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zlaswp", "illegal value of LDA");
        return -3;
    }

    /* Quick return */
    if ( N == 0 )
        return PLASMA_SUCCESS;

    /* Tune NB & IB depending on N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_ZGESV, LDA, N, N);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zlaswp", "plasma_tune() failed");
        return status;
    }

    /* Set NT & NTRHS */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K2, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K2, N);
    }

    /* Call the tile interface */
    PLASMA_zlaswp_Tile_Async(&descA, K1, K2, IPIV, INCX, sequence, &request);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zlaswpc ( int  N,
PLASMA_Complex64_t A,
int  LDA,
int  K1,
int  K2,
int *  IPIV,
int  INCX 
)

PLASMA_zlaswpc - performs a series of row interchanges on the matrix A. One row interchange is initiated for each of rows K1 through K2 of A.

Parameters:
[in]NThe order of the matrix A. N >= 0.
[in]AThe tile factors L and U from the factorization, computed by PLASMA_zgetrf.
[in]LDAThe leading dimension of the array A. LDA >= max(1,N).
[in]K1The first element of IPIV for which a row interchange will be done.
[in]K2The last element of IPIV for which a row interchange will be done.
[in]IPIVThe pivot indices from PLASMA_zgetrf.
[in]INCXThe increment between successive values of IPIV. If INCX is negative, the pivots are applied in reverse order.
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
Returns:
<0 if -i, the i-th argument had an illegal value
See also:
PLASMA_zlaswpc_Tile
PLASMA_zlaswpc_Tile_Async
PLASMA_claswpc
PLASMA_dlaswpc
PLASMA_slaswpc
PLASMA_zgetrf

Definition at line 66 of file zlaswpc.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGESV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, PLASMA_zlaswpc_Tile_Async(), plasma_zooplap2tile, plasma_zooptile2lap, and plasma_sequence_t::status.

{
    /* LAPACK-layout driver for PLASMA_zlaswpc: validates the arguments,
     * converts A to tile layout, runs the asynchronous tile routine and
     * converts A back. */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zlaswpc", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments */
    if (N < 0) {
        plasma_error("PLASMA_zlaswpc", "illegal value of N");
        return -1;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zlaswpc", "illegal value of LDA");
        return -3;
    }

    /* Quick return */
    if ( N == 0 )
        return PLASMA_SUCCESS;

    /* Tune NB & IB depending on N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_ZGESV, LDA, N, N);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zlaswpc", "plasma_tune() failed");
        return status;
    }

    /* Set NT & NTRHS */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K2, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K2, N);
    }

    /* Call the tile interface */
    PLASMA_zlaswpc_Tile_Async(&descA, K1, K2, IPIV, INCX, sequence, &request);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

int PLASMA_zlauum ( PLASMA_enum  uplo,
int  N,
PLASMA_Complex64_t A,
int  LDA 
)

PLASMA_zlauum - Computes the product U * U' or L' * L, where the triangular factor U or L is stored in the upper or lower triangular part of the array A.

If UPLO = 'U' or 'u' then the upper triangle of the result is stored, overwriting the factor U in A. If UPLO = 'L' or 'l' then the lower triangle of the result is stored, overwriting the factor L in A.

Parameters:
[in]uplo= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]NThe order of the triangular factor U or L. N >= 0.
[in,out]AOn entry, the triangular factor U or L. On exit, if UPLO = 'U', the upper triangle of A is overwritten with the upper triangle of the product U * U'; if UPLO = 'L', the lower triangle of A is overwritten with the lower triangle of the product L' * L.
[in]LDAThe leading dimension of the array A. LDA >= max(1,N).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
See also:
PLASMA_zlauum_Tile
PLASMA_zlauum_Tile_Async
PLASMA_clauum
PLASMA_dlauum
PLASMA_slauum
PLASMA_zpotri

Definition at line 65 of file zlauum.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, PLASMA_zlauum_Tile_Async(), plasma_zooplap2tile, plasma_zooptile2lap, PlasmaLower, PlasmaUpper, and plasma_sequence_t::status.

{
    /* LAPACK-layout driver for the triangular product: validates the
     * arguments, converts A to tile layout, runs the asynchronous tile
     * routine and converts A back. */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zlauum", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments */
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_zlauum", "illegal value of uplo");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_zlauum", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zlauum", "illegal value of LDA");
        return -4;
    }

    /* Quick return */
    if (max(N, 0) == 0)
        return PLASMA_SUCCESS;

    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZPOSV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zlauum", "plasma_tune() failed");
        return status;
    }

    /* Set NT */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N);
    }

    /* Call the tile interface */
    PLASMA_zlauum_Tile_Async(uplo, &descA, sequence, &request);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

int PLASMA_zplghe ( double  bump,
int  N,
PLASMA_Complex64_t *A,
int  LDA,
unsigned long long int  seed 
)

PLASMA_zplghe - Generate a random hermitian matrix by tiles.

Parameters:
[in]bumpThe value to add to the diagonal to be sure to have a positive definite matrix.
[in]NThe order of the matrix A. N >= 0.
[out]AOn exit, The random hermitian matrix A generated.
[in]LDAThe leading dimension of the array A. LDA >= max(1,N).
[in]seedThe seed used in the random generation.
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
See also:
PLASMA_zplghe_Tile
PLASMA_zplghe_Tile_Async
PLASMA_cplghe
PLASMA_dplghe
PLASMA_splghe
PLASMA_zplrnt
PLASMA_zplgsy

Definition at line 58 of file zplghe.c.

References A, plasma_desc_t::mat, max, plasma_context_self(), plasma_desc_init(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGEMM, PLASMA_NB, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, plasma_tune(), plasma_ziptile2lap, PLASMA_zplghe_Tile_Async(), PlasmaComplexDouble, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver: checks the arguments, wraps A in a tile
     * descriptor, runs PLASMA_zplghe_Tile_Async() and translates the
     * generated matrix back to LAPACK layout in place.
     * Returns PLASMA_SUCCESS, PLASMA_ERR_NOT_INITIALIZED, -i when the i-th
     * argument is illegal, or the status recorded in the sequence.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zplghe", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (N < 0) {
        plasma_error("PLASMA_zplghe", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zplghe", "illegal value of LDA");
        return -4;
    }
    /* Quick return: nothing to generate for an empty matrix */
    if (max(0, N) == 0)
        return PLASMA_SUCCESS;

    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZGEMM, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zplghe", "plasma_tune() failed");
        return status;
    }

    /* Set NT */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    /* The descriptor uses the caller's array directly as tile storage */
    descA = plasma_desc_init(
        PlasmaComplexDouble, NB, NB, NB*NB,
        LDA, N, 0, 0, N, N);
    descA.mat = A;

    /* Call the tile interface */
    PLASMA_zplghe_Tile_Async( bump, &descA, seed, sequence, &request );

    /* Translate back to LAPACK layout and wait for all submitted work */
    plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
    plasma_dynamic_sync();

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zplgsy ( PLASMA_Complex64_t  bump,
int  N,
PLASMA_Complex64_t *A,
int  LDA,
unsigned long long int  seed 
)

PLASMA_zplgsy - Generate a random symmetric matrix by tiles.

Parameters:
[in]bumpThe value to add to the diagonal to be sure to have a positive definite matrix.
[in]NThe order of the matrix A. N >= 0.
[out]AOn exit, the random symmetric matrix A generated.
[in]LDAThe leading dimension of the array A. LDA >= max(1,N).
[in]seedThe seed used in the random generation.
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
See also:
PLASMA_zplgsy_Tile
PLASMA_zplgsy_Tile_Async
PLASMA_cplgsy
PLASMA_dplgsy
PLASMA_splgsy
PLASMA_zplrnt
PLASMA_zplghe

Definition at line 58 of file zplgsy.c.

References A, plasma_desc_t::mat, max, plasma_context_self(), plasma_desc_init(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGEMM, PLASMA_NB, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, plasma_tune(), plasma_ziptile2lap, PLASMA_zplgsy_Tile_Async(), PlasmaComplexDouble, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver: checks the arguments, wraps A in a tile
     * descriptor, runs PLASMA_zplgsy_Tile_Async() and translates the
     * generated matrix back to LAPACK layout in place.
     * Returns PLASMA_SUCCESS, PLASMA_ERR_NOT_INITIALIZED, -i when the i-th
     * argument is illegal, or the status recorded in the sequence.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zplgsy", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (N < 0) {
        plasma_error("PLASMA_zplgsy", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zplgsy", "illegal value of LDA");
        return -4;
    }
    /* Quick return: nothing to generate for an empty matrix */
    if (max(0, N) == 0)
        return PLASMA_SUCCESS;

    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZGEMM, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zplgsy", "plasma_tune() failed");
        return status;
    }

    /* Set NT */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    /* The descriptor uses the caller's array directly as tile storage */
    descA = plasma_desc_init(
        PlasmaComplexDouble, NB, NB, NB*NB,
        LDA, N, 0, 0, N, N);
    descA.mat = A;

    /* Call the tile interface */
    PLASMA_zplgsy_Tile_Async( bump, &descA, seed, sequence, &request );

    /* Translate back to LAPACK layout and wait for all submitted work */
    plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
    plasma_dynamic_sync();

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zplrnt ( int  M,
int  N,
PLASMA_Complex64_t *A,
int  LDA,
unsigned long long int  seed 
)

PLASMA_zplrnt - Generate a random matrix by tiles.

Parameters:
[in]MThe number of rows of A. M >= 0.
[in]NThe number of columns of A. N >= 0.
[out]AOn exit, The random matrix A generated.
[in]LDAThe leading dimension of the array A. LDA >= max(1,M).
[in]seedThe seed used in the random generation.
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
See also:
PLASMA_zplrnt_Tile
PLASMA_zplrnt_Tile_Async
PLASMA_cplrnt
PLASMA_dplrnt
PLASMA_splrnt
PLASMA_zplghe
PLASMA_zplgsy

Definition at line 57 of file zplrnt.c.

References A, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGEMM, PLASMA_NB, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, plasma_tune(), plasma_ziptile2lap, PLASMA_zplrnt_Tile_Async(), PlasmaComplexDouble, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver: checks the arguments, wraps A in a tile
     * descriptor, runs PLASMA_zplrnt_Tile_Async() and translates the
     * generated M-by-N matrix back to LAPACK layout in place.
     * Returns PLASMA_SUCCESS, PLASMA_ERR_NOT_INITIALIZED, -i when the i-th
     * argument is illegal, or the status recorded in the sequence.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zplrnt", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (M < 0) {
        plasma_error("PLASMA_zplrnt", "illegal value of M");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_zplrnt", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_zplrnt", "illegal value of LDA");
        return -4;
    }
    /* Quick return: nothing to generate when either dimension is zero */
    if (min(M, N) == 0)
        return PLASMA_SUCCESS;

    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZGEMM, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zplrnt", "plasma_tune() failed");
        return status;
    }

    /* Set NT */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    /* The descriptor uses the caller's array directly as tile storage */
    descA = plasma_desc_init(
        PlasmaComplexDouble, NB, NB, NB*NB,
        LDA, N, 0, 0, M, N);
    descA.mat = A;

    /* Call the tile interface */
    PLASMA_zplrnt_Tile_Async( &descA, seed, sequence, &request );

    /* Translate back to LAPACK layout and wait for all submitted work */
    plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
    plasma_dynamic_sync();

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zposv ( PLASMA_enum  uplo,
int  N,
int  NRHS,
PLASMA_Complex64_t *A,
int  LDA,
PLASMA_Complex64_t *B,
int  LDB 
)

PLASMA_zposv - Computes the solution to a system of linear equations A * X = B, where A is an N-by-N symmetric positive definite (or Hermitian positive definite in the complex case) matrix and X and B are N-by-NRHS matrices. The Cholesky decomposition is used to factor A as

\[ A = \begin{cases} U^H \times U, & \text{if uplo = PlasmaUpper} \\ L \times L^H, & \text{if uplo = PlasmaLower} \end{cases} \]

where U is an upper triangular matrix and L is a lower triangular matrix. The factored form of A is then used to solve the system of equations A * X = B.

Parameters:
[in]uploSpecifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]NThe number of linear equations, i.e., the order of the matrix A. N >= 0.
[in]NRHSThe number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in,out]AOn entry, the symmetric positive definite (or Hermitian) matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if return value = 0, the factor U or L from the Cholesky factorization A = U**H*U or A = L*L**H.
[in]LDAThe leading dimension of the array A. LDA >= max(1,N).
[in,out]BOn entry, the N-by-NRHS right hand side matrix B. On exit, if return value = 0, the N-by-NRHS solution matrix X.
[in]LDBThe leading dimension of the array B. LDB >= max(1,N).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
>0if i, the leading minor of order i of A is not positive definite, so the factorization could not be completed, and the solution has not been computed.
See also:
PLASMA_zposv_Tile
PLASMA_zposv_Tile_Async
PLASMA_cposv
PLASMA_dposv
PLASMA_sposv

Definition at line 82 of file zposv.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_zposv_Tile_Async(), PlasmaLower, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver: checks the arguments, translates A and B to
     * tile layout, runs PLASMA_zposv_Tile_Async() and translates both
     * matrices back.
     * Returns PLASMA_SUCCESS, PLASMA_ERR_NOT_INITIALIZED, -i when the i-th
     * argument is illegal, or the status recorded in the sequence.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zposv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_zposv", "illegal value of uplo");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_zposv", "illegal value of N");
        return -2;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_zposv", "illegal value of NRHS");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zposv", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_zposv", "illegal value of LDB");
        return -7;
    }
    /* Quick return - currently NOT equivalent to LAPACK's
     * LAPACK does not have such check for DPOSV */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;

    /* Tune NB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_ZPOSV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zposv", "plasma_tune() failed");
        return status;
    }

    /* Set NT & NTRHS */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    /* Translate A and B from LAPACK layout to tile layout */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
    }

    /* Call the tile interface */
    PLASMA_zposv_Tile_Async(uplo, &descA, &descB, sequence, &request);

    /* Translate the results back and wait for all submitted work */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_zooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zpotrf ( PLASMA_enum  uplo,
int  N,
PLASMA_Complex64_t *A,
int  LDA 
)

PLASMA_zpotrf - Computes the Cholesky factorization of a symmetric positive definite (or Hermitian positive definite in the complex case) matrix A. The factorization has the form

\[ A = \begin{cases} U^H \times U, & \text{if uplo = PlasmaUpper} \\ L \times L^H, & \text{if uplo = PlasmaLower} \end{cases} \]

where U is an upper triangular matrix and L is a lower triangular matrix.

Parameters:
[in]uplo= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]NThe order of the matrix A. N >= 0.
[in,out]AOn entry, the symmetric positive definite (or Hermitian) matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if return value = 0, the factor U or L from the Cholesky factorization A = U**H*U or A = L*L**H.
[in]LDAThe leading dimension of the array A. LDA >= max(1,N).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
>0if i, the leading minor of order i of A is not positive definite, so the factorization could not be completed, and the solution has not been computed.
See also:
PLASMA_zpotrf_Tile
PLASMA_zpotrf_Tile_Async
PLASMA_cpotrf
PLASMA_dpotrf
PLASMA_spotrf
PLASMA_zpotrs

Definition at line 70 of file zpotrf.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_zpotrf_Tile_Async(), PlasmaLower, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver: checks the arguments, translates A to tile
     * layout, runs PLASMA_zpotrf_Tile_Async() and translates the factor back.
     * Returns PLASMA_SUCCESS, PLASMA_ERR_NOT_INITIALIZED, -i when the i-th
     * argument is illegal, or the status recorded in the sequence.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zpotrf", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_zpotrf", "illegal value of uplo");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_zpotrf", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zpotrf", "illegal value of LDA");
        return -4;
    }
    /* Quick return: nothing to factor for an empty matrix */
    if (max(N, 0) == 0)
        return PLASMA_SUCCESS;

    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZPOSV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zpotrf", "plasma_tune() failed");
        return status;
    }

    /* Set NT */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    /* Translate A from LAPACK layout to tile layout */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N);
    }

    /* Call the tile interface */
    PLASMA_zpotrf_Tile_Async(uplo, &descA, sequence, &request);

    /* Translate the result back and wait for all submitted work */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zpotri ( PLASMA_enum  uplo,
int  N,
PLASMA_Complex64_t *A,
int  LDA 
)

PLASMA_zpotri - Computes the inverse of a complex Hermitian positive definite matrix A using the Cholesky factorization A = U**H*U or A = L*L**H computed by PLASMA_zpotrf.

Parameters:
[in]uplo= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]NThe order of the matrix A. N >= 0.
[in,out]AOn entry, the triangular factor U or L from the Cholesky factorization A = U**H*U or A = L*L**H, as computed by PLASMA_zpotrf. On exit, the upper or lower triangle of the (Hermitian) inverse of A, overwriting the input factor U or L.
[in]LDAThe leading dimension of the array A. LDA >= max(1,N).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
>0if i, the (i,i) element of the factor U or L is zero, and the inverse could not be computed.
See also:
PLASMA_zpotri_Tile
PLASMA_zpotri_Tile_Async
PLASMA_cpotri
PLASMA_dpotri
PLASMA_spotri
PLASMA_zpotrf

Definition at line 62 of file zpotri.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_zpotri_Tile_Async(), PlasmaLower, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver: checks the arguments, translates A to tile
     * layout, runs PLASMA_zpotri_Tile_Async() and translates the inverse back.
     * Returns PLASMA_SUCCESS, PLASMA_ERR_NOT_INITIALIZED, -i when the i-th
     * argument is illegal, or the status recorded in the sequence.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zpotri", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_zpotri", "illegal value of uplo");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_zpotri", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zpotri", "illegal value of LDA");
        return -4;
    }
    /* Quick return: nothing to invert for an empty matrix */
    if (max(N, 0) == 0)
        return PLASMA_SUCCESS;

    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZPOSV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zpotri", "plasma_tune() failed");
        return status;
    }

    /* Set NT */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    /* Translate A from LAPACK layout to tile layout */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N);
    }

    /* Call the tile interface */
    PLASMA_zpotri_Tile_Async(uplo, &descA, sequence, &request);

    /* Translate the result back and wait for all submitted work */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zpotrs ( PLASMA_enum  uplo,
int  N,
int  NRHS,
PLASMA_Complex64_t *A,
int  LDA,
PLASMA_Complex64_t *B,
int  LDB 
)

PLASMA_zpotrs - Solves a system of linear equations A * X = B with a symmetric positive definite (or Hermitian positive definite in the complex case) matrix A using the Cholesky factorization A = U**H*U or A = L*L**H computed by PLASMA_zpotrf.

Parameters:
[in]uplo= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]NThe order of the matrix A. N >= 0.
[in]NRHSThe number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]AThe triangular factor U or L from the Cholesky factorization A = U**H*U or A = L*L**H, computed by PLASMA_zpotrf.
[in]LDAThe leading dimension of the array A. LDA >= max(1,N).
[in,out]BOn entry, the N-by-NRHS right hand side matrix B. On exit, if return value = 0, the N-by-NRHS solution matrix X.
[in]LDBThe leading dimension of the array B. LDB >= max(1,N).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
See also:
PLASMA_zpotrs_Tile
PLASMA_zpotrs_Tile_Async
PLASMA_cpotrs
PLASMA_dpotrs
PLASMA_spotrs
PLASMA_zpotrf

Definition at line 67 of file zpotrs.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_zpotrs_Tile_Async(), PlasmaLower, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver: checks the arguments, translates A and B to
     * tile layout, runs PLASMA_zpotrs_Tile_Async() and translates the
     * solution back.  In the out-of-place case only B is copied back, since
     * A is an input-only argument.
     * Returns PLASMA_SUCCESS, PLASMA_ERR_NOT_INITIALIZED, -i when the i-th
     * argument is illegal, or the status recorded in the sequence.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zpotrs", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_zpotrs", "illegal value of uplo");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_zpotrs", "illegal value of N");
        return -2;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_zpotrs", "illegal value of NRHS");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zpotrs", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_zpotrs", "illegal value of LDB");
        return -7;
    }
    /* Quick return: nothing to solve when either dimension is zero */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;

    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZPOSV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zpotrs", "plasma_tune() failed");
        return status;
    }

    /* Set NT & NTRHS */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    /* Translate A and B from LAPACK layout to tile layout */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
    }

    /* Call the tile interface */
    PLASMA_zpotrs_Tile_Async(uplo, &descA, &descB, sequence, &request);

    /* Translate the results back and wait for all submitted work */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zsymm ( PLASMA_enum  side,
PLASMA_enum  uplo,
int  M,
int  N,
PLASMA_Complex64_t  alpha,
PLASMA_Complex64_t *A,
int  LDA,
PLASMA_Complex64_t *B,
int  LDB,
PLASMA_Complex64_t  beta,
PLASMA_Complex64_t *C,
int  LDC 
)

PLASMA_zsymm - Performs one of the matrix-matrix operations

\[ C = \alpha \times A \times B + \beta \times C \]

or

\[ C = \alpha \times B \times A + \beta \times C \]

where alpha and beta are scalars, A is a symmetric matrix and B and C are m-by-n matrices.

Parameters:
[in]sideSpecifies whether the symmetric matrix A appears on the left or right in the operation as follows: = PlasmaLeft:

\[ C = \alpha \times A \times B + \beta \times C \]

= PlasmaRight:

\[ C = \alpha \times B \times A + \beta \times C \]

[in]uploSpecifies whether the upper or lower triangular part of the symmetric matrix A is to be referenced as follows: = PlasmaLower: Only the lower triangular part of the symmetric matrix A is to be referenced. = PlasmaUpper: Only the upper triangular part of the symmetric matrix A is to be referenced.
[in]MSpecifies the number of rows of the matrix C. M >= 0.
[in]NSpecifies the number of columns of the matrix C. N >= 0.
[in]alphaSpecifies the scalar alpha.
[in]AA is a LDA-by-ka matrix, where ka is M when side = PlasmaLeft, and is N otherwise. Only the uplo triangular part is referenced.
[in]LDAThe leading dimension of the array A. LDA >= max(1,ka).
[in]BB is a LDB-by-N matrix, where the leading M-by-N part of the array B must contain the matrix B.
[in]LDBThe leading dimension of the array B. LDB >= max(1,M).
[in]betaSpecifies the scalar beta.
[in,out]CC is a LDC-by-N matrix. On exit, the array is overwritten by the M by N updated matrix.
[in]LDCThe leading dimension of the array C. LDC >= max(1,M).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
See also:
PLASMA_zsymm_Tile
PLASMA_csymm
PLASMA_dsymm
PLASMA_ssymm

Definition at line 94 of file zsymm.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZSYMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_zsymm_Tile_Async(), PlasmaLeft, PlasmaLower, PlasmaRight, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver: checks the arguments, translates A, B and C to
     * tile layout, runs PLASMA_zsymm_Tile_Async() and translates the result
     * back.  In the out-of-place case only C is copied back, since A and B
     * are input-only arguments.
     * Returns PLASMA_SUCCESS, PLASMA_ERR_NOT_INITIALIZED, -i when the i-th
     * argument is illegal, or the status recorded in the sequence.
     */
    int NB;
    int Am;
    int status;
    PLASMA_desc descA, descB, descC;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zsymm", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments */
    if ( (side != PlasmaLeft) && (side != PlasmaRight) ){
        plasma_error("PLASMA_zsymm", "illegal value of side");
        return -1;
    }
    if ((uplo != PlasmaLower) && (uplo != PlasmaUpper)) {
        plasma_error("PLASMA_zsymm", "illegal value of uplo");
        return -2;
    }
    /* Order of the square matrix A: M when applied from the left, N otherwise */
    Am = ( side == PlasmaLeft ) ? M : N;
    if (M < 0) {
        plasma_error("PLASMA_zsymm", "illegal value of M");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_zsymm", "illegal value of N");
        return -4;
    }
    if (LDA < max(1, Am)) {
        plasma_error("PLASMA_zsymm", "illegal value of LDA");
        return -7;
    }
    if (LDB < max(1, M)) {
        plasma_error("PLASMA_zsymm", "illegal value of LDB");
        return -9;
    }
    if (LDC < max(1, M)) {
        plasma_error("PLASMA_zsymm", "illegal value of LDC");
        return -12;
    }

    /* Quick return: C is empty, or the update leaves C unchanged */
    if (M == 0 || N == 0 ||
        ((alpha == (PLASMA_Complex64_t)0.0) && beta == (PLASMA_Complex64_t)1.0))
        return PLASMA_SUCCESS;

    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZSYMM, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zsymm", "plasma_tune() failed");
        return status;
    }

    /* Set MT & NT & KT */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    /* Translate A, B and C from LAPACK layout to tile layout */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, Am, 0, 0, Am, Am,
                             plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N,
                             plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
        plasma_zooplap2tile( descC, C, NB, NB, LDC, N, 0, 0, M, N,
                             plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descC)));
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, Am, 0, 0, Am, Am );
        plasma_ziplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N );
        plasma_ziplap2tile( descC, C, NB, NB, LDC, N, 0, 0, M, N );
    }

    /* Call the tile interface */
    PLASMA_zsymm_Tile_Async(
        side, uplo, alpha, &descA, &descB, beta, &descC, sequence, &request);

    /* Translate the results back and wait for all submitted work */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
        plasma_desc_mat_free(&descC);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, Am );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, N );
        plasma_ziptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
    }

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zsyr2k ( PLASMA_enum  uplo,
PLASMA_enum  trans,
int  N,
int  K,
PLASMA_Complex64_t  alpha,
PLASMA_Complex64_t *A,
int  LDA,
PLASMA_Complex64_t *B,
int  LDB,
PLASMA_Complex64_t  beta,
PLASMA_Complex64_t *C,
int  LDC 
)

PLASMA_zsyr2k - Performs one of the symmetric rank 2k operations

\[ C = \alpha [ op( A ) \times conjg( op( B )' )] + \alpha [ op( B ) \times conjg( op( A )' )] + \beta C \]

, or

\[ C = \alpha [ conjg( op( A )' ) \times op( B ) ] + \alpha [ conjg( op( B )' ) \times op( A ) ] + \beta C \]

,

where op( X ) is one of

op( X ) = X or op( X ) = conjg( X' )

where alpha and beta are scalars, C is an n-by-n symmetric matrix, and A and B are n-by-k matrices in the first case and k-by-n matrices in the second case.

Parameters:
[in]uplo= PlasmaUpper: Upper triangle of C is stored; = PlasmaLower: Lower triangle of C is stored.
[in]transSpecifies whether the matrix A is transposed or conjugate transposed: = PlasmaNoTrans:

\[ C = \alpha [ op( A ) \times conjg( op( B )' )] + \alpha [ op( B ) \times conjg( op( A )' )] + \beta C \]

= PlasmaTrans:

\[ C = \alpha [ conjg( op( A )' ) \times op( B ) ] + \alpha [ conjg( op( B )' ) \times op( A ) ] + \beta C \]

[in]NN specifies the order of the matrix C. N must be at least zero.
[in]KK specifies the number of columns of the A and B matrices with trans = PlasmaNoTrans. K specifies the number of rows of the A and B matrices with trans = PlasmaTrans.
[in]alphaalpha specifies the scalar alpha.
[in]AA is a LDA-by-ka matrix, where ka is K when trans = PlasmaNoTrans, and is N otherwise.
[in]LDAThe leading dimension of the array A. If trans = PlasmaNoTrans, LDA must be at least max( 1, N ); otherwise LDA must be at least max( 1, K ).
[in]BB is a LDB-by-kb matrix, where kb is K when trans = PlasmaNoTrans, and is N otherwise.
[in]LDBThe leading dimension of the array B. If trans = PlasmaNoTrans, LDB must be at least max( 1, N ); otherwise LDB must be at least max( 1, K ).
[in]betabeta specifies the scalar beta.
[in,out]CC is a LDC-by-N matrix. On exit, the array uplo part of the matrix is overwritten by the uplo part of the updated matrix.
[in]LDCThe leading dimension of the array C. LDC >= max( 1, N ).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
See also:
PLASMA_zsyr2k_Tile
PLASMA_csyr2k
PLASMA_dsyr2k
PLASMA_ssyr2k

Definition at line 96 of file zsyr2k.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZSYRK, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_zsyr2k_Tile_Async(), PlasmaLower, PlasmaNoTrans, PlasmaTrans, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver: checks the arguments, translates A, B and C to
     * tile layout, runs PLASMA_zsyr2k_Tile_Async() and translates the result
     * back.  In the out-of-place case only C is copied back, since A and B
     * are input-only arguments.
     * Returns PLASMA_SUCCESS, PLASMA_ERR_NOT_INITIALIZED, -i when the i-th
     * argument is illegal, or the status recorded in the sequence.
     */
    int NB;
    int Am, An;
    int status;
    PLASMA_desc descA, descB, descC;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zsyr2k", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments */
    if ((uplo != PlasmaUpper) && (uplo != PlasmaLower)) {
        plasma_error("PLASMA_zsyr2k", "illegal value of uplo");
        return -1;
    }
    if ((trans != PlasmaNoTrans) && (trans != PlasmaTrans)) {
        plasma_error("PLASMA_zsyr2k", "illegal value of trans");
        return -2;
    }
    /* Stored shape of A and B: N-by-K when not transposed, K-by-N otherwise */
    if ( trans == PlasmaNoTrans ) {
        Am = N; An = K;
    } else {
        Am = K; An = N;
    }
    if (N < 0) {
        plasma_error("PLASMA_zsyr2k", "illegal value of N");
        return -3;
    }
    if (K < 0) {
        plasma_error("PLASMA_zsyr2k", "illegal value of K");
        return -4;
    }
    if (LDA < max(1, Am)) {
        plasma_error("PLASMA_zsyr2k", "illegal value of LDA");
        return -7;
    }
    if (LDB < max(1, Am)) {
        plasma_error("PLASMA_zsyr2k", "illegal value of LDB");
        return -9;
    }
    if (LDC < max(1, N)) {
        plasma_error("PLASMA_zsyr2k", "illegal value of LDC");
        return -12;
    }

    /* Quick return: C is empty, or the update leaves C unchanged */
    if (N == 0 ||
        ((alpha == (PLASMA_Complex64_t)0.0 || K == 0.0) && beta == (PLASMA_Complex64_t)1.0))
        return PLASMA_SUCCESS;

    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZSYRK, N, K, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zsyr2k", "plasma_tune() failed");
        return status;
    }

    /* Set MT & NT & KT */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    /* Translate A, B and C from LAPACK layout to tile layout */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
        plasma_zooplap2tile( descC, C, NB, NB, LDC, N,  0, 0, N,  N,  plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descC)));
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An );
        plasma_ziplap2tile( descB, B, NB, NB, LDB, An, 0, 0, Am, An );
        plasma_ziplap2tile( descC, C, NB, NB, LDC, N,  0, 0, N,  N  );
    }

    /* Call the tile interface */
    PLASMA_zsyr2k_Tile_Async(uplo, trans, alpha, &descA, &descB, beta, &descC, sequence, &request);

    /* Translate the results back and wait for all submitted work */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
        plasma_desc_mat_free(&descC);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, An );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, An );
        plasma_ziptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
    }

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zsyrk ( PLASMA_enum  uplo,
PLASMA_enum  trans,
int  N,
int  K,
PLASMA_Complex64_t  alpha,
PLASMA_Complex64_t *  A,
int  LDA,
PLASMA_Complex64_t  beta,
PLASMA_Complex64_t *  C,
int  LDC 
)

PLASMA_zsyrk - Performs one of the symmetric rank k operations

\[ C = \alpha [ op( A ) \times op( A )^T ] + \beta C \]

where op( X ) is one of

op( X ) = X or op( X ) = X^T

where alpha and beta are complex scalars, C is an n-by-n symmetric matrix and A is an n-by-k matrix in the first case and a k-by-n matrix in the second case.

Parameters:
[in]uplo= PlasmaUpper: Upper triangle of C is stored; = PlasmaLower: Lower triangle of C is stored.
[in]transSpecifies whether the matrix A is transposed: = PlasmaNoTrans: A is not transposed; = PlasmaTrans: A is transposed.
[in]NN specifies the order of the matrix C. N must be at least zero.
[in]KK specifies the number of columns of the matrix op( A ).
[in]alphaalpha specifies the scalar alpha.
[in]AA is an LDA-by-ka matrix, where ka is K when trans = PlasmaNoTrans, and is N otherwise.
[in]LDAThe leading dimension of the array A. If trans = PlasmaNoTrans, LDA must be at least max( 1, N ); otherwise LDA must be at least max( 1, K ).
[in]betabeta specifies the scalar beta
[in,out]CC is an LDC-by-N matrix. On exit, the uplo part of the array is overwritten by the uplo part of the updated matrix.
[in]LDCThe leading dimension of the array C. LDC >= max( 1, N ).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
See also:
PLASMA_zsyrk_Tile
PLASMA_csyrk
PLASMA_dsyrk
PLASMA_ssyrk

Definition at line 85 of file zsyrk.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZSYRK, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_zsyrk_Tile_Async(), PlasmaLower, PlasmaNoTrans, PlasmaTrans, PlasmaUpper, and plasma_sequence_t::status.

{
int NB;
int Am, An;
int status;
PLASMA_desc descA, descC;
PLASMA_sequence *sequence = NULL;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zsyrk", "PLASMA not initialized");
}
/* Check input arguments */
if ((uplo != PlasmaUpper) && (uplo != PlasmaLower)) {
plasma_error("PLASMA_zsyrk", "illegal value of uplo");
return -1;
}
if ((trans != PlasmaNoTrans) && (trans != PlasmaTrans)) {
plasma_error("PLASMA_zsyrk", "illegal value of trans");
return -2;
}
if ( trans == PlasmaNoTrans ) {
Am = N; An = K;
} else {
Am = K; An = N;
}
if (N < 0) {
plasma_error("PLASMA_zsyrk", "illegal value of N");
return -3;
}
if (K < 0) {
plasma_error("PLASMA_zsyrk", "illegal value of K");
return -4;
}
if (LDA < max(1, Am)) {
plasma_error("PLASMA_zsyrk", "illegal value of LDA");
return -7;
}
if (LDC < max(1, N)) {
plasma_error("PLASMA_zsyrk", "illegal value of LDC");
return -10;
}
/* Quick return */
if (N == 0 ||
((alpha == (PLASMA_Complex64_t)0.0 || K == 0.0) && beta == (PLASMA_Complex64_t)1.0))
/* Tune NB depending on M, N & NRHS; Set NBNBSIZE */
status = plasma_tune(PLASMA_FUNC_ZSYRK, N, K, 0);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_zsyrk", "plasma_tune() failed");
return status;
}
/* Set MT & NT & KT */
NB = PLASMA_NB;
plasma_sequence_create(plasma, &sequence);
plasma_zooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)) );
plasma_zooplap2tile( descC, C, NB, NB, LDC, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descC)));
} else {
plasma_ziplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An );
plasma_ziplap2tile( descC, C, NB, NB, LDC, N, 0, 0, N, N );
}
/* Call the tile interface */
PLASMA_zsyrk_Tile_Async(uplo, trans, alpha, &descA, beta, &descC, sequence, &request);
plasma_zooptile2lap( descC, C, NB, NB, LDC, N );
} else {
plasma_ziptile2lap( descA, A, NB, NB, LDA, An );
plasma_ziptile2lap( descC, C, NB, NB, LDC, N );
}
status = sequence->status;
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zTile_to_Lapack ( PLASMA_desc *  A,
PLASMA_Complex64_t *  Af77,
int  LDA 
)

PLASMA_zTile_to_Lapack - Conversion from tile layout to LAPACK layout.

Parameters:
[in]ADescriptor of the PLASMA matrix in tile layout.
[in,out]Af77LAPACK matrix. If PLASMA_TRANSLATION_MODE is set to PLASMA_INPLACE, Af77 has to be A->mat; if it is set to PLASMA_OUTOFPLACE, Af77 must be allocated by the caller beforehand.
[in]LDAThe leading dimension of the matrix Af77.
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
See also:
PLASMA_zTile_to_Lapack_Async
PLASMA_zLapack_to_Tile
PLASMA_cTile_to_Lapack
PLASMA_dTile_to_Lapack
PLASMA_sTile_to_Lapack

Definition at line 191 of file ztile.c.

References A, plasma_context_self(), plasma_desc_check(), plasma_dynamic_sync, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), plasma_pztile_to_lapack(), plasma_sequence_create(), plasma_sequence_destroy(), plasma_static_call_5, PLASMA_SUCCESS, and plasma_sequence_t::status.

{
PLASMA_desc descA = *A;
PLASMA_sequence *sequence = NULL;
PLASMA_request request;
int status;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zTile_to_Lapack", "PLASMA not initialized");
}
/* Check descriptor for correctness */
plasma_error("PLASMA_zTile_to_Lapack", "invalid descriptor");
}
plasma_sequence_create(plasma, &sequence);
PLASMA_desc, descA,
int, LDA,
PLASMA_sequence*, sequence,
PLASMA_request*, &request);
status = sequence->status;
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_ztrmm ( PLASMA_enum  side,
PLASMA_enum  uplo,
PLASMA_enum  transA,
PLASMA_enum  diag,
int  N,
int  NRHS,
PLASMA_Complex64_t  alpha,
PLASMA_Complex64_t *  A,
int  LDA,
PLASMA_Complex64_t *  B,
int  LDB 
)

PLASMA_ztrmm - Computes B = alpha*op( A )*B or B = alpha*B*op( A ).

Parameters:
[in]sideSpecifies whether A appears on the left or on the right of B: = PlasmaLeft: B = alpha*op( A )*B; = PlasmaRight: B = alpha*B*op( A ).
[in]uploSpecifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]transASpecifies whether the matrix A is transposed, not transposed or conjugate transposed: = PlasmaNoTrans: A is not transposed; = PlasmaTrans: A is transposed; = PlasmaConjTrans: A is conjugate transposed.
[in]diagSpecifies whether or not A is unit triangular: = PlasmaNonUnit: A is non unit; = PlasmaUnit: A is unit.
[in]NThe order of the matrix A. N >= 0.
[in]NRHSThe number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]alphaalpha specifies the scalar alpha.
[in]AThe triangular matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of the array A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of the array A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If diag = PlasmaUnit, the diagonal elements of A are also not referenced and are assumed to be 1.
[in]LDAThe leading dimension of the array A. LDA >= max(1,N).
[in,out]BOn entry, the N-by-NRHS matrix B. On exit, if return value = 0, B is overwritten by the product alpha*op( A )*B or alpha*B*op( A ).
[in]LDBThe leading dimension of the array B. LDB >= max(1,N).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
See also:
PLASMA_ztrmm_Tile
PLASMA_ztrmm_Tile_Async
PLASMA_ctrmm
PLASMA_dtrmm
PLASMA_strmm

Definition at line 88 of file ztrmm.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_ztrmm_Tile_Async(), PlasmaConjTrans, PlasmaLeft, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaRight, PlasmaTrans, PlasmaUnit, PlasmaUpper, and plasma_sequence_t::status.

{
int NB, NA;
int status;
PLASMA_sequence *sequence = NULL;
PLASMA_desc descA, descB;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_ztrmm", "PLASMA not initialized");
}
/* Check input arguments */
if (side != PlasmaLeft && side != PlasmaRight) {
plasma_error("PLASMA_ztrmm", "illegal value of side");
return -1;
}
if (uplo != PlasmaUpper && uplo != PlasmaLower) {
plasma_error("PLASMA_ztrmm", "illegal value of uplo");
return -2;
}
if (transA != PlasmaConjTrans && transA != PlasmaNoTrans && transA != PlasmaTrans ) {
plasma_error("PLASMA_ztrmm", "illegal value of transA");
return -3;
}
plasma_error("PLASMA_ztrmm", "illegal value of diag");
return -4;
}
if (N < 0) {
plasma_error("PLASMA_ztrmm", "illegal value of N");
return -5;
}
if (NRHS < 0) {
plasma_error("PLASMA_ztrmm", "illegal value of NRHS");
return -6;
}
if (LDA < max(1, N)) {
plasma_error("PLASMA_ztrmm", "illegal value of LDA");
return -8;
}
if (LDB < max(1, N)) {
plasma_error("PLASMA_ztrmm", "illegal value of LDB");
return -10;
}
/* Quick return */
if (min(N, NRHS) == 0)
/* Tune NB depending on M, N & NRHS; Set NBNB */
status = plasma_tune(PLASMA_FUNC_ZPOSV, N, N, NRHS);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_ztrmm", "plasma_tune() failed");
return status;
}
/* Set NT & NTRHS */
NB = PLASMA_NB;
if (side == PlasmaLeft) {
NA = N;
} else {
NA = NRHS;
}
plasma_sequence_create(plasma, &sequence);
plasma_zooplap2tile( descA, A, NB, NB, LDA, NA, 0, 0, NA, NA, plasma_desc_mat_free(&(descA)) );
plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
} else {
plasma_ziplap2tile( descA, A, NB, NB, LDA, NA, 0, 0, NA, NA );
plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
}
/* Call the tile interface */
side, uplo, transA, diag, alpha, &descA, &descB, sequence, &request);
plasma_zooptile2lap( descB, B, NB, NB, LDB, NRHS );
} else {
plasma_ziptile2lap( descA, A, NB, NB, LDA, NA );
plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
}
status = sequence->status;
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_ztrsm ( PLASMA_enum  side,
PLASMA_enum  uplo,
PLASMA_enum  transA,
PLASMA_enum  diag,
int  N,
int  NRHS,
PLASMA_Complex64_t  alpha,
PLASMA_Complex64_t *  A,
int  LDA,
PLASMA_Complex64_t *  B,
int  LDB 
)

PLASMA_ztrsm - Computes triangular solve A*X = B or X*A = B.

Parameters:
[in]sideSpecifies whether A appears on the left or on the right of X: = PlasmaLeft: A*X = B = PlasmaRight: X*A = B
[in]uploSpecifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]transASpecifies whether the matrix A is transposed, not transposed or conjugate transposed: = PlasmaNoTrans: A is not transposed; = PlasmaTrans: A is transposed; = PlasmaConjTrans: A is conjugate transposed.
[in]diagSpecifies whether or not A is unit triangular: = PlasmaNonUnit: A is non unit; = PlasmaUnit: A is unit.
[in]NThe order of the matrix A. N >= 0.
[in]NRHSThe number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]alphaalpha specifies the scalar alpha.
[in]AThe triangular matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of the array A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of the array A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If diag = PlasmaUnit, the diagonal elements of A are also not referenced and are assumed to be 1.
[in]LDAThe leading dimension of the array A. LDA >= max(1,N).
[in,out]BOn entry, the N-by-NRHS right hand side matrix B. On exit, if return value = 0, the N-by-NRHS solution matrix X.
[in]LDBThe leading dimension of the array B. LDB >= max(1,N).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
See also:
PLASMA_ztrsm_Tile
PLASMA_ztrsm_Tile_Async
PLASMA_ctrsm
PLASMA_dtrsm
PLASMA_strsm

Definition at line 88 of file ztrsm.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_ztrsm_Tile_Async(), PlasmaConjTrans, PlasmaLeft, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaRight, PlasmaTrans, PlasmaUnit, PlasmaUpper, and plasma_sequence_t::status.

{
int NB, NA;
int status;
PLASMA_sequence *sequence = NULL;
PLASMA_desc descA, descB;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_ztrsm", "PLASMA not initialized");
}
/* Check input arguments */
if (side != PlasmaLeft && side != PlasmaRight) {
plasma_error("PLASMA_ztrsm", "illegal value of side");
return -1;
}
if (uplo != PlasmaUpper && uplo != PlasmaLower) {
plasma_error("PLASMA_ztrsm", "illegal value of uplo");
return -2;
}
if (transA != PlasmaConjTrans && transA != PlasmaNoTrans && transA != PlasmaTrans ) {
plasma_error("PLASMA_ztrsm", "illegal value of transA");
return -3;
}
plasma_error("PLASMA_ztrsm", "illegal value of diag");
return -4;
}
if (N < 0) {
plasma_error("PLASMA_ztrsm", "illegal value of N");
return -5;
}
if (NRHS < 0) {
plasma_error("PLASMA_ztrsm", "illegal value of NRHS");
return -6;
}
if (LDA < max(1, N)) {
plasma_error("PLASMA_ztrsm", "illegal value of LDA");
return -8;
}
if (LDB < max(1, N)) {
plasma_error("PLASMA_ztrsm", "illegal value of LDB");
return -10;
}
/* Quick return */
if (min(N, NRHS) == 0)
/* Tune NB depending on M, N & NRHS; Set NBNB */
status = plasma_tune(PLASMA_FUNC_ZPOSV, N, N, NRHS);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_ztrsm", "plasma_tune() failed");
return status;
}
/* Set NT & NTRHS */
NB = PLASMA_NB;
if (side == PlasmaLeft) {
NA = N;
} else {
NA = NRHS;
}
plasma_sequence_create(plasma, &sequence);
plasma_zooplap2tile( descA, A, NB, NB, LDA, NA, 0, 0, NA, NA, plasma_desc_mat_free(&(descA)) );
plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
} else {
plasma_ziplap2tile( descA, A, NB, NB, LDA, NA, 0, 0, NA, NA );
plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
}
/* Call the tile interface */
side, uplo, transA, diag, alpha, &descA, &descB, sequence, &request);
plasma_zooptile2lap( descB, B, NB, NB, LDB, NRHS );
} else {
plasma_ziptile2lap( descA, A, NB, NB, LDA, NA );
plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
}
status = sequence->status;
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_ztrsmpl ( int  N,
int  NRHS,
PLASMA_Complex64_t *  A,
int  LDA,
PLASMA_Complex64_t *  L,
int *  IPIV,
PLASMA_Complex64_t *  B,
int  LDB 
)

PLASMA_ztrsmpl - Performs the forward substitution step of solving a system of linear equations after the tile LU factorization of the matrix.

Parameters:
[in]NThe order of the matrix A. N >= 0.
[in]NRHSThe number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]AThe tile factor L from the factorization, computed by PLASMA_zgetrf_incpiv.
[in]LDAThe leading dimension of the array A. LDA >= max(1,N).
[in]LAuxiliary factorization data, related to the tile L factor, computed by PLASMA_zgetrf_incpiv.
[in]IPIVThe pivot indices from PLASMA_zgetrf_incpiv (not equivalent to LAPACK).
[in,out]BOn entry, the N-by-NRHS right hand side matrix B. On exit, if return value = 0, the N-by-NRHS solution matrix X.
[in]LDBThe leading dimension of the array B. LDB >= max(1,N).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
See also:
PLASMA_ztrsmpl_Tile
PLASMA_ztrsmpl_Tile_Async
PLASMA_ctrsmpl
PLASMA_dtrsmpl
PLASMA_strsmpl
PLASMA_zgetrf_incpiv

Definition at line 67 of file ztrsmpl.c.

References L, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGESV, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_ztrsmpl_Tile_Async(), PlasmaComplexDouble, and plasma_sequence_t::status.

{
int NB, IB, IBNB, NT;
int status;
PLASMA_sequence *sequence = NULL;
PLASMA_desc descA, descB, descL;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_ztrsmpl", "PLASMA not initialized");
}
/* Check input arguments */
if (N < 0) {
plasma_error("PLASMA_ztrsmpl", "illegal value of N");
return -1;
}
if (NRHS < 0) {
plasma_error("PLASMA_ztrsmpl", "illegal value of NRHS");
return -2;
}
if (LDA < max(1, N)) {
plasma_error("PLASMA_ztrsmpl", "illegal value of LDA");
return -4;
}
if (LDB < max(1, N)) {
plasma_error("PLASMA_ztrsmpl", "illegal value of LDB");
return -8;
}
/* Quick return */
if (min(N, NRHS) == 0)
/* Tune NB & IB depending on N & NRHS; Set NBNB */
status = plasma_tune(PLASMA_FUNC_ZGESV, N, N, NRHS);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_ztrsmpl", "plasma_tune() failed");
return status;
}
/* Set Mt, NT & NTRHS */
NB = PLASMA_NB;
IB = PLASMA_IB;
IBNB = IB*NB;
NT = (N%NB==0) ? (N/NB) : (N/NB+1);
plasma_sequence_create(plasma, &sequence);
IB, NB, IBNB,
NT*IB, NT*NB, 0, 0, NT*IB, NT*NB);
descL.mat = L;
plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N , plasma_desc_mat_free(&(descA)) );
plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
} else {
plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
}
/* Call the tile interface */
PLASMA_ztrsmpl_Tile_Async(&descA, &descL, IPIV, &descB, sequence, &request);
plasma_zooptile2lap( descB, B, NB, NB, LDB, NRHS );
} else {
plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
}
status = sequence->status;
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_ztrsmrv ( PLASMA_enum  side,
PLASMA_enum  uplo,
PLASMA_enum  transA,
PLASMA_enum  diag,
int  N,
int  NRHS,
PLASMA_Complex64_t  alpha,
PLASMA_Complex64_t *  A,
int  LDA,
PLASMA_Complex64_t *  B,
int  LDB 
)

PLASMA_ztrsmrv - Computes triangular solve A*X = B or X*A = B.

Parameters:
[in]sideSpecifies whether A appears on the left or on the right of X: = PlasmaLeft: A*X = B = PlasmaRight: X*A = B
[in]uploSpecifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]transASpecifies whether the matrix A is transposed, not transposed or conjugate transposed: = PlasmaNoTrans: A is not transposed; = PlasmaTrans: A is transposed; = PlasmaConjTrans: A is conjugate transposed.
[in]diagSpecifies whether or not A is unit triangular: = PlasmaNonUnit: A is non unit; = PlasmaUnit: A is unit.
[in]NThe order of the matrix A. N >= 0.
[in]NRHSThe number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]alphaalpha specifies the scalar alpha.
[in]AThe triangular matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of the array A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of the array A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If diag = PlasmaUnit, the diagonal elements of A are also not referenced and are assumed to be 1.
[in]LDAThe leading dimension of the array A. LDA >= max(1,N).
[in,out]BOn entry, the N-by-NRHS right hand side matrix B. On exit, if return value = 0, the N-by-NRHS solution matrix X.
[in]LDBThe leading dimension of the array B. LDB >= max(1,N).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
See also:
PLASMA_ztrsmrv_Tile
PLASMA_ztrsmrv_Tile_Async
PLASMA_ctrsmrv
PLASMA_dtrsmrv
PLASMA_strsmrv

Definition at line 88 of file ztrsmrv.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_ztrsmrv_Tile_Async(), PlasmaConjTrans, PlasmaLeft, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaRight, PlasmaTrans, PlasmaUnit, PlasmaUpper, and plasma_sequence_t::status.

{
int NB, NA;
int status;
PLASMA_sequence *sequence = NULL;
PLASMA_desc descA, descB;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_ztrsmrv", "PLASMA not initialized");
}
/* Check input arguments */
if (side != PlasmaLeft && side != PlasmaRight) {
plasma_error("PLASMA_ztrsmrv", "illegal value of side");
return -1;
}
if (uplo != PlasmaUpper && uplo != PlasmaLower) {
plasma_error("PLASMA_ztrsmrv", "illegal value of uplo");
return -2;
}
if (transA != PlasmaConjTrans && transA != PlasmaNoTrans && transA != PlasmaTrans ) {
plasma_error("PLASMA_ztrsmrv", "illegal value of transA");
return -3;
}
plasma_error("PLASMA_ztrsmrv", "illegal value of diag");
return -4;
}
if (N < 0) {
plasma_error("PLASMA_ztrsmrv", "illegal value of N");
return -5;
}
if (NRHS < 0) {
plasma_error("PLASMA_ztrsmrv", "illegal value of NRHS");
return -6;
}
if (LDA < max(1, N)) {
plasma_error("PLASMA_ztrsmrv", "illegal value of LDA");
return -8;
}
if (LDB < max(1, N)) {
plasma_error("PLASMA_ztrsmrv", "illegal value of LDB");
return -10;
}
/* Quick return */
if (min(N, NRHS) == 0)
/* Tune NB depending on M, N & NRHS; Set NBNB */
status = plasma_tune(PLASMA_FUNC_ZPOSV, N, N, NRHS);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_ztrsmrv", "plasma_tune() failed");
return status;
}
/* Set NT & NTRHS */
NB = PLASMA_NB;
if (side == PlasmaLeft) {
NA = N;
} else {
NA = NRHS;
}
plasma_sequence_create(plasma, &sequence);
plasma_zooplap2tile( descA, A, NB, NB, LDA, NA, 0, 0, NA, NA, plasma_desc_mat_free(&(descA)) );
plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
} else {
plasma_ziplap2tile( descA, A, NB, NB, LDA, NA, 0, 0, NA, NA );
plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
}
/* Call the tile interface */
side, uplo, transA, diag, alpha, &descA, &descB, sequence, &request);
plasma_zooptile2lap( descB, B, NB, NB, LDB, NRHS );
} else {
plasma_ziptile2lap( descA, A, NB, NB, LDA, NA );
plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
}
status = sequence->status;
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

int PLASMA_ztrtri ( PLASMA_enum  uplo,
PLASMA_enum  diag,
int  N,
PLASMA_Complex64_t *  A,
int  LDA 
)

PLASMA_ztrtri - Computes the inverse of a complex upper or lower triangular matrix A.

Parameters:
[in]uplo= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]diag= PlasmaNonUnit: A is non-unit triangular; = PlasmaUnit: A is unit triangular.
[in]NThe order of the matrix A. N >= 0.
[in,out]AOn entry, the triangular matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of the array A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of the array A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If DIAG = 'U', the diagonal elements of A are also not referenced and are assumed to be 1. On exit, the (triangular) inverse of the original matrix, in the same storage format.
[in]LDAThe leading dimension of the array A. LDA >= max(1,N).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
>0if i, A(i,i) is exactly zero. The triangular matrix is singular and its inverse can not be computed.
See also:
PLASMA_ztrtri_Tile
PLASMA_ztrtri_Tile_Async
PLASMA_ctrtri
PLASMA_dtrtri
PLASMA_strtri
PLASMA_zpotri

Definition at line 70 of file ztrtri.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_ztrtri_Tile_Async(), PlasmaLower, PlasmaNonUnit, PlasmaUnit, PlasmaUpper, and plasma_sequence_t::status.

{
int NB;
int status;
PLASMA_sequence *sequence = NULL;
PLASMA_desc descA;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_ztrtri", "PLASMA not initialized");
}
/* Check input arguments */
if (uplo != PlasmaUpper && uplo != PlasmaLower) {
plasma_error("PLASMA_ztrtri", "illegal value of uplo");
return -1;
}
plasma_error("PLASMA_ztrtri", "illegal value of diag");
return -2;
}
if (N < 0) {
plasma_error("PLASMA_ztrtri", "illegal value of N");
return -3;
}
if (LDA < max(1, N)) {
plasma_error("PLASMA_ztrtri", "illegal value of LDA");
return -5;
}
/* Quick return */
if (max(N, 0) == 0)
/* Tune NB depending on M, N & NRHS; Set NBNB */
status = plasma_tune(PLASMA_FUNC_ZPOSV, N, N, 0);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_ztrtri", "plasma_tune() failed");
return status;
}
/* Set NT */
NB = PLASMA_NB;
plasma_sequence_create(plasma, &sequence);
plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
} else {
plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N);
}
/* Call the tile interface */
PLASMA_ztrtri_Tile_Async(uplo, diag, &descA, sequence, &request);
plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
} else {
plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
}
status = sequence->status;
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

int PLASMA_zunglq ( int  M,
int  N,
int  K,
PLASMA_Complex64_t *  A,
int  LDA,
PLASMA_Complex64_t *  T,
PLASMA_Complex64_t *  Q,
int  LDQ 
)

PLASMA_zunglq - Generates an M-by-N matrix Q with orthonormal rows, which is defined as the first M rows of a product of the elementary reflectors returned by PLASMA_zgelqf.

Parameters:
[in]MThe number of rows of the matrix Q. M >= 0.
[in]NThe number of columns of the matrix Q. N >= M.
[in]KThe number of rows of elementary tile reflectors whose product defines the matrix Q. M >= K >= 0.
[in]ADetails of the LQ factorization of the original matrix A as returned by PLASMA_zgelqf.
[in]LDAThe leading dimension of the array A. LDA >= max(1,M).
[in]TAuxiliary factorization data, computed by PLASMA_zgelqf.
[out]QOn exit, the M-by-N matrix Q.
[in]LDQThe leading dimension of the array Q. LDQ >= max(1,M).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
See also:
PLASMA_zunglq_Tile
PLASMA_zunglq_Tile_Async
PLASMA_cunglq
PLASMA_dorglq
PLASMA_sorglq
PLASMA_zgelqf

Definition at line 68 of file zunglq.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_ZGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_zunglq_Tile_Async(), PlasmaComplexDouble, plasma_sequence_t::status, and T.

{
int NB, IB, IBNB, KT, NT;
int status;
PLASMA_sequence *sequence = NULL;
PLASMA_desc descA, descQ, descT;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zunglq", "PLASMA not initialized");
}
if (M < 0) {
plasma_error("PLASMA_zunglq", "illegal value of M");
return -1;
}
if (N < M) {
plasma_error("PLASMA_zunglq", "illegal value of N");
return -2;
}
if (K < 0 || K > M) {
plasma_error("PLASMA_zunglq", "illegal value of K");
return -3;
}
if (LDA < max(1, M)) {
plasma_error("PLASMA_zunglq", "illegal value of LDA");
return -5;
}
if (LDQ < max(1, M)) {
plasma_error("PLASMA_zunglq", "illegal value of LDQ");
return -8;
}
/* Quick return - currently NOT equivalent to LAPACK's:
* CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDQ ) */
if (min(M, min(N, K)) == 0)
/* Tune NB & IB depending on M, N & NRHS; Set NBNB */
status = plasma_tune(PLASMA_FUNC_ZGELS, M, N, 0);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_zunglq", "plasma_tune() failed");
return status;
}
/* Set MT & NT */
NB = PLASMA_NB;
IB = PLASMA_IB;
IBNB = IB*NB;
NT = (N%NB==0) ? (N/NB) : (N/NB+1);
KT = (K%NB==0) ? (K/NB) : (K/NB+1);
plasma_sequence_create(plasma, &sequence);
IB, NB, IBNB,
KT*IB, NT*NB, 0, 0, KT*IB, NT*NB);
}
else {
/* Double the size of T to accomodate the tree reduction phase */
IB, NB, IBNB,
KT*IB, 2*NT*NB, 0, 0, KT*IB, 2*NT*NB);
}
descT.mat = T;
plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K, N, plasma_desc_mat_free(&(descA)) );
plasma_zooplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descQ)));
} else {
plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K, N);
plasma_ziplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, M, N);
}
/* Call the tile interface */
PLASMA_zunglq_Tile_Async(&descA, &descT, &descQ, sequence, &request);
plasma_zooptile2lap( descQ, Q, NB, NB, LDQ, N );
} else {
plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
plasma_ziptile2lap( descQ, Q, NB, NB, LDQ, N );
}
status = sequence->status;
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zungqr ( int  M,
int  N,
int  K,
PLASMA_Complex64_t * A,
int  LDA,
PLASMA_Complex64_t * T,
PLASMA_Complex64_t * Q,
int  LDQ 
)

PLASMA_zungqr - Generates an M-by-N matrix Q with orthonormal columns, which is defined as the first N columns of a product of the elementary reflectors returned by PLASMA_zgeqrf.

Parameters:
[in]MThe number of rows of the matrix Q. M >= 0.
[in]NThe number of columns of the matrix Q. M >= N >= 0.
[in]KThe number of columns of elementary tile reflectors whose product defines the matrix Q. N >= K >= 0.
[in]ADetails of the QR factorization of the original matrix A as returned by PLASMA_zgeqrf.
[in]LDAThe leading dimension of the array A. LDA >= max(1,M).
[in]TAuxiliary factorization data, computed by PLASMA_zgeqrf.
[out]QOn exit, the M-by-N matrix Q.
[in]LDQThe leading dimension of the array Q. LDQ >= max(1,M).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
See also:
PLASMA_zungqr_Tile
PLASMA_zungqr_Tile_Async
PLASMA_cungqr
PLASMA_dorgqr
PLASMA_sorgqr
PLASMA_zgeqrf

Definition at line 68 of file zungqr.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_ZGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_zungqr_Tile_Async(), PlasmaComplexDouble, plasma_sequence_t::status, and T.

{
/* Body of PLASMA_zungqr (LAPACK-layout interface): validates the arguments,
 * tunes the block sizes, converts A and Q to tile layout, calls
 * PLASMA_zungqr_Tile_Async() and converts the result back.
 *
 * NOTE(review): this listing appears to have lost several source lines
 * (the declarations of `plasma` and `request`, the heads of the
 * plasma_desc_init() calls, the conditions opening some of the blocks that
 * are closed below, and the quick-return statement). The "References" list
 * for this function still names plasma_desc_init, PLASMA_REQUEST_INITIALIZER,
 * PLASMA_FLAT_HOUSEHOLDER, PLASMA_OUTOFPLACE and plasma_dynamic_sync, none of
 * which are visible here - confirm against zungqr.c. */
int NB, IB, IBNB, MT, KT;
int status;
PLASMA_sequence *sequence = NULL;
PLASMA_desc descA, descQ, descT;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zungqr", "PLASMA not initialized");
}
/* Check input arguments: each failure returns -i, where i is the 1-based
 * position of the offending argument in the signature. */
if (M < 0) {
plasma_error("PLASMA_zungqr", "illegal value of M");
return -1;
}
/* Q is M-by-N, so N may not exceed M. */
if (N < 0 || N > M) {
plasma_error("PLASMA_zungqr", "illegal value of N");
return -2;
}
/* The number of reflectors K may not exceed N. */
if (K < 0 || K > N) {
plasma_error("PLASMA_zungqr", "illegal value of K");
return -3;
}
if (LDA < max(1, M)) {
plasma_error("PLASMA_zungqr", "illegal value of LDA");
return -5;
}
if (LDQ < max(1, M)) {
plasma_error("PLASMA_zungqr", "illegal value of LDQ");
return -8;
}
/* Quick return when any of M, N, K is zero.
 * NOTE(review): the statement guarded by this `if` is not shown in the
 * listing; as printed, the `if` would instead guard the plasma_tune() call
 * below - confirm against zungqr.c. */
if (min(M, min(N, K)) == 0)
/* Tune NB & IB depending on M & N; Set NBNB */
status = plasma_tune(PLASMA_FUNC_ZGELS, M, N, 0);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_zungqr", "plasma_tune() failed");
return status;
}
/* Read the tuned block sizes and set MT & KT: the number of NB-sized tiles
 * needed to cover M and K respectively (ceiling division). */
NB = PLASMA_NB;
IB = PLASMA_IB;
IBNB = IB*NB;
MT = (M%NB==0) ? (M/NB) : (M/NB+1);
KT = (K%NB==0) ? (K/NB) : (K/NB+1);
plasma_sequence_create(plasma, &sequence);
/* NOTE(review): the two argument lists below are the tails of calls whose
 * heads are missing from the listing - presumably plasma_desc_init() for
 * descT, selected by the householder mode; verify against zungqr.c. */
IB, NB, IBNB,
MT*IB, KT*NB, 0, 0, MT*IB, KT*NB);
}
else {
/* Double the size of T to accommodate the tree reduction phase */
IB, NB, IBNB,
MT*IB, 2*KT*NB, 0, 0, MT*IB, 2*KT*NB);
}
/* The descriptor for T wraps the caller-supplied buffer directly. */
descT.mat = T;
/* Convert A (M-by-K) and Q (M-by-N) from LAPACK layout to tile layout.
 * Out-of-place variant first; the trailing macro argument is a cleanup
 * expression - presumably run if the conversion fails (TODO confirm).
 * NOTE(review): descA is given N as its column-count argument here and in
 * plasma_ziplap2tile below, but K in plasma_ziptile2lap after the
 * computation - confirm which value is intended. */
plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, K, plasma_desc_mat_free(&(descA)) );
plasma_zooplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descQ)));
} else {
/* In-place variant of the same conversion. */
plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, K);
plasma_ziplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, M, N);
}
/* Call the tile interface */
PLASMA_zungqr_Tile_Async(&descA, &descT, &descQ, sequence, &request);
/* Convert the result back to LAPACK layout: only Q in the out-of-place
 * branch, both A and Q in the in-place branch. */
plasma_zooptile2lap( descQ, Q, NB, NB, LDQ, N );
} else {
plasma_ziptile2lap( descA, A, NB, NB, LDA, K );
plasma_ziptile2lap( descQ, Q, NB, NB, LDQ, N );
}
/* Report the status recorded in the sequence, then release the sequence. */
status = sequence->status;
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zunmlq ( PLASMA_enum  side,
PLASMA_enum  trans,
int  M,
int  N,
int  K,
PLASMA_Complex64_t * A,
int  LDA,
PLASMA_Complex64_t * T,
PLASMA_Complex64_t * B,
int  LDB 
)

PLASMA_zunmlq - overwrites the general M-by-N matrix C with Q*C, where Q is an orthogonal matrix (unitary in the complex case) defined as the product of elementary reflectors returned by PLASMA_zgelqf. Q is of order M.

Parameters:
[in]sideIntended usage: = PlasmaLeft: apply Q or Q**H from the left; = PlasmaRight: apply Q or Q**H from the right. Currently only PlasmaLeft is supported.
[in]transIntended usage: = PlasmaNoTrans: no transpose, apply Q; = PlasmaConjTrans: conjugate transpose, apply Q**H. Currently only PlasmaConjTrans is supported.
[in]MThe number of rows of the matrix C. M >= 0.
[in]NThe number of columns of the matrix C. N >= 0.
[in]KThe number of rows of elementary tile reflectors whose product defines the matrix Q. If side == PlasmaLeft, M >= K >= 0. If side == PlasmaRight, N >= K >= 0.
[in]ADetails of the LQ factorization of the original matrix A as returned by PLASMA_zgelqf.
[in]LDAThe leading dimension of the array A. LDA >= max(1,K).
[in]TAuxiliary factorization data, computed by PLASMA_zgelqf.
[in,out]BOn entry, the M-by-N matrix B. On exit, B is overwritten by Q*B or Q**H*B.
[in]LDBThe leading dimension of the array B. LDB >= max(1,M).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
See also:
PLASMA_zunmlq_Tile
PLASMA_zunmlq_Tile_Async
PLASMA_cunmlq
PLASMA_dormlq
PLASMA_sormlq
PLASMA_zgelqf

Definition at line 83 of file zunmlq.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_ZGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_zunmlq_Tile_Async(), PlasmaComplexDouble, PlasmaConjTrans, PlasmaLeft, PlasmaNoTrans, PlasmaRight, plasma_sequence_t::status, and T.

{
/* Body of PLASMA_zunmlq (LAPACK-layout interface): validates the arguments,
 * tunes the block sizes, converts A and B to tile layout, calls the tile
 * interface and converts the result back.
 *
 * NOTE(review): this listing appears to have lost several source lines
 * (the declarations of `plasma` and `request`, the condition of the `trans`
 * check, the heads of the plasma_desc_init() and tile-interface calls, the
 * conditions opening some of the blocks closed below, and the quick-return
 * statement) - confirm against zunmlq.c. */
int NB, IB, IBNB, KT, NT, An;
int status;
PLASMA_sequence *sequence = NULL;
PLASMA_desc descA, descB, descT;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zunmlq", "PLASMA not initialized");
}
/* An is the dimension of B that Q acts on: M when Q is applied from the
 * left, N otherwise. It bounds K and is the column count of A. */
if (side == PlasmaLeft)
An = M;
else
An = N;
/* Check input arguments */
/* Each failure returns -i, where i is the 1-based position of the offending
 * argument in the signature. */
if ( (side != PlasmaLeft) && (side != PlasmaRight) ) {
plasma_error("PLASMA_zunmlq", "illegal value of side");
return -1;
}
/* NOTE(review): the `if` that guards the following `trans` error is missing
 * from the listing. */
plasma_error("PLASMA_zunmlq", "illegal value of trans");
return -2;
}
if (M < 0) {
plasma_error("PLASMA_zunmlq", "illegal value of M");
return -3;
}
if (N < 0) {
plasma_error("PLASMA_zunmlq", "illegal value of N");
return -4;
}
if ((K < 0) || (K > An)) {
plasma_error("PLASMA_zunmlq", "illegal value of K");
return -5;
}
if (LDA < max(1, K)) {
plasma_error("PLASMA_zunmlq", "illegal value of LDA");
return -7;
}
if (LDB < max(1, M)) {
plasma_error("PLASMA_zunmlq", "illegal value of LDB");
return -10;
}
/* Quick return - currently NOT equivalent to LAPACK's:
* CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) */
/* NOTE(review): the statement guarded by this `if` is not shown in the
 * listing; as printed, the `if` would instead guard the plasma_tune() call
 * below - confirm against zunmlq.c. */
if (min(M, min(N, K)) == 0)
/* Tune NB & IB depending on M, K & N; Set NBNB */
status = plasma_tune(PLASMA_FUNC_ZGELS, M, K, N);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_zunmlq", "plasma_tune() failed");
return status;
}
/* Read the tuned block sizes and set KT & NT: the number of NB-sized tiles
 * needed to cover K and An respectively (ceiling division). */
NB = PLASMA_NB;
IB = PLASMA_IB;
IBNB = IB*NB;
KT = ( K%NB==0) ? (K /NB) : (K /NB+1);
NT = (An%NB==0) ? (An/NB) : (An/NB+1);
plasma_sequence_create(plasma, &sequence);
/* NOTE(review): the two argument lists below are the tails of calls whose
 * heads are missing from the listing - presumably plasma_desc_init() for
 * descT, selected by the householder mode; verify against zunmlq.c. */
IB, NB, IBNB,
KT*IB, NT*NB, 0, 0, KT*IB, NT*NB);
}
else {
/* Double the size of T to accommodate the tree reduction phase */
IB, NB, IBNB,
KT*IB, 2*NT*NB, 0, 0, KT*IB, 2*NT*NB);
}
/* The descriptor for T wraps the caller-supplied buffer directly. */
descT.mat = T;
/* Convert A (K-by-An) and B (M-by-N) from LAPACK layout to tile layout.
 * Out-of-place variant first; the trailing macro argument is a cleanup
 * expression - presumably run if the conversion fails (TODO confirm). */
plasma_zooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, K, An, plasma_desc_mat_free(&(descA)) );
plasma_zooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
} else {
/* In-place variant of the same conversion. */
plasma_ziplap2tile( descA, A, NB, NB, LDA, An, 0, 0, K, An);
plasma_ziplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N);
}
/* Call the tile interface */
/* NOTE(review): the head of this call is missing from the listing; the
 * "References" list names PLASMA_zunmlq_Tile_Async(). */
side, trans, &descA, &descT, &descB, sequence, &request);
/* Convert the result back to LAPACK layout: only B in the out-of-place
 * branch, both A and B in the in-place branch. */
plasma_zooptile2lap( descB, B, NB, NB, LDB, N );
} else {
plasma_ziptile2lap( descA, A, NB, NB, LDA, An );
plasma_ziptile2lap( descB, B, NB, NB, LDB, N );
}
/* Report the status recorded in the sequence, then release the sequence. */
status = sequence->status;
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zunmqr ( PLASMA_enum  side,
PLASMA_enum  trans,
int  M,
int  N,
int  K,
PLASMA_Complex64_t * A,
int  LDA,
PLASMA_Complex64_t * T,
PLASMA_Complex64_t * B,
int  LDB 
)

PLASMA_zunmqr - overwrites the general M-by-N matrix C with Q*C, where Q is an orthogonal matrix (unitary in the complex case) defined as the product of elementary reflectors returned by PLASMA_zgeqrf. Q is of order M.

Parameters:
[in]sideIntended usage: = PlasmaLeft: apply Q or Q**H from the left; = PlasmaRight: apply Q or Q**H from the right. Currently only PlasmaLeft is supported.
[in]transIntended usage: = PlasmaNoTrans: no transpose, apply Q; = PlasmaConjTrans: conjugate transpose, apply Q**H. Currently only PlasmaConjTrans is supported.
[in]MThe number of rows of the matrix C. M >= 0.
[in]NThe number of columns of the matrix C. N >= 0.
[in]KThe number of columns of elementary tile reflectors whose product defines the matrix Q. If side == PlasmaLeft, M >= K >= 0. If side == PlasmaRight, N >= K >= 0.
[in]ADetails of the QR factorization of the original matrix A as returned by PLASMA_zgeqrf.
[in]LDAThe leading dimension of the array A. If side == PlasmaLeft, LDA >= max(1,M). If side == PlasmaRight, LDA >= max(1,N).
[in]TAuxiliary factorization data, computed by PLASMA_zgeqrf.
[in,out]BOn entry, the M-by-N matrix B. On exit, B is overwritten by Q*B or Q**H*B.
[in]LDBThe leading dimension of the array B. LDB >= max(1,M).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value
See also:
PLASMA_zunmqr_Tile
PLASMA_zunmqr_Tile_Async
PLASMA_cunmqr
PLASMA_dormqr
PLASMA_sormqr
PLASMA_zgeqrf

Definition at line 85 of file zunmqr.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_ZGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_zunmqr_Tile_Async(), PlasmaComplexDouble, PlasmaConjTrans, PlasmaLeft, PlasmaNoTrans, PlasmaRight, plasma_sequence_t::status, and T.

{
/* Body of PLASMA_zunmqr (LAPACK-layout interface): validates the arguments,
 * tunes the block sizes, converts A and B to tile layout, calls the tile
 * interface and converts the result back.
 *
 * NOTE(review): this listing appears to have lost several source lines
 * (the declarations of `plasma` and `request`, the condition of the `trans`
 * check, the heads of the plasma_desc_init() and tile-interface calls, the
 * conditions opening some of the blocks closed below, and the quick-return
 * statement) - confirm against zunmqr.c. */
int NB, IB, IBNB, Am, MT, KT;
int status;
PLASMA_sequence *sequence = NULL;
PLASMA_desc descA, descB, descT;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zunmqr", "PLASMA not initialized");
}
/* Am is the dimension of B that Q acts on: M when Q is applied from the
 * left, N otherwise. It bounds K and LDA and is the row count of A. */
if ( side == PlasmaLeft ) {
Am = M;
} else {
Am = N;
}
/* Check input arguments */
/* Each failure returns -i, where i is the 1-based position of the offending
 * argument in the signature. */
if ((side != PlasmaLeft) && (side != PlasmaRight)) {
plasma_error("PLASMA_zunmqr", "illegal value of side");
return -1;
}
/* NOTE(review): the `if` that guards the following `trans` error is missing
 * from the listing. */
plasma_error("PLASMA_zunmqr", "illegal value of trans");
return -2;
}
if (M < 0) {
plasma_error("PLASMA_zunmqr", "illegal value of M");
return -3;
}
if (N < 0) {
plasma_error("PLASMA_zunmqr", "illegal value of N");
return -4;
}
if ( (K < 0) || (K > Am) ) {
plasma_error("PLASMA_zunmqr", "illegal value of K");
return -5;
}
if ( LDA < max(1, Am) ) {
plasma_error("PLASMA_zunmqr", "illegal value of LDA");
return -7;
}
if (LDB < max(1, M)) {
plasma_error("PLASMA_zunmqr", "illegal value of LDB");
return -10;
}
/* Quick return - currently NOT equivalent to LAPACK's:
* CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) */
/* NOTE(review): the statement guarded by this `if` is not shown in the
 * listing; as printed, the `if` would instead guard the plasma_tune() call
 * below - confirm against zunmqr.c. */
if (min(M, min(N, K)) == 0)
/* Tune NB & IB depending on M, K & N; Set NBNB */
status = plasma_tune(PLASMA_FUNC_ZGELS, M, K, N);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_zunmqr", "plasma_tune() failed");
return status;
}
/* Read the tuned block sizes and set MT & KT: the number of NB-sized tiles
 * needed to cover Am and K respectively (ceiling division). */
NB = PLASMA_NB;
IB = PLASMA_IB;
IBNB = IB*NB;
MT = (Am%NB==0) ? (Am/NB) : (Am/NB+1);
KT = (K%NB==0) ? (K /NB) : (K /NB+1);
plasma_sequence_create(plasma, &sequence);
/* NOTE(review): the two argument lists below are the tails of calls whose
 * heads are missing from the listing - presumably plasma_desc_init() for
 * descT, selected by the householder mode; verify against zunmqr.c. */
IB, NB, IBNB,
MT*IB, KT*NB, 0, 0, MT*IB, KT*NB);
}
else {
/* Double the size of T to accommodate the tree reduction phase */
IB, NB, IBNB,
MT*IB, 2*KT*NB, 0, 0, MT*IB, 2*KT*NB);
}
/* The descriptor for T wraps the caller-supplied buffer directly. */
descT.mat = T;
/* Convert A (Am-by-K) and B (M-by-N) from LAPACK layout to tile layout.
 * Out-of-place variant first; the trailing macro argument is a cleanup
 * expression - presumably run if the conversion fails (TODO confirm). */
plasma_zooplap2tile( descA, A, NB, NB, LDA, K, 0, 0, Am, K, plasma_desc_mat_free(&(descA)) );
plasma_zooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
} else {
/* In-place variant of the same conversion. */
plasma_ziplap2tile( descA, A, NB, NB, LDA, K, 0, 0, Am, K);
plasma_ziplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N);
}
/* Call the tile interface */
/* NOTE(review): the head of this call is missing from the listing; the
 * "References" list names PLASMA_zunmqr_Tile_Async(). */
side, trans, &descA, &descT, &descB, sequence, &request);
/* Convert the result back to LAPACK layout: only B in the out-of-place
 * branch, both A and B in the in-place branch. */
plasma_zooptile2lap( descB, B, NB, NB, LDB, N );
} else {
plasma_ziptile2lap( descA, A, NB, NB, LDA, K );
plasma_ziptile2lap( descB, B, NB, NB, LDB, N );
}
/* Report the status recorded in the sequence, then release the sequence. */
status = sequence->status;
plasma_sequence_destroy(plasma, sequence);
return status;
}

Here is the call graph for this function:

Here is the caller graph for this function: