PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
Advanced Interface: Asynchronous - Double Complex

Functions

int PLASMA_zcgels_Tile_Async (PLASMA_enum trans, PLASMA_desc *A, PLASMA_desc *T, PLASMA_desc *B, PLASMA_desc *X, int *ITER, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zcgesv_Tile_Async (PLASMA_desc *A, int *IPIV, PLASMA_desc *B, PLASMA_desc *X, int *ITER, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zcposv_Tile_Async (PLASMA_enum uplo, PLASMA_desc *A, PLASMA_desc *B, PLASMA_desc *X, int *ITER, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zcungesv_Tile_Async (PLASMA_enum trans, PLASMA_desc *A, PLASMA_desc *T, PLASMA_desc *B, PLASMA_desc *X, int *ITER, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zgebrd_Tile_Async (PLASMA_enum jobu, PLASMA_enum jobvt, PLASMA_desc *A, double *D, double *E, PLASMA_desc *U, PLASMA_desc *VT, PLASMA_desc *T, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zgelqf_Tile_Async (PLASMA_desc *A, PLASMA_desc *T, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zgelqs_Tile_Async (PLASMA_desc *A, PLASMA_desc *T, PLASMA_desc *B, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zgels_Tile_Async (PLASMA_enum trans, PLASMA_desc *A, PLASMA_desc *T, PLASMA_desc *B, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zgemm_Tile_Async (PLASMA_enum transA, PLASMA_enum transB, PLASMA_Complex64_t alpha, PLASMA_desc *A, PLASMA_desc *B, PLASMA_Complex64_t beta, PLASMA_desc *C, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zgeqrf_Tile_Async (PLASMA_desc *A, PLASMA_desc *T, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zgeqrs_Tile_Async (PLASMA_desc *A, PLASMA_desc *T, PLASMA_desc *B, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zgesv_Tile_Async (PLASMA_desc *A, int *IPIV, PLASMA_desc *B, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zgesv_incpiv_Tile_Async (PLASMA_desc *A, PLASMA_desc *L, int *IPIV, PLASMA_desc *B, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zgesvd_Tile_Async (PLASMA_enum jobu, PLASMA_enum jobvt, PLASMA_desc *A, double *S, PLASMA_desc *U, PLASMA_desc *VT, PLASMA_desc *T, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zgetrf_Tile_Async (PLASMA_desc *A, int *IPIV, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zgetrf_incpiv_Tile_Async (PLASMA_desc *A, PLASMA_desc *L, int *IPIV, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zgetri_Tile_Async (PLASMA_desc *A, int *IPIV, PLASMA_desc *W, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zgetrs_Tile_Async (PLASMA_enum trans, PLASMA_desc *A, int *IPIV, PLASMA_desc *B, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zgetrs_incpiv_Tile_Async (PLASMA_desc *A, PLASMA_desc *L, int *IPIV, PLASMA_desc *B, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zheev_Tile_Async (PLASMA_enum jobz, PLASMA_enum uplo, PLASMA_desc *A, double *W, PLASMA_desc *T, PLASMA_desc *Q, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zheevd_Tile_Async (PLASMA_enum jobz, PLASMA_enum uplo, PLASMA_desc *A, double *W, PLASMA_desc *T, PLASMA_desc *Q, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zhegst_Tile_Async (PLASMA_enum itype, PLASMA_enum uplo, PLASMA_desc *A, PLASMA_desc *B, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zhegv_Tile_Async (PLASMA_enum itype, PLASMA_enum jobz, PLASMA_enum uplo, PLASMA_desc *A, PLASMA_desc *B, double *W, PLASMA_desc *T, PLASMA_desc *Q, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zhemm_Tile_Async (PLASMA_enum side, PLASMA_enum uplo, PLASMA_Complex64_t alpha, PLASMA_desc *A, PLASMA_desc *B, PLASMA_Complex64_t beta, PLASMA_desc *C, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zher2k_Tile_Async (PLASMA_enum uplo, PLASMA_enum trans, PLASMA_Complex64_t alpha, PLASMA_desc *A, PLASMA_desc *B, double beta, PLASMA_desc *C, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zherk_Tile_Async (PLASMA_enum uplo, PLASMA_enum trans, double alpha, PLASMA_desc *A, double beta, PLASMA_desc *C, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zhetrd_Tile_Async (PLASMA_enum jobz, PLASMA_enum uplo, PLASMA_desc *A, double *D, double *E, PLASMA_desc *T, PLASMA_desc *Q, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zlacpy_Tile_Async (PLASMA_enum uplo, PLASMA_desc *A, PLASMA_desc *B, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zlange_Tile_Async (PLASMA_enum norm, PLASMA_desc *A, double *work, double *value, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zlanhe_Tile_Async (PLASMA_enum norm, PLASMA_enum uplo, PLASMA_desc *A, double *work, double *value, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zlansy_Tile_Async (PLASMA_enum norm, PLASMA_enum uplo, PLASMA_desc *A, double *work, double *value, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zlaset_Tile_Async (PLASMA_enum uplo, PLASMA_Complex64_t alpha, PLASMA_Complex64_t beta, PLASMA_desc *A, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zlaswp_Tile_Async (PLASMA_desc *A, int K1, int K2, int *IPIV, int INCX, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zlaswpc_Tile_Async (PLASMA_desc *A, int K1, int K2, int *IPIV, int INCX, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zlauum_Tile_Async (PLASMA_enum uplo, PLASMA_desc *A, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zplghe_Tile_Async (double bump, PLASMA_desc *A, unsigned long long int seed, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zplgsy_Tile_Async (PLASMA_Complex64_t bump, PLASMA_desc *A, unsigned long long int seed, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zplrnt_Tile_Async (PLASMA_desc *A, unsigned long long int seed, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zposv_Tile_Async (PLASMA_enum uplo, PLASMA_desc *A, PLASMA_desc *B, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zpotrf_Tile_Async (PLASMA_enum uplo, PLASMA_desc *A, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zpotri_Tile_Async (PLASMA_enum uplo, PLASMA_desc *A, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zpotrs_Tile_Async (PLASMA_enum uplo, PLASMA_desc *A, PLASMA_desc *B, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zsymm_Tile_Async (PLASMA_enum side, PLASMA_enum uplo, PLASMA_Complex64_t alpha, PLASMA_desc *A, PLASMA_desc *B, PLASMA_Complex64_t beta, PLASMA_desc *C, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zsyr2k_Tile_Async (PLASMA_enum uplo, PLASMA_enum trans, PLASMA_Complex64_t alpha, PLASMA_desc *A, PLASMA_desc *B, PLASMA_Complex64_t beta, PLASMA_desc *C, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zsyrk_Tile_Async (PLASMA_enum uplo, PLASMA_enum trans, PLASMA_Complex64_t alpha, PLASMA_desc *A, PLASMA_Complex64_t beta, PLASMA_desc *C, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_ztrmm_Tile_Async (PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum transA, PLASMA_enum diag, PLASMA_Complex64_t alpha, PLASMA_desc *A, PLASMA_desc *B, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_ztrsm_Tile_Async (PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum transA, PLASMA_enum diag, PLASMA_Complex64_t alpha, PLASMA_desc *A, PLASMA_desc *B, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_ztrsmpl_Tile_Async (PLASMA_desc *A, PLASMA_desc *L, int *IPIV, PLASMA_desc *B, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_ztrsmrv_Tile_Async (PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum transA, PLASMA_enum diag, PLASMA_Complex64_t alpha, PLASMA_desc *A, PLASMA_desc *B, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_ztrtri_Tile_Async (PLASMA_enum uplo, PLASMA_enum diag, PLASMA_desc *A, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zunglq_Tile_Async (PLASMA_desc *A, PLASMA_desc *T, PLASMA_desc *Q, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zungqr_Tile_Async (PLASMA_desc *A, PLASMA_desc *T, PLASMA_desc *Q, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zunmlq_Tile_Async (PLASMA_enum side, PLASMA_enum trans, PLASMA_desc *A, PLASMA_desc *T, PLASMA_desc *B, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zunmqr_Tile_Async (PLASMA_enum side, PLASMA_enum trans, PLASMA_desc *A, PLASMA_desc *T, PLASMA_desc *B, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zLapack_to_Tile_Async (PLASMA_Complex64_t *Af77, int LDA, PLASMA_desc *A, PLASMA_sequence *sequence, PLASMA_request *request)
int PLASMA_zTile_to_Lapack_Async (PLASMA_desc *A, PLASMA_Complex64_t *Af77, int LDA, PLASMA_sequence *sequence, PLASMA_request *request)

Detailed Description

This is the group of double complex functions using the advanced asynchronous interface.


Function Documentation

int PLASMA_zcgels_Tile_Async ( PLASMA_enum  trans,
PLASMA_desc *  A,
PLASMA_desc *  T,
PLASMA_desc *  B,
PLASMA_desc *  X,
int *  ITER,
PLASMA_sequence *  sequence,
PLASMA_request *  request 
)

PLASMA_zcgels_Tile_Async - Solves overdetermined or underdetermined linear systems of equations using the tile QR or the tile LQ factorization and mixed-precision iterative refinement. Non-blocking equivalent of PLASMA_zcgels_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zcgels
PLASMA_zcgels_Tile
PLASMA_dsgels_Tile_Async
PLASMA_zgels_Tile_Async

Definition at line 441 of file zcgels.c.

References A, B, plasma_desc_t::lm, plasma_desc_t::ln, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::n, plasma_desc_t::nb, PLASMA_clag2z, plasma_context_self(), plasma_desc_check(), plasma_desc_init(), plasma_desc_mat_alloc(), plasma_desc_mat_free(), plasma_desc_submatrix(), plasma_dynamic_sync, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_NOT_SUPPORTED, PLASMA_ERR_OUT_OF_RESOURCES, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_3, plasma_parallel_call_4, plasma_parallel_call_5, plasma_parallel_call_9, plasma_pcgelqf(), plasma_pcgeqrf(), plasma_pctrsm(), plasma_pcunmlq(), plasma_pcunmqr(), plasma_pzgelqf(), plasma_pzgemm(), plasma_pzgeqrf(), plasma_pztile_zero(), plasma_pztrsm(), plasma_pzunmlq(), plasma_pzunmqr(), plasma_request_fail(), plasma_shared_alloc(), plasma_shared_free(), PLASMA_SIZE, PLASMA_SUCCESS, PLASMA_zgeadd, PLASMA_zlacpy, PLASMA_zlag2c, PLASMA_zlange, PlasmaComplexDouble, PlasmaComplexFloat, PlasmaInfNorm, PlasmaLeft, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaRealDouble, PlasmaUpper, plasma_request_t::status, plasma_sequence_t::status, and T.

{
int M, N, NRHS, NB, NBNB, MT, NT, NTRHS;
PLASMA_desc descA = *A;
PLASMA_desc descT = *T;
PLASMA_desc descB = *B;
PLASMA_desc descX = *X;
double *work;
const int itermax = 30;
const double bwdmax = 1.0;
const PLASMA_Complex64_t negone = -1.0;
const PLASMA_Complex64_t one = 1.0;
int iiter;
double Anorm, cte, eps, Rnorm, Xnorm;
*ITER=0;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zcgels_Tile", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zcgels_Tile", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zcgels_Tile", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zcgels_Tile", "invalid first descriptor");
}
plasma_error("PLASMA_zcgels_Tile", "invalid second descriptor");
}
plasma_error("PLASMA_zcgels_Tile", "invalid third descriptor");
}
plasma_error("PLASMA_zcgels_Tile", "invalid fourth descriptor");
}
/* Check input arguments */
if (descA.nb != descA.mb || descB.nb != descB.mb || descX.nb != descX.mb) {
plasma_error("PLASMA_zcgels_Tile", "only square tiles supported");
}
if (trans != PlasmaNoTrans) {
plasma_error("PLASMA_zcgels_Tile", "only PlasmaNoTrans supported");
}
/* Quick return - currently NOT equivalent to LAPACK's:
if (min(M, min(N, NRHS)) == 0) {
for (i = 0; i < max(M, N); i++)
for (j = 0; j < NRHS; j++)
B[j*LDB+i] = 0.0;
return PLASMA_SUCCESS;
}
*/
if (0 == 0) {
// START SPECIFIC
/* Set M, M, NRHS, NB, MT, NT & NTRHS */
M = descA.lm;
N = descA.ln;
NRHS = descB.ln;
NB = descA.nb;
NBNB = NB*NB;
MT = (M%NB==0) ? (M/NB) : (M/NB+1);
NT = (N%NB==0) ? (N/NB) : (N/NB+1);
NTRHS = (NRHS%NB==0) ? (NRHS/NB) : (NRHS/NB+1);
printf("M %d, N %d, NRHS %d, NB %d, MT %d, NT %d, NTRHS %d\n", M, N, NRHS, NB, MT, NT, NTRHS);
work = (double *)plasma_shared_alloc(plasma, PLASMA_SIZE, PlasmaRealDouble);
if (work == NULL) {
plasma_error("PLASMA_zcgesv", "plasma_shared_alloc() failed");
plasma_shared_free(plasma, work);
}
NB, NB, NBNB,
M, NRHS, 0, 0, M, NRHS);
if (plasma_desc_mat_alloc(&descR)) {
plasma_error("PLASMA_zcgesv", "plasma_shared_alloc() failed");
plasma_shared_free(plasma, work);
}
NB, NB, NBNB,
M, N, 0, 0, M, N);
IB, NB, IBNB,
M, N, 0, 0, M, N);
NB, NB, NBNB,
M, NRHS, 0, 0, M, NRHS);
/* Allocate memory for single precision matrices in block layout */
if (plasma_desc_mat_alloc(&descSA) || plasma_desc_mat_alloc(&descST) || plasma_desc_mat_alloc(&descSX)) {
plasma_error("PLASMA_zcgesv", "plasma_shared_alloc() failed");
plasma_shared_free(plasma, work);
}
/* Compute some constants */
PLASMA_zlange(PlasmaInfNorm, descA, Anorm, work);
eps = LAPACKE_dlamch_work('e');
printf("Anorm=%e, cte=%e\n", Anorm, cte);
/* Convert B from double precision to single precision and store
the result in SX. */
PLASMA_zlag2c(descB, descSX);
if (sequence->status != PLASMA_SUCCESS)
/* Convert A from double precision to single precision and store
the result in SA. */
PLASMA_zlag2c(descA, descSA);
if (sequence->status != PLASMA_SUCCESS)
if (descSA.m >= descSA.n) {
/* Compute the QR factorization of SA */
printf("Facto\n"); fflush(stdout);
PLASMA_desc, descSA,
PLASMA_desc, descST,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
printf("Solve\n"); fflush(stdout);
PLASMA_desc, descSA,
PLASMA_desc, descSX,
PLASMA_desc, descST,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, plasma_desc_submatrix(descSA, 0, 0, descSA.n, descSA.n),
PLASMA_desc, plasma_desc_submatrix(descSX, 0, 0, descSA.n, descSX.n),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}
else {
PLASMA_desc, plasma_desc_submatrix(descSX, descSA.m, 0, descSA.n-descSA.m, descSX.n),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, descSA,
PLASMA_desc, descST,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, plasma_desc_submatrix(descSA, 0, 0, descSA.m, descSA.m),
PLASMA_desc, plasma_desc_submatrix(descSX, 0, 0, descSA.m, descSX.n),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, descSA,
PLASMA_desc, descSX,
PLASMA_desc, descST,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}
/* Convert SX back to double precision */
PLASMA_clag2z(descSX, descX);
/* Compute R = B - AX. */
printf("R = B - Ax\n"); fflush(stdout);
printf("R = B - Ax ... cpy\n"); fflush(stdout);
PLASMA_zlacpy(descB,descR);
printf("R = B - Ax ... gemm\n"); fflush(stdout);
PLASMA_desc, descA,
PLASMA_desc, descX,
PLASMA_desc, descR,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Check whether the NRHS normwise backward error satisfies the
stopping criterion. If yes return. Note that ITER=0 (already set). */
printf("Norm of X and R\n"); fflush(stdout);
PLASMA_zlange(PlasmaInfNorm, descX, Xnorm, work);
PLASMA_zlange(PlasmaInfNorm, descR, Rnorm, work);
/* Wait the end of Anorm, Xnorm and Bnorm computations */
cte = Anorm*eps*((double) N)*bwdmax;
if (Rnorm < Xnorm * cte){
/* The NRHS normwise backward errors satisfy the
stopping criterion. We are good to exit. */
plasma_shared_free(plasma, work);
}
printf("Rnorm=%e, Xnorm * cte=%e, Rnorm=%e, cte=%e\n", Rnorm, Xnorm * cte, Rnorm, cte);
/* Iterative refinement */
for (iiter = 0; iiter < itermax; iiter++){
/* Convert R from double precision to single precision
and store the result in SX. */
PLASMA_zlag2c(descR, descSX);
/* Solve the system SA*SX = SR */
if (descSA.m >= descSA.n) {
PLASMA_desc, descSA,
PLASMA_desc, descSX,
PLASMA_desc, descST,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, plasma_desc_submatrix(descSA, 0, 0, descSA.n, descSA.n),
PLASMA_desc, plasma_desc_submatrix(descSX, 0, 0, descSA.n, descSX.n),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
} else {
PLASMA_desc, plasma_desc_submatrix(descSA, 0, 0, descSA.m, descSA.m),
PLASMA_desc, plasma_desc_submatrix(descSX, 0, 0, descSA.m, descSX.n),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, descSA,
PLASMA_desc, descSX,
PLASMA_desc, descST,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}
/* Convert SX back to double precision and update the current
iterate. */
PLASMA_clag2z(descSX, descR);
PLASMA_zgeadd(one, descR, descX);
/* Compute R = B - AX. */
PLASMA_zlacpy(descB,descR);
PLASMA_desc, descA,
PLASMA_desc, descX,
PLASMA_desc, descR,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Check whether the NRHS normwise backward errors satisfy the
stopping criterion. If yes, set ITER=IITER>0 and return. */
PLASMA_zlange(PlasmaInfNorm, descX, Xnorm, work);
PLASMA_zlange(PlasmaInfNorm, descR, Rnorm, work);
/* Wait the end of Xnorm and Bnorm computations */
printf("Rnorm=%e, Xnorm * cte=%e, Rnorm=%e, cte=%e\n", Rnorm, Xnorm * cte, Rnorm, cte);
if (Rnorm < Xnorm * cte){
/* The NRHS normwise backward errors satisfy the
stopping criterion. We are good to exit. */
*ITER = iiter;
plasma_shared_free(plasma, work);
}
}
/* We have performed ITER=itermax iterations and never satisified
the stopping criterion, set up the ITER flag accordingly and
follow up on double precision routine. */
*ITER = -itermax - 1;
plasma_shared_free(plasma, work);
printf("Go back DOUBLE\n");
// END SPECIFIC
}
/* Single-precision iterative refinement failed to converge to a
satisfactory solution, so we resort to double precision. */
PLASMA_zlacpy(descB, descX);
if (descA.m >= descA.n) {
PLASMA_desc, descA,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, descA,
PLASMA_desc, descX,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, plasma_desc_submatrix(descA, 0, 0, descA.n, descA.n),
PLASMA_desc, plasma_desc_submatrix(descX, 0, 0, descA.n, descX.n),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}
else {
PLASMA_desc, plasma_desc_submatrix(descX, descA.m, 0, descA.n-descA.m, descX.n),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, descA,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, plasma_desc_submatrix(descA, 0, 0, descA.m, descA.m),
PLASMA_desc, plasma_desc_submatrix(descX, 0, 0, descA.m, descX.n),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, descA,
PLASMA_desc, descX,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zcgesv_Tile_Async ( PLASMA_desc *  A,
int *  IPIV,
PLASMA_desc *  B,
PLASMA_desc *  X,
int *  ITER,
PLASMA_sequence *  sequence,
PLASMA_request *  request 
)

PLASMA_zcgesv_Tile_Async - Solves a system of linear equations using the tile LU factorization and mixed-precision iterative refinement. Non-blocking equivalent of PLASMA_zcgesv_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zcgesv
PLASMA_zcgesv_Tile
PLASMA_dsgesv_Tile_Async
PLASMA_zgesv_Tile_Async

Definition at line 424 of file zcgesv.c.

References A, B, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::n, plasma_desc_t::nb, plasma_cdesc_alloc, PLASMA_cgetrs, PLASMA_clag2z, plasma_context_self(), plasma_desc_check(), plasma_desc_mat_free(), plasma_dynamic_call_3, plasma_dynamic_call_4, plasma_dynamic_sync, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_OUT_OF_RESOURCES, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_memzero(), plasma_parallel_call_9, plasma_pzgemm(), plasma_request_fail(), plasma_shared_alloc(), plasma_shared_free(), PLASMA_SIZE, PLASMA_SUCCESS, plasma_zdesc_alloc, PLASMA_zgeadd, PLASMA_zgetrs, PLASMA_zlacpy, PLASMA_zlag2c, PLASMA_zlange, PlasmaInfNorm, PlasmaInteger, PlasmaMaxNorm, PlasmaNoTrans, PlasmaRealDouble, plasma_request_t::status, and plasma_sequence_t::status.

{
int N, NB;
PLASMA_desc descA = *A;
PLASMA_desc descB = *B;
PLASMA_desc descX = *X;
PLASMA_desc descR, descSA, descSX;
double *work;
const int itermax = 30;
const double bwdmax = 1.0;
const PLASMA_Complex64_t negone = -1.0;
const PLASMA_Complex64_t one = 1.0;
int iiter;
double Anorm, cte, eps, Rnorm, Xnorm;
*ITER=0;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zcgesv_Tile", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zcgesv_Tile", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zcgesv_Tile", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zcgesv_Tile", "invalid first descriptor");
}
plasma_error("PLASMA_zcgesv_Tile", "invalid third descriptor");
}
plasma_error("PLASMA_zcgesv_Tile", "invalid fourth descriptor");
}
/* Check input arguments */
if (descA.nb != descA.mb || descB.nb != descB.mb || descX.nb != descX.mb) {
plasma_error("PLASMA_zcgesv_Tile", "only square tiles supported");
}
/* Set N, NRHS, NT */
N = descA.m;
NB = descA.nb;
work = (double *)plasma_shared_alloc(plasma, PLASMA_SIZE, PlasmaRealDouble);
if (work == NULL) {
plasma_error("PLASMA_zcgesv", "plasma_shared_alloc() failed");
plasma_shared_free(plasma, work);
}
plasma_zdesc_alloc( descR, NB, NB, descB.m, descB.n, 0, 0, descB.m, descB.n,
plasma_shared_free( plasma, work ); plasma_desc_mat_free(&descR) );
plasma_cdesc_alloc( descSA, NB, NB, descA.m, descA.n, 0, 0, descA.m, descA.n,
plasma_shared_free( plasma, work ); plasma_desc_mat_free(&descR);
plasma_cdesc_alloc( descSX, NB, NB, descX.m, descX.n, 0, 0, descX.m, descX.n,
plasma_shared_free( plasma, work ); plasma_desc_mat_free(&descR);
/* Compute some constants */
PLASMA_zlange(PlasmaInfNorm, descA, Anorm, work);
eps = LAPACKE_dlamch_work('e');
/* Convert B from double precision to single precision and store
the result in SX. */
PLASMA_zlag2c(descB, descSX);
if (sequence->status != PLASMA_SUCCESS)
/* Convert A from double precision to single precision and store
the result in SA. */
PLASMA_zlag2c(descA, descSA);
if (sequence->status != PLASMA_SUCCESS)
/* Clear IPIV and Lbdl */
/* Compute the LU factorization of SA */
plasma_pcbarrier_tl2pnl,
PLASMA_desc, descSA,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
plasma_dynamic_call_4(plasma_pcgetrf_rectil,
PLASMA_desc, descSA,
int*, IPIV,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Solve the system SA*SX = SB */
PLASMA_cgetrs(descSA, IPIV, descSX);
/* Convert SX back to double precision */
PLASMA_clag2z(descSX, descX);
/* Compute R = B - AX. */
PLASMA_zlacpy(descB, descR);
PLASMA_desc, descA,
PLASMA_desc, descX,
PLASMA_desc, descR,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Check whether the NRHS normwise backward error satisfies the
stopping criterion. If yes return. Note that ITER=0 (already set). */
PLASMA_zlange(PlasmaMaxNorm, descX, Xnorm, work);
PLASMA_zlange(PlasmaMaxNorm, descR, Rnorm, work);
/* Wait the end of Anorm, Xnorm and Bnorm computations */
cte = Anorm*eps*sqrt((double) N)*bwdmax;
if (Rnorm < Xnorm * cte){
/* The NRHS normwise backward errors satisfy the
stopping criterion. We are good to exit. */
plasma_shared_free(plasma, work);
}
/* Iterative refinement */
for (iiter = 0; iiter < itermax; iiter++){
/* Convert R from double precision to single precision
and store the result in SX. */
PLASMA_zlag2c(descR, descSX);
/* Solve the system SA*SX = SB */
PLASMA_cgetrs(descSA, IPIV, descSX);
/* Convert SX back to double precision and update the current
iterate. */
PLASMA_clag2z(descSX, descR);
PLASMA_zgeadd(one, descR, descX);
/* Compute R = B - AX. */
PLASMA_zlacpy(descB,descR);
PLASMA_desc, descA,
PLASMA_desc, descX,
PLASMA_desc, descR,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Check whether the NRHS normwise backward errors satisfy the
stopping criterion. If yes, set ITER=IITER>0 and return. */
PLASMA_zlange(PlasmaInfNorm, descX, Xnorm, work);
PLASMA_zlange(PlasmaInfNorm, descR, Rnorm, work);
/* Wait the end of Xnorm and Bnorm computations */
if (Rnorm < Xnorm * cte){
/* The NRHS normwise backward errors satisfy the
stopping criterion. We are good to exit. */
*ITER = iiter;
plasma_shared_free(plasma, work);
}
}
/* We have performed ITER=itermax iterations and never satisified
the stopping criterion, set up the ITER flag accordingly and
follow up on double precision routine. */
*ITER = -itermax - 1;
plasma_shared_free(plasma, work);
/* Single-precision iterative refinement failed to converge to a
satisfactory solution, so we resort to double precision. */
/* Clear IPIV and Lbdl */
plasma_dynamic_call_4(plasma_pzgetrf_rectil,
PLASMA_desc, descA,
int*, IPIV,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_zlacpy(descB, descX);
PLASMA_zgetrs(descA, IPIV, descX);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zcposv_Tile_Async ( PLASMA_enum  uplo,
PLASMA_desc *  A,
PLASMA_desc *  B,
PLASMA_desc *  X,
int *  ITER,
PLASMA_sequence *  sequence,
PLASMA_request *  request 
)

PLASMA_zcposv_Tile_Async - Solves a symmetric positive definite or Hermitian positive definite system of linear equations using the Cholesky factorization and mixed-precision iterative refinement. Non-blocking equivalent of PLASMA_zcposv_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zcposv
PLASMA_zcposv_Tile
PLASMA_dsposv_Tile_Async
PLASMA_zposv_Tile_Async

Definition at line 374 of file zcposv.c.

References A, B, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::n, plasma_desc_t::nb, plasma_cdesc_alloc, PLASMA_clag2z, plasma_context_self(), plasma_desc_check(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_OUT_OF_RESOURCES, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_4, plasma_parallel_call_9, plasma_pcpotrf(), plasma_pctrsm(), plasma_pzhemm(), plasma_pzpotrf(), plasma_pztrsm(), plasma_request_fail(), plasma_shared_alloc(), plasma_shared_free(), PLASMA_SIZE, PLASMA_SUCCESS, plasma_zdesc_alloc, PLASMA_zgeadd, PLASMA_zlacpy, PLASMA_zlag2c, PLASMA_zlange, PLASMA_zlanhe, PlasmaConjTrans, PlasmaInfNorm, PlasmaLeft, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaRealDouble, PlasmaUpper, plasma_request_t::status, and plasma_sequence_t::status.

{
int N, NB;
PLASMA_desc descA = *A;
PLASMA_desc descB = *B;
PLASMA_desc descX = *X;
double *work;
PLASMA_desc descR, descSA, descSX;
const int itermax = 30;
const double bwdmax = 1.0;
const PLASMA_Complex64_t negone = -1.0;
const PLASMA_Complex64_t one = 1.0;
int iiter;
double Anorm, cte, eps, Rnorm, Xnorm;
*ITER=0;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zcposv_Tile_Async", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zcposv_Tile_Async", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zcposv_Tile_Async", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zcposv_Tile_Async", "invalid first descriptor");
}
plasma_error("PLASMA_zcposv_Tile_Async", "invalid second descriptor");
}
plasma_error("PLASMA_zcposv_Tile_Async", "invalid third descriptor");
}
/* Check input arguments */
if (descA.nb != descA.mb || descB.nb != descB.mb || descX.nb != descX.mb) {
plasma_error("PLASMA_zcposv_Tile_Async", "only square tiles supported");
}
if (uplo != PlasmaUpper && uplo != PlasmaLower) {
plasma_error("PLASMA_zcposv_Tile_Async", "illegal value of uplo");
return -1;
}
/* Quick return - currently NOT equivalent to LAPACK's
* LAPACK does not have such check for DPOSV */
/*
if (min(N, NRHS) == 0)
return PLASMA_SUCCESS;
*/
/* Set N, NRHS */
N = descA.m;
NB = descA.nb;
work = (double *)plasma_shared_alloc(plasma, PLASMA_SIZE, PlasmaRealDouble);
if (work == NULL) {
plasma_error("PLASMA_zcposv_Tile_Async", "plasma_shared_alloc() failed");
plasma_shared_free(plasma, work);
}
plasma_zdesc_alloc( descR, NB, NB, descB.m, descB.n, 0, 0, descB.m, descB.n, plasma_shared_free( plasma, work ); plasma_desc_mat_free(&descR) );
plasma_cdesc_alloc( descSA, NB, NB, descA.m, descA.n, 0, 0, descA.m, descA.n, plasma_shared_free( plasma, work ); plasma_desc_mat_free(&descR); plasma_desc_mat_free(&descSA) );
plasma_cdesc_alloc( descSX, NB, NB, descX.m, descX.n, 0, 0, descX.m, descX.n, plasma_shared_free( plasma, work ); plasma_desc_mat_free(&descR); plasma_desc_mat_free(&descSA); plasma_desc_mat_free(&descSX) );
/* Compute some constants */
PLASMA_zlanhe(PlasmaInfNorm, uplo, descA, Anorm, work);
eps = LAPACKE_dlamch_work('e');
/* Convert B from double precision to single precision and store
the result in SX. */
PLASMA_zlag2c(descB, descSX);
if (sequence->status != PLASMA_SUCCESS)
/* Convert A from double precision to single precision and store
the result in SA. */
PLASMA_zlag2c(descA, descSA);
if (sequence->status != PLASMA_SUCCESS)
/* Compute the Cholesky factorization of SA */
PLASMA_desc, descSA,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Solve the system SA*SX = SB */
/* Forward substitution */
PLASMA_desc, descSA,
PLASMA_desc, descSX,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Backward substitution */
PLASMA_desc, descSA,
PLASMA_desc, descSX,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Convert SX back to double precision */
PLASMA_clag2z(descSX, descX);
/* Compute R = B - AX. */
PLASMA_zlacpy(descB,descR);
PLASMA_desc, descA,
PLASMA_desc, descX,
PLASMA_desc, descR,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Check whether the NRHS normwise backward error satisfies the
stopping criterion. If yes return. Note that ITER=0 (already set). */
PLASMA_zlange(PlasmaInfNorm, descX, Xnorm, work);
PLASMA_zlange(PlasmaInfNorm, descR, Rnorm, work);
/* Wait the end of Anorm, Xnorm and Bnorm computations */
cte = Anorm*eps*((double) N)*bwdmax;
if (Rnorm < Xnorm * cte){
/* The NRHS normwise backward errors satisfy the
stopping criterion. We are good to exit. */
plasma_shared_free(plasma, work);
}
/* Iterative refinement */
for (iiter = 0; iiter < itermax; iiter++){
/* Convert R from double precision to single precision
and store the result in SX. */
PLASMA_zlag2c(descR, descSX);
/* Solve the system SA*SX = SR */
/* Forward substitution */
PLASMA_desc, descSA,
PLASMA_desc, descSX,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Backward substitution */
PLASMA_desc, descSA,
PLASMA_desc, descSX,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Convert SX back to double precision and update the current
iterate. */
PLASMA_clag2z(descSX, descR);
PLASMA_zgeadd(one, descR, descX);
/* Compute R = B - AX. */
PLASMA_zlacpy(descB,descR);
PLASMA_desc, descA,
PLASMA_desc, descX,
PLASMA_desc, descR,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Check whether the NRHS normwise backward errors satisfy the
stopping criterion. If yes, set ITER=IITER>0 and return. */
PLASMA_zlange(PlasmaInfNorm, descX, Xnorm, work);
PLASMA_zlange(PlasmaInfNorm, descR, Rnorm, work);
/* Wait the end of Xnorm and Bnorm computations */
if (Rnorm < Xnorm * cte){
/* The NRHS normwise backward errors satisfy the
stopping criterion. We are good to exit. */
*ITER = iiter;
plasma_shared_free(plasma, work);
}
}
/* We have performed ITER=itermax iterations and never satisified
the stopping criterion, set up the ITER flag accordingly and
follow up on double precision routine. */
*ITER = -itermax - 1;
plasma_shared_free(plasma, work);
/* Single-precision iterative refinement failed to converge to a
satisfactory solution, so we resort to double precision. */
PLASMA_desc, descA,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_zlacpy(descB,descX);
PLASMA_desc, descA,
PLASMA_desc, descX,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, descA,
PLASMA_desc, descX,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zcungesv_Tile_Async ( PLASMA_enum  trans,
PLASMA_desc *  A,
PLASMA_desc *  T,
PLASMA_desc *  B,
PLASMA_desc *  X,
int *  ITER,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

PLASMA_zcungesv_Tile_Async - Solves a symmetric linear system of equations using the tile QR or the tile LQ factorization and mixed-precision iterative refinement. Non-blocking equivalent of PLASMA_zcungesv_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zcungesv
PLASMA_zcungesv_Tile
PLASMA_dsungesv_Tile_Async
PLASMA_zgels_Tile_Async

Definition at line 378 of file zcungesv.c.

References A, B, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::n, plasma_desc_t::nb, plasma_cdesc_alloc, PLASMA_clag2z, plasma_context_self(), plasma_desc_check(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_NOT_SUPPORTED, PLASMA_ERR_OUT_OF_RESOURCES, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_4, plasma_parallel_call_7, plasma_parallel_call_9, plasma_pcgeqrf(), plasma_pctrsm(), plasma_pcunmqr(), plasma_pzgemm(), plasma_pzgeqrf(), plasma_pztrsm(), plasma_pzunmqr(), plasma_request_fail(), plasma_shared_alloc(), plasma_shared_free(), PLASMA_SIZE, PLASMA_SUCCESS, plasma_zdesc_alloc, PLASMA_zgeadd, PLASMA_zlacpy, PLASMA_zlag2c, PLASMA_zlange, PlasmaConjTrans, PlasmaInfNorm, PlasmaLeft, PlasmaNonUnit, PlasmaNoTrans, PlasmaRealDouble, PlasmaUpper, plasma_request_t::status, plasma_sequence_t::status, and T.

{
/* Mixed-precision solve: factor and solve in single precision (descSA,
   descST, descSX), refine the residual against the double-precision data,
   and fall back to a double-precision solve if refinement does not converge.
   NOTE(review): this listing looks like an excerpt -- the heads of the
   multi-line calls, the early returns and some guards are not shown here. */
int N, NB, IB;
PLASMA_desc descA = *A;
PLASMA_desc descT = *T;
PLASMA_desc descB = *B;
PLASMA_desc descX = *X;
PLASMA_desc descR, descSA, descST, descSX;
double *work;
/* Upper bound on the number of refinement sweeps in the loop below. */
const int itermax = 30;
/* Scale factor folded into the stopping threshold cte. */
const double bwdmax = 1.0;
const PLASMA_Complex64_t negone = -1.0;
const PLASMA_Complex64_t one = 1.0;
int iiter;
double Anorm, cte, eps, Rnorm, Xnorm;
/* ITER starts at 0; it is overwritten with iiter when a refinement sweep
   meets the stopping test, or with -itermax-1 when none does. */
*ITER=0;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zcungesv_Tile", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zcungesv_Tile", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zcungesv_Tile", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
/* NOTE(review): the conditions guarding the four messages below are not
   visible in this listing. */
plasma_error("PLASMA_zcungesv_Tile", "invalid first descriptor");
}
plasma_error("PLASMA_zcungesv_Tile", "invalid second descriptor");
}
plasma_error("PLASMA_zcungesv_Tile", "invalid third descriptor");
}
plasma_error("PLASMA_zcungesv_Tile", "invalid fourth descriptor");
}
/* Check input arguments */
/* A, B and X must each use square tiles, and all three must share the
   same tile size. */
if ( (descA.nb != descA.mb) || (descB.nb != descB.mb) || (descX.nb != descX.mb) ||
(descA.mb != descB.mb) || (descB.mb != descX.mb) ) {
plasma_error("PLASMA_zcungesv_Tile", "only square tiles of same size are supported");
}
if (trans != PlasmaNoTrans) {
plasma_error("PLASMA_zcungesv_Tile", "only PlasmaNoTrans supported");
}
/* Set N, NRHS, NB */
N = descA.m;
NB = descA.nb;
IB = descT.mb;
/* Scratch buffer passed to every PLASMA_zlange norm computation below. */
work = (double *)plasma_shared_alloc(plasma, PLASMA_SIZE, PlasmaRealDouble);
if (work == NULL) {
plasma_error("PLASMA_zcungesv", "plasma_shared_alloc() failed");
plasma_shared_free(plasma, work);
}
/* Temporaries: descR through plasma_zdesc_alloc; descSA, descST and descSX
   through plasma_cdesc_alloc. The last macro argument lists cleanup
   statements (free work plus the descriptors named so far) -- presumably
   executed when the allocation fails; confirm against the macro definition. */
plasma_zdesc_alloc( descR, NB, NB, descB.m, descB.n, 0, 0, descB.m, descB.n, plasma_shared_free( plasma, work ); plasma_desc_mat_free(&descR) );
plasma_cdesc_alloc( descSA, NB, NB, descA.m, descA.n, 0, 0, descA.m, descA.n, plasma_shared_free( plasma, work ); plasma_desc_mat_free(&descR); plasma_desc_mat_free(&descSA) );
plasma_cdesc_alloc( descST, IB, NB, descT.m, descT.n, 0, 0, descT.m, descT.n, plasma_shared_free( plasma, work ); plasma_desc_mat_free(&descR); plasma_desc_mat_free(&descSA); plasma_desc_mat_free(&descST) );
plasma_cdesc_alloc( descSX, NB, NB, descX.m, descX.n, 0, 0, descX.m, descX.n, plasma_shared_free( plasma, work ); plasma_desc_mat_free(&descR); plasma_desc_mat_free(&descSA); plasma_desc_mat_free(&descST); plasma_desc_mat_free(&descSX) );
/* Compute some constants */
PLASMA_zlange(PlasmaInfNorm, descA, Anorm, work);
eps = LAPACKE_dlamch_work('e');
/* Convert B from double precision to single precision and store
the result in SX. */
PLASMA_zlag2c(descB, descSX);
if (sequence->status != PLASMA_SUCCESS)
/* Convert A from double precision to single precision and store
the result in SA. */
PLASMA_zlag2c(descA, descSA);
if (sequence->status != PLASMA_SUCCESS)
/* Compute the QR factorization of SA */
PLASMA_desc, descSA,
PLASMA_desc, descST,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Compute the solve in single precision */
PLASMA_desc, descSA,
PLASMA_desc, descSX,
PLASMA_desc, descST,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, descSA,
PLASMA_desc, descSX,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Convert SX back to double precision */
PLASMA_clag2z(descSX, descX);
/* Compute R = B - AX. */
PLASMA_zlacpy(descB, descR);
PLASMA_desc, descA,
PLASMA_desc, descX,
PLASMA_desc, descR,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Check whether the NRHS normwise backward error satisfies the
stopping criterion. If yes return. Note that ITER=0 (already set). */
PLASMA_zlange(PlasmaInfNorm, descX, Xnorm, work);
PLASMA_zlange(PlasmaInfNorm, descR, Rnorm, work);
/* Wait the end of Anorm, Xnorm and Bnorm computations */
/* Threshold factor: the stopping test is Rnorm < Xnorm * cte. */
cte = Anorm*eps*((double) N)*bwdmax;
if (Rnorm < Xnorm * cte){
/* The NRHS normwise backward errors satisfy the
stopping criterion. We are good to exit. */
plasma_shared_free(plasma, work);
}
/* Iterative refinement */
for (iiter = 0; iiter < itermax; iiter++){
/* Convert R from double precision to single precision
and store the result in SX. */
PLASMA_zlag2c(descR, descSX);
PLASMA_desc, descSA,
PLASMA_desc, descSX,
PLASMA_desc, descST,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, descSA,
PLASMA_desc, descSX,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Convert SX back to double precision and update the current
iterate. */
PLASMA_clag2z(descSX, descR);
PLASMA_zgeadd(one, descR, descX);
/* Compute R = B - AX. */
PLASMA_zlacpy(descB,descR);
PLASMA_desc, descA,
PLASMA_desc, descX,
PLASMA_desc, descR,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Check whether the NRHS normwise backward errors satisfy the
stopping criterion. If yes, set ITER=IITER>0 and return. */
PLASMA_zlange(PlasmaInfNorm, descX, Xnorm, work);
PLASMA_zlange(PlasmaInfNorm, descR, Rnorm, work);
/* Wait the end of Xnorm and Bnorm computations */
if (Rnorm < Xnorm * cte){
/* The NRHS normwise backward errors satisfy the
stopping criterion. We are good to exit. */
*ITER = iiter;
plasma_shared_free(plasma, work);
}
}
/* We have performed ITER=itermax iterations and never satisfied
the stopping criterion, set up the ITER flag accordingly and
follow up on double precision routine. */
*ITER = -itermax - 1;
plasma_shared_free(plasma, work);
/* Single-precision iterative refinement failed to converge to a
satisfactory solution, so we restart in double precision. */
PLASMA_zlacpy(descB, descX);
PLASMA_desc, descA,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, descA,
PLASMA_desc, descX,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, descA,
PLASMA_desc, descX,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgebrd_Tile_Async ( PLASMA_enum  jobu,
PLASMA_enum  jobvt,
PLASMA_desc *  A,
double *  D,
double *  E,
PLASMA_desc *  U,
PLASMA_desc *  VT,
PLASMA_desc *  T,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

PLASMA_zgebrd_Tile_Async - Computes the singular value decomposition (SVD) of a complex M-by-N matrix A, optionally computing the left and/or right singular vectors. Non-blocking equivalent of PLASMA_zgebrd_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zgebrd
PLASMA_zgebrd_Tile
PLASMA_cgebrd_Tile_Async
PLASMA_dgebrd_Tile_Async
PLASMA_sgebrd_Tile_Async

Definition at line 383 of file zgebrd.c.

References A, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::n, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), plasma_desc_submatrix(), plasma_dynamic_call_4, plasma_dynamic_call_5, plasma_dynamic_call_7, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_NOT_SUPPORTED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_pzgerbb(), plasma_request_fail(), PLASMA_SUCCESS, PlasmaLower, PlasmaNoVec, PlasmaUpper, PlasmaVec, plasma_request_t::status, plasma_sequence_t::status, and T.

{
/* Validates the arguments, then queues a two-stage reduction of A: first
   to band bidiagonal form, then from band to the final condensed form
   with D and E as outputs of the last call.
   NOTE(review): this listing looks like an excerpt -- some call heads,
   guards and early returns are not shown here. */
PLASMA_desc descA = *A;
PLASMA_desc descT = *T;
plasma = plasma_context_self();
if (jobu != PlasmaNoVec && jobu !=PlasmaVec) {
plasma_error("PLASMA_zgebrd_Tile_Async", "illegal value of jobu");
}
if (jobvt != PlasmaNoVec && jobvt != PlasmaVec) {
plasma_error("PLASMA_zgebrd_Tile_Async", "illegal value of jobvt");
}
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zgebrd_Tile_Async", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zgebrd_Tile_Async", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zgebrd_Tile_Async", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zgebrd_Tile_Async", "invalid first descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* U and VT are only validated when the matching job flag asks for vectors. */
if ((jobu != PlasmaNoVec) && (plasma_desc_check(U) != PLASMA_SUCCESS)) {
plasma_error("PLASMA_zgebrd_Tile_Async", "invalid third descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if ((jobvt != PlasmaNoVec) && (plasma_desc_check(VT) != PLASMA_SUCCESS) ) {
plasma_error("PLASMA_zgebrd_Tile_Async", "invalid fourth descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zgebrd_Tile_Async", "invalid fifth descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (descA.nb != descA.mb) {
plasma_error("PLASMA_zgebrd_Tile_Async", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if (( (jobu != PlasmaNoVec) && (U->nb != U->mb) ) || ( (jobvt != PlasmaNoVec) && (VT->nb != VT->mb) )) {
plasma_error("PLASMA_zgebrd_Tile_Async", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Requests for singular vectors are rejected here, so the code below only
   runs with jobu == jobvt == PlasmaNoVec. */
if ((jobu == PlasmaVec) || (jobvt == PlasmaVec) ){
plasma_error("PLASMA_zgebrd_Tile_Async", "computing the singular vectors is not supported in this version");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Reduction to bidiagonal form
* with a two-stage approach.
*/
/* Reduction to BAND bidiagonal form
* May be further optimized using the algo described in Trefethen
*/
/* if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) { */
PLASMA_desc, descA,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* } */
/* else { */
/* plasma_dynamic_call_4(plasma_pzgerbbrh, */
/* PLASMA_desc, descA, */
/* PLASMA_desc, descT, */
/* PLASMA_sequence*, sequence, */
/* PLASMA_request*, request); */
/* } */
/* Build the U of the first stage */
/* if (jobu == PlasmaVec){ */
/* /\* Initialize U to Identity *\/ */
/* plasma_dynamic_call_6(plasma_pzlaset, */
/* PLASMA_enum, PlasmaUpperLower, */
/* PLASMA_Complex64_t, 0.0, */
/* PLASMA_Complex64_t, 1.0, */
/* PLASMA_desc, descU, */
/* PLASMA_sequence*, sequence, */
/* PLASMA_request*, request); */
/* /\* Accumulate the transformations from the first stage *\/ */
/* plasma_dynamic_call_6(plasma_pzungbr, */
/* PLASMA_enum, PlasmaLeft, */
/* PLASMA_desc, descA, */
/* PLASMA_desc, descU, */
/* PLASMA_desc, descT, */
/* PLASMA_sequence*, sequence, */
/* PLASMA_request*, request); */
/* } */
/* Build the VT of the first stage */
/* if (jobvt == PlasmaVec){ */
/* /\* Initialize VT to Identity *\/ */
/* plasma_dynamic_call_6(plasma_pzlaset, */
/* PLASMA_enum, PlasmaUpperLower, */
/* PLASMA_Complex64_t, 0.0, */
/* PLASMA_Complex64_t, 1.0, */
/* PLASMA_desc, descVT, */
/* PLASMA_sequence*, sequence, */
/* PLASMA_request*, request); */
/* /\* Accumulate the transformations from the first stage *\/ */
/* plasma_dynamic_call_6(plasma_pzungbr, */
/* PLASMA_enum, PlasmaRight, */
/* PLASMA_desc, descA, */
/* PLASMA_desc, descVT, */
/* PLASMA_desc, descT, */
/* PLASMA_sequence*, sequence, */
/* PLASMA_request*, request); */
/* } */
/* Set the V's to zero before the 2nd stage i.e., bulge chasing */
/* Two plasma_pzlaset2 calls: when m >= n the first gets all of descA and
   the second the sub-matrix starting nb columns in; when m < n the first
   gets the sub-matrix starting mb rows down and the second all of descA.
   NOTE(review): each call shows fewer argument pairs than the _5 suffix
   suggests -- some argument lines appear to be missing from this listing. */
plasma_dynamic_call_5(plasma_pzlaset2,
PLASMA_desc, descA.m >= descA.n ? descA : plasma_desc_submatrix(descA, descA.mb, 0, descA.m-descA.mb, descA.n),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
plasma_dynamic_call_5(plasma_pzlaset2,
PLASMA_desc, descA.m >= descA.n ? plasma_desc_submatrix(descA, 0, descA.nb, descA.m, descA.n-descA.nb) : descA,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Reduction from BAND bidiagonal to the final condensed form */
/* The uplo argument follows the shape of A: PlasmaUpper when m >= n,
   PlasmaLower otherwise. */
plasma_dynamic_call_7(plasma_pzgbrdb,
PLASMA_enum, descA.m >= descA.n ? PlasmaUpper : PlasmaLower,
PLASMA_desc, descA,
double*, D,
double*, E,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/*
*/
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgelqf_Tile_Async ( PLASMA_desc *  A,
PLASMA_desc *  T,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

PLASMA_zgelqf_Tile_Async - Computes the tile LQ factorization of a matrix. Non-blocking equivalent of PLASMA_zgelqf_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zgelqf
PLASMA_zgelqf_Tile
PLASMA_cgelqf_Tile_Async
PLASMA_dgelqf_Tile_Async
PLASMA_sgelqf_Tile_Async
PLASMA_zgelqs_Tile_Async

Definition at line 235 of file zgelqf.c.

References A, plasma_context_struct::householder, plasma_desc_t::mb, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), plasma_dynamic_call_5, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, plasma_parallel_call_4, plasma_pzgelqf(), plasma_request_fail(), PLASMA_RHBLK, PLASMA_SUCCESS, plasma_request_t::status, plasma_sequence_t::status, and T.

{
/* Validates the context, sequence, request and descriptors, then queues
   the LQ factorization of A with T as the second descriptor.
   NOTE(review): this listing looks like an excerpt -- the descriptor-check
   conditions, the branch condition and the first call head are not shown. */
PLASMA_desc descA = *A;
PLASMA_desc descT = *T;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zgelqf_Tile", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zgelqf_Tile", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zgelqf_Tile", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zgelqf_Tile", "invalid first descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zgelqf_Tile", "invalid second descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (descA.nb != descA.mb) {
plasma_error("PLASMA_zgelqf_Tile", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Quick return */
/*
if (min(M, N) == 0)
return PLASMA_SUCCESS;
*/
/* First branch: the call head is missing; the page's reference list names
   plasma_parallel_call_4 and plasma_pzgelqf(), presumably this call --
   TODO confirm against zgelqf.c. */
PLASMA_desc, descA,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}
else {
/* Alternative branch: dynamically scheduled plasma_pzgelqfrh variant. */
plasma_dynamic_call_5(plasma_pzgelqfrh,
PLASMA_desc, descA,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgelqs_Tile_Async ( PLASMA_desc *  A,
PLASMA_desc *  T,
PLASMA_desc *  B,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

PLASMA_zgelqs_Tile_Async - Computes a minimum-norm solution using a previously computed LQ factorization. Non-blocking equivalent of PLASMA_zgelqs_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zgelqs
PLASMA_zgelqs_Tile
PLASMA_cgelqs_Tile_Async
PLASMA_dgelqs_Tile_Async
PLASMA_sgelqs_Tile_Async
PLASMA_zgelqf_Tile_Async

Definition at line 256 of file zgelqs.c.

References A, B, plasma_context_struct::householder, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::n, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), plasma_desc_submatrix(), plasma_dynamic_call_8, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, plasma_parallel_call_3, plasma_parallel_call_7, plasma_parallel_call_9, plasma_pztile_zero(), plasma_pztrsm(), plasma_pzunmlq(), plasma_request_fail(), PLASMA_RHBLK, PLASMA_SUCCESS, PlasmaConjTrans, PlasmaLeft, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, plasma_request_t::status, plasma_sequence_t::status, and T.

{
/* Validates the arguments, then queues three operations on B using the
   LQ factors held in A and T.
   NOTE(review): this listing looks like an excerpt -- the descriptor-check
   conditions, the branch condition and several call heads are not shown. */
PLASMA_desc descA = *A;
PLASMA_desc descT = *T;
PLASMA_desc descB = *B;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zgelqs_Tile", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zgelqs_Tile", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zgelqs_Tile", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zgelqs_Tile", "invalid first descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zgelqs_Tile", "invalid second descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zgelqs_Tile", "invalid third descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (descA.nb != descA.mb || descB.nb != descB.mb) {
plasma_error("PLASMA_zgelqs_Tile", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Quick return */
/*
if (min(M, min(N, NRHS)) == 0) {
return PLASMA_SUCCESS;
}
*/
/* Operates on the rows of B from descA.m onward (descA.n - descA.m rows);
   the call head is missing, presumably plasma_pztile_zero from the page's
   reference list -- TODO confirm against zgelqs.c. */
PLASMA_desc, plasma_desc_submatrix(descB, descA.m, 0, descA.n-descA.m, descB.n),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Operates on the leading descA.m-by-descA.m block of A and the first
   descA.m rows of B; presumably the plasma_pztrsm triangular solve --
   TODO confirm. */
PLASMA_desc, plasma_desc_submatrix(descA, 0, 0, descA.m, descA.m),
PLASMA_desc, plasma_desc_submatrix(descB, 0, 0, descA.m, descB.n),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Uses the full A, B and T descriptors; presumably plasma_pzunmlq --
   TODO confirm. */
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}
else {
/* Alternative branch: dynamically scheduled plasma_pzunmlqrh variant. */
plasma_dynamic_call_8(plasma_pzunmlqrh,
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgels_Tile_Async ( PLASMA_enum  trans,
PLASMA_desc *  A,
PLASMA_desc *  T,
PLASMA_desc *  B,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

PLASMA_zgels_Tile_Async - Solves an overdetermined or underdetermined linear system of equations using the tile QR or the tile LQ factorization. Non-blocking equivalent of PLASMA_zgels_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zgels
PLASMA_zgels_Tile
PLASMA_cgels_Tile_Async
PLASMA_dgels_Tile_Async
PLASMA_sgels_Tile_Async

Definition at line 316 of file zgels.c.

References A, B, plasma_context_struct::householder, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::n, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), plasma_desc_submatrix(), plasma_dynamic_call_5, plasma_dynamic_call_8, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_NOT_SUPPORTED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, plasma_parallel_call_3, plasma_parallel_call_4, plasma_parallel_call_7, plasma_parallel_call_9, plasma_pzgelqf(), plasma_pzgeqrf(), plasma_pztile_zero(), plasma_pztrsm(), plasma_pzunmlq(), plasma_pzunmqr(), plasma_request_fail(), PLASMA_RHBLK, PLASMA_SUCCESS, PlasmaConjTrans, PlasmaLeft, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaUpper, plasma_request_t::status, plasma_sequence_t::status, and T.

{
/* Validates the arguments, then branches on the shape of A: the
   descA.m >= descA.n path uses the QR-named kernels, the other path the
   LQ-named kernels.
   NOTE(review): this listing looks like an excerpt -- the descriptor-check
   conditions, inner branch conditions and several call heads are missing,
   so the braces below do not balance as shown. */
PLASMA_desc descA = *A;
PLASMA_desc descT = *T;
PLASMA_desc descB = *B;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zgels_Tile", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zgels_Tile", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zgels_Tile", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zgels_Tile", "invalid first descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zgels_Tile", "invalid second descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zgels_Tile", "invalid third descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (descA.nb != descA.mb || descB.nb != descB.mb) {
plasma_error("PLASMA_zgels_Tile", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Any trans other than PlasmaNoTrans fails with PLASMA_ERR_NOT_SUPPORTED. */
if (trans != PlasmaNoTrans) {
plasma_error("PLASMA_zgels_Tile", "only PlasmaNoTrans supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_NOT_SUPPORTED);
}
/* Quick return - currently NOT equivalent to LAPACK's:
if (min(M, min(N, NRHS)) == 0) {
for (i = 0; i < max(M, N); i++)
for (j = 0; j < NRHS; j++)
B[j*LDB+i] = 0.0;
return PLASMA_SUCCESS;
}
*/
/* m >= n path: factorization of A, then application to B, then an
   operation on the leading descA.n-by-descA.n block of A and the first
   descA.n rows of B. */
if (descA.m >= descA.n) {
PLASMA_desc, descA,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}
else {
plasma_dynamic_call_5(plasma_pzgeqrfrh,
PLASMA_desc, descA,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
plasma_dynamic_call_8(plasma_pzunmqrrh,
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}
PLASMA_desc, plasma_desc_submatrix(descA, 0, 0, descA.n, descA.n),
PLASMA_desc, plasma_desc_submatrix(descB, 0, 0, descA.n, descB.n),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}
/* m < n path: starts with an operation on the rows of B from descA.m
   onward, then the LQ-named factorization and application steps. */
else {
PLASMA_desc, plasma_desc_submatrix(descB, descA.m, 0, descA.n-descA.m, descB.n),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, descA,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}
else {
plasma_dynamic_call_5(plasma_pzgelqfrh,
PLASMA_desc, descA,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}
PLASMA_desc, plasma_desc_submatrix(descA, 0, 0, descA.m, descA.m),
PLASMA_desc, plasma_desc_submatrix(descB, 0, 0, descA.m, descB.n),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}
else {
plasma_dynamic_call_8(plasma_pzunmlqrh,
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}
}
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgemm_Tile_Async ( PLASMA_enum  transA,
PLASMA_enum  transB,
PLASMA_Complex64_t  alpha,
PLASMA_desc *  A,
PLASMA_desc *  B,
PLASMA_Complex64_t  beta,
PLASMA_desc *  C,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

PLASMA_zgemm_Tile_Async - Performs matrix multiplication. Non-blocking equivalent of PLASMA_zgemm_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zgemm
PLASMA_zgemm_Tile
PLASMA_cgemm_Tile_Async
PLASMA_dgemm_Tile_Async
PLASMA_sgemm_Tile_Async

Definition at line 313 of file zgemm.c.

References A, B, C, plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::n, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_9, plasma_pzgemm(), plasma_request_fail(), PLASMA_SUCCESS, PlasmaConjTrans, PlasmaNoTrans, PlasmaTrans, plasma_request_t::status, and plasma_sequence_t::status.

{
/* Validates the arguments, checks that op(A), op(B) and C are conformable,
   then queues the multiplication.
   NOTE(review): this listing looks like an excerpt -- the descriptor-check
   conditions, the quick-return statement and the final call head are not
   shown. */
PLASMA_desc descA = *A;
PLASMA_desc descB = *B;
PLASMA_desc descC = *C;
int M, N, K;
int Am, An, Ai, Aj, Amb, Anb;
int Bm, Bn, Bi, Bj, Bmb, Bnb;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zgemm_Tile_Async", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zgemm_Tile_Async", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zgemm_Tile_Async", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zgemm_Tile_Async", "invalid first descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zgemm_Tile_Async", "invalid second descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zgemm_Tile_Async", "invalid third descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
/* An illegal transA fails the request with -1 and an illegal transB with
   -2 -- presumably the negated argument position; TODO confirm. */
if ((transA != PlasmaNoTrans) && (transA != PlasmaTrans) && (transA != PlasmaConjTrans)) {
plasma_error("PLASMA_zgemm_Tile_Async", "illegal value of transA");
return plasma_request_fail(sequence, request, -1);
}
if ((transB != PlasmaNoTrans) && (transB != PlasmaTrans) && (transB != PlasmaConjTrans)) {
plasma_error("PLASMA_zgemm_Tile_Async", "illegal value of transB");
return plasma_request_fail(sequence, request, -2);
}
/* Effective shape of op(A): with PlasmaNoTrans the descriptor fields are
   taken as-is; otherwise rows/columns, tile sizes and start offsets are
   swapped. */
if ( transA == PlasmaNoTrans ) {
Am = descA.m;
An = descA.n;
Amb = descA.mb;
Anb = descA.nb;
Ai = descA.i;
Aj = descA.j;
} else {
Am = descA.n;
An = descA.m;
Amb = descA.nb;
Anb = descA.mb;
Ai = descA.j;
Aj = descA.i;
}
/* Same mapping for op(B). */
if ( transB == PlasmaNoTrans ) {
Bm = descB.m;
Bn = descB.n;
Bmb = descB.mb;
Bnb = descB.nb;
Bi = descB.i;
Bj = descB.j;
} else {
Bm = descB.n;
Bn = descB.m;
Bmb = descB.nb;
Bnb = descB.mb;
Bi = descB.j;
Bj = descB.i;
}
/* op(A), op(B) and C must agree at three levels: tile sizes, matrix
   sizes and start indexes. */
if ( (Amb != descC.mb) || (Anb != Bmb) || (Bnb != descC.nb) ) {
plasma_error("PLASMA_zgemm_Tile_Async", "tile sizes have to match");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if ( (Am != descC.m) || (An != Bm) || (Bn != descC.n) ) {
plasma_error("PLASMA_zgemm_Tile_Async", "sizes of matrices have to match");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if ( (Ai != descC.i) || (Aj != Bi) || (Bj != descC.j) ) {
plasma_error("PLASMA_zgemm_Tile_Async", "start indexes have to match");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Problem dimensions used by the quick-return test: C is M-by-N and K is
   the inner dimension (columns of op(A)). */
M = descC.m;
N = descC.n;
K = An;
/* Quick return */
/* True when C is empty, or when beta == 1 and either alpha == 0 or K == 0.
   NOTE(review): the statement guarded by this condition is not visible in
   this listing. */
if (M == 0 || N == 0 ||
((alpha == (PLASMA_Complex64_t)0.0 || K == 0) && beta == (PLASMA_Complex64_t)1.0))
PLASMA_enum, transA,
PLASMA_enum, transB,
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_desc, descC,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgeqrf_Tile_Async ( PLASMA_desc *  A,
PLASMA_desc *  T,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

PLASMA_zgeqrf_Tile_Async - Computes the tile QR factorization of a matrix. Non-blocking equivalent of PLASMA_zgeqrf_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zgeqrf
PLASMA_zgeqrf_Tile
PLASMA_cgeqrf_Tile_Async
PLASMA_dgeqrf_Tile_Async
PLASMA_sgeqrf_Tile_Async
PLASMA_zgeqrs_Tile_Async

Definition at line 234 of file zgeqrf.c.

References A, plasma_context_struct::householder, plasma_desc_t::mb, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), plasma_dynamic_call_5, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, plasma_parallel_call_4, plasma_pzgeqrf(), plasma_request_fail(), PLASMA_RHBLK, PLASMA_SUCCESS, plasma_request_t::status, plasma_sequence_t::status, and T.

{
/* Validates the context, sequence, request and descriptors, then queues
   the QR factorization of A with T as the second descriptor.
   NOTE(review): this listing looks like an excerpt -- the descriptor-check
   conditions, the branch condition and the first call head are not shown. */
PLASMA_desc descA = *A;
PLASMA_desc descT = *T;
plasma = plasma_context_self();
if (plasma == NULL) {
/* NOTE(review): the sibling routines on this page report this condition
   through plasma_fatal_error; this one uses plasma_error -- confirm whether
   the difference is intended. */
plasma_error("PLASMA_zgeqrf_Tile", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zgeqrf_Tile", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zgeqrf_Tile", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zgeqrf_Tile", "invalid first descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zgeqrf_Tile", "invalid second descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (descA.nb != descA.mb) {
plasma_error("PLASMA_zgeqrf_Tile", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Quick return */
/*
if (min(M, N) == 0)
return PLASMA_SUCCESS;
*/
/* First branch: the call head is missing; the page's reference list names
   plasma_parallel_call_4 and plasma_pzgeqrf(), presumably this call --
   TODO confirm against zgeqrf.c. */
PLASMA_desc, descA,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}
else {
/* Alternative branch: dynamically scheduled plasma_pzgeqrfrh variant. */
plasma_dynamic_call_5(plasma_pzgeqrfrh,
PLASMA_desc, descA,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgeqrs_Tile_Async ( PLASMA_desc *  A,
PLASMA_desc *  T,
PLASMA_desc *  B,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

PLASMA_zgeqrs_Tile_Async - Computes a minimum-norm solution using the tile QR factorization. Non-blocking equivalent of PLASMA_zgeqrs_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zgeqrs
PLASMA_zgeqrs_Tile
PLASMA_cgeqrs_Tile_Async
PLASMA_dgeqrs_Tile_Async
PLASMA_sgeqrs_Tile_Async
PLASMA_zgeqrf_Tile_Async

Definition at line 255 of file zgeqrs.c.

References A, B, plasma_context_struct::householder, plasma_desc_t::mb, plasma_desc_t::n, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), plasma_desc_submatrix(), plasma_dynamic_call_8, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, plasma_parallel_call_7, plasma_parallel_call_9, plasma_pztrsm(), plasma_pzunmqr(), plasma_request_fail(), PLASMA_RHBLK, PLASMA_SUCCESS, PlasmaConjTrans, PlasmaLeft, PlasmaNonUnit, PlasmaNoTrans, PlasmaUpper, plasma_request_t::status, plasma_sequence_t::status, and T.

{
PLASMA_desc descA = *A;
PLASMA_desc descT = *T;
PLASMA_desc descB = *B;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zgeqrs_Tile", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zgeqrs_Tile", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zgeqrs_Tile", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zgeqrs_Tile", "invalid first descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zgeqrs_Tile", "invalid second descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zgeqrs_Tile", "invalid third descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (descA.nb != descA.mb || descB.nb != descB.mb) {
plasma_error("PLASMA_zgeqrs_Tile", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Quick return */
/*
if (min(M, min(N, NRHS)) == 0) {
return PLASMA_SUCCESS;
}
*/
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}
else {
plasma_dynamic_call_8(plasma_pzunmqrrh,
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}
PLASMA_desc, plasma_desc_submatrix(descA, 0, 0, descA.n, descA.n),
PLASMA_desc, plasma_desc_submatrix(descB, 0, 0, descA.n, descB.n),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgesv_incpiv_Tile_Async ( PLASMA_desc *  A,
PLASMA_desc *  L,
int *  IPIV,
PLASMA_desc *  B,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

PLASMA_zgesv_incpiv_Tile_Async - Solves a system of linear equations using the tile LU factorization. Non-blocking equivalent of PLASMA_zgesv_incpiv_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zgesv_incpiv
PLASMA_zgesv_incpiv_Tile
PLASMA_cgesv_incpiv_Tile_Async
PLASMA_dgesv_incpiv_Tile_Async
PLASMA_sgesv_incpiv_Tile_Async
PLASMA_zcgesv_Tile_Async

Definition at line 252 of file zgesv_incpiv.c.

References A, B, L, plasma_desc_t::mb, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_5, plasma_parallel_call_6, plasma_parallel_call_9, plasma_pzgetrf_incpiv(), plasma_pztrsm(), plasma_pztrsmpl(), plasma_request_fail(), PLASMA_SUCCESS, PlasmaLeft, PlasmaNonUnit, PlasmaNoTrans, PlasmaUpper, plasma_request_t::status, and plasma_sequence_t::status.

{
PLASMA_desc descA = *A;
PLASMA_desc descL = *L;
PLASMA_desc descB = *B;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zgesv_incpiv_Tile", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zgesv_incpiv_Tile", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zgesv_incpiv_Tile", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zgesv_incpiv_Tile", "invalid first descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zgesv_incpiv_Tile", "invalid second descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zgesv_incpiv_Tile", "invalid third descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (descA.nb != descA.mb || descB.nb != descB.mb) {
plasma_error("PLASMA_zgesv_incpiv_Tile", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Quick return */
/*
if (min(N, NRHS) == 0)
return PLASMA_SUCCESS;
*/
PLASMA_desc, descA,
PLASMA_desc, descL,
int*, IPIV,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_desc, descL,
int*, IPIV,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgesv_Tile_Async ( PLASMA_desc *  A,
int *  IPIV,
PLASMA_desc *  B,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

PLASMA_zgesv_Tile_Async - Solves a system of linear equations using the tile LU factorization. Non-blocking equivalent of PLASMA_zgesv_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zgesv
PLASMA_zgesv_Tile
PLASMA_cgesv_Tile_Async
PLASMA_dgesv_Tile_Async
PLASMA_sgesv_Tile_Async
PLASMA_zcgesv_Tile_Async

Definition at line 236 of file zgesv.c.

References A, B, plasma_desc_t::mb, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), plasma_dynamic_call_3, plasma_dynamic_call_4, plasma_dynamic_call_5, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_9, plasma_pztrsm(), plasma_request_fail(), PLASMA_SUCCESS, PlasmaLeft, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaUnit, PlasmaUpper, plasma_request_t::status, and plasma_sequence_t::status.

{
PLASMA_desc descA = *A;
PLASMA_desc descB = *B;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zgesv_Tile", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zgesv_Tile", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zgesv_Tile", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zgesv_Tile", "invalid first descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zgesv_Tile", "invalid third descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (descA.nb != descA.mb || descB.nb != descB.mb) {
plasma_error("PLASMA_zgesv_Tile", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Quick return */
/*
if (min(N, NRHS) == 0)
return PLASMA_SUCCESS;
*/
plasma_pzbarrier_tl2pnl,
PLASMA_desc, descA,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
plasma_dynamic_call_4(plasma_pzgetrf_rectil,
PLASMA_desc, descA,
int*, IPIV,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* swap */
plasma_pzbarrier_tl2pnl,
PLASMA_desc, descB,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
plasma_pzlaswp,
PLASMA_desc, descB,
int *, IPIV,
int, 1,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgesvd_Tile_Async ( PLASMA_enum  jobu,
PLASMA_enum  jobvt,
PLASMA_desc *  A,
double *  S,
PLASMA_desc *  U,
PLASMA_desc *  VT,
PLASMA_desc *  T,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

PLASMA_zgesvd_Tile_Async - Computes the singular value decomposition (SVD) of a complex M-by-N matrix A, optionally computing the left and/or right singular vectors. Non-blocking equivalent of PLASMA_zgesvd_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zgesvd
PLASMA_zgesvd_Tile
PLASMA_cgesvd_Tile_Async
PLASMA_dgesvd_Tile_Async
PLASMA_sgesvd_Tile_Async

Definition at line 383 of file zgesvd.c.

References A, lapack_const, plasma_desc_t::m, plasma_desc_t::mb, min, plasma_desc_t::n, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), plasma_desc_submatrix(), plasma_dynamic_call_4, plasma_dynamic_call_5, plasma_dynamic_call_7, plasma_dynamic_sync, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_NOT_SUPPORTED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_pzgerbb(), plasma_request_fail(), plasma_shared_alloc(), plasma_shared_free(), PLASMA_SUCCESS, PlasmaLower, PlasmaNoVec, PlasmaRealDouble, PlasmaUpper, PlasmaVec, plasma_request_t::status, plasma_sequence_t::status, and T.

{
PLASMA_desc descA = *A;
PLASMA_desc descT = *T;
double *E;
int minMN = min(descA.m, descA.n);
int NCVT = 0;
int NRU = 0;
int NCC = 0;
plasma = plasma_context_self();
if (jobu != PlasmaNoVec && jobu !=PlasmaVec) {
plasma_error("PLASMA_zgesvd_Tile_Async", "illegal value of jobu");
}
if (jobvt != PlasmaNoVec && jobvt != PlasmaVec) {
plasma_error("PLASMA_zgesvd_Tile_Async", "illegal value of jobvt");
}
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zgesvd_Tile_Async", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zgesvd_Tile_Async", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zgesvd_Tile_Async", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zgesvd_Tile_Async", "invalid first descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if ((jobu != PlasmaNoVec) && (plasma_desc_check(U) != PLASMA_SUCCESS)) {
plasma_error("PLASMA_zgesvd_Tile_Async", "invalid second descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if ((jobvt != PlasmaNoVec) && (plasma_desc_check(VT) != PLASMA_SUCCESS) ) {
plasma_error("PLASMA_zgesvd_Tile_Async", "invalid third descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zgesvd_Tile_Async", "invalid fourth descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (descA.nb != descA.mb) {
plasma_error("PLASMA_zgesvd_Tile_Async", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if (( (jobu != PlasmaNoVec) && (U->nb != U->mb) ) || ( (jobvt != PlasmaNoVec) && (VT->nb != VT->mb) )) {
plasma_error("PLASMA_zgesvd_Tile_Async", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if ((jobu == PlasmaVec) || (jobvt == PlasmaVec) ){
plasma_error("PLASMA_zgesvd_Tile_Async", "computing the singular vectors is not supported in this version");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
E = (double *)plasma_shared_alloc(plasma, minMN-1, PlasmaRealDouble);
/*
* Reduction to bidiagonal form with a two-stage approach.
*/
/*
* 1: Reduction to BAND bidiagonal form
* May be further optimized using the algo described in Trefethen
*/
/* if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) { */
PLASMA_desc, descA,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* } */
/* else { */
/* plasma_dynamic_call_4(plasma_pzgerbbrh, */
/* PLASMA_desc, descA, */
/* PLASMA_desc, descT, */
/* PLASMA_sequence*, sequence, */
/* PLASMA_request*, request); */
/* } */
/* Build the U of the first stage */
/* if (jobu == PlasmaVec){ */
/* /\* Initialize U to Identity *\/ */
/* plasma_dynamic_call_6(plasma_pzlaset, */
/* PLASMA_enum, PlasmaUpperLower, */
/* PLASMA_Complex64_t, 0.0, */
/* PLASMA_Complex64_t, 1.0, */
/* PLASMA_desc, descU, */
/* PLASMA_sequence*, sequence, */
/* PLASMA_request*, request); */
/* /\* Accumulate the transformations from the first stage *\/ */
/* if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) { */
/* plasma_dynamic_call_6(plasma_pzungbr, */
/* PLASMA_enum, PlasmaLeft, */
/* PLASMA_desc, descA, */
/* PLASMA_desc, descU, */
/* PLASMA_desc, descT, */
/* PLASMA_sequence*, sequence, */
/* PLASMA_request*, request); */
/* } */
/* else { */
/* plasma_dynamic_call_6(plasma_pzungbrrh, */
/* PLASMA_enum, PlasmaLeft, */
/* PLASMA_desc, descA, */
/* PLASMA_desc, descU, */
/* PLASMA_desc, descT, */
/* PLASMA_sequence*, sequence, */
/* PLASMA_request*, request); */
/* } */
/* } */
/* Build the VT of the first stage */
/* if (jobvt == PlasmaVec){ */
/* /\* Initialize VT to Identity *\/ */
/* plasma_dynamic_call_6(plasma_pzlaset, */
/* PLASMA_enum, PlasmaUpperLower, */
/* PLASMA_Complex64_t, 0.0, */
/* PLASMA_Complex64_t, 1.0, */
/* PLASMA_desc, descVT, */
/* PLASMA_sequence*, sequence, */
/* PLASMA_request*, request); */
/* /\* Accumulate the transformations from the first stage *\/ */
/* if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) { */
/* plasma_dynamic_call_6(plasma_pzungbr, */
/* PLASMA_enum, PlasmaRight, */
/* PLASMA_desc, descA, */
/* PLASMA_desc, descVT, */
/* PLASMA_desc, descT, */
/* PLASMA_sequence*, sequence, */
/* PLASMA_request*, request); */
/* } */
/* else { */
/* plasma_dynamic_call_6(plasma_pzungbrrh, */
/* PLASMA_enum, PlasmaRight, */
/* PLASMA_desc, descA, */
/* PLASMA_desc, descVT, */
/* PLASMA_desc, descT, */
/* PLASMA_sequence*, sequence, */
/* PLASMA_request*, request); */
/* } */
/* } */
/*
* Set the V's to zero before the 2nd stage i.e., bulge chasing
*/
plasma_dynamic_call_5(plasma_pzlaset2,
PLASMA_desc, descA.m >= descA.n ? descA : plasma_desc_submatrix(descA, descA.mb, 0, descA.m-descA.mb, descA.n),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
plasma_dynamic_call_5(plasma_pzlaset2,
PLASMA_desc, descA.m >= descA.n ? plasma_desc_submatrix(descA, 0, descA.nb, descA.m, descA.n-descA.nb) : descA,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/*
* 2: Reduction from BAND bidiagonal to the final condensed form
*/
plasma_dynamic_call_7(plasma_pzgbrdb,
PLASMA_enum, descA.m >= descA.n ? PlasmaUpper : PlasmaLower,
PLASMA_desc, descA,
double*, S,
double*, E,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/*
* Compute the singular values ONLY for now
*/
if (descA.m >= descA.n)
LAPACKE_zbdsqr(
LAPACK_COL_MAJOR, lapack_const(PlasmaUpper),
minMN, NCVT, NRU, NCC,
S, E,
NULL, 1, NULL, 1, NULL, 1 );
else {
LAPACKE_zbdsqr(
LAPACK_COL_MAJOR, lapack_const(PlasmaLower),
minMN, NCVT, NRU, NCC,
S, E,
NULL, 1, NULL, 1, NULL, 1 );
}
plasma_shared_free(plasma, E);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgetrf_incpiv_Tile_Async ( PLASMA_desc *  A,
PLASMA_desc *  L,
int *  IPIV,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

PLASMA_zgetrf_incpiv_Tile_Async - Computes the tile LU factorization of a matrix. Non-blocking equivalent of PLASMA_zgetrf_incpiv_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zgetrf_incpiv
PLASMA_zgetrf_incpiv_Tile
PLASMA_cgetrf_incpiv_Tile_Async
PLASMA_dgetrf_incpiv_Tile_Async
PLASMA_sgetrf_incpiv_Tile_Async
PLASMA_zgetrs_incpiv_Tile_Async

Definition at line 232 of file zgetrf_incpiv.c.

References A, L, plasma_desc_t::mb, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_5, plasma_pzgetrf_incpiv(), plasma_request_fail(), PLASMA_SUCCESS, plasma_request_t::status, and plasma_sequence_t::status.

{
PLASMA_desc descA = *A;
PLASMA_desc descL = *L;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zgetrf_incpiv_Tile", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zgetrf_incpiv_Tile", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zgetrf_incpiv_Tile", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zgetrf_incpiv_Tile", "invalid first descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zgetrf_incpiv_Tile", "invalid second descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (descA.nb != descA.mb) {
plasma_error("PLASMA_zgetrf_incpiv_Tile", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Quick return */
/*
if (min(M, N) == 0)
return PLASMA_SUCCESS;
*/
PLASMA_desc, descA,
PLASMA_desc, descL,
int*, IPIV,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgetrf_Tile_Async ( PLASMA_desc *  A,
int *  IPIV,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

PLASMA_zgetrf_Tile_Async - Computes the tile LU factorization of a matrix. Non-blocking equivalent of PLASMA_zgetrf_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zgetrf
PLASMA_zgetrf_Tile
PLASMA_cgetrf_Tile_Async
PLASMA_dgetrf_Tile_Async
PLASMA_sgetrf_Tile_Async
PLASMA_zgetrs_Tile_Async

Definition at line 237 of file zgetrf.c.

References A, plasma_desc_t::mb, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), plasma_dynamic_call_3, plasma_dynamic_call_4, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_request_fail(), PLASMA_SUCCESS, plasma_request_t::status, and plasma_sequence_t::status.

{
PLASMA_desc descA = *A;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zgetrf_Tile", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zgetrf_Tile", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zgetrf_Tile", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zgetrf_Tile", "invalid first descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (descA.nb != descA.mb) {
plasma_error("PLASMA_zgetrf_Tile", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Quick return */
/*
if (min(M, N) == 0)
return PLASMA_SUCCESS;
*/
plasma_pzbarrier_tl2pnl,
PLASMA_desc, descA,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
plasma_dynamic_call_4(plasma_pzgetrf_rectil,
PLASMA_desc, descA,
int*, IPIV,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
plasma_pzbarrier_pnl2tl,
PLASMA_desc, descA,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgetri_Tile_Async ( PLASMA_desc *  A,
int *  IPIV,
PLASMA_desc *  W,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

PLASMA_zgetri_Tile_Async - Computes the inverse of a matrix using the LU factorization computed by PLASMA_zgetrf. This method inverts U and then computes inv(A) by solving the system inv(A)*L = inv(U) for inv(A). Non-blocking equivalent of PLASMA_zgetri_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zgetri
PLASMA_zgetri_Tile
PLASMA_cgetri_Tile_Async
PLASMA_dgetri_Tile_Async
PLASMA_sgetri_Tile_Async
PLASMA_zgetrf_Tile_Async

Definition at line 233 of file zgetri.c.

References A, plasma_desc_t::m, max, plasma_desc_t::mb, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), plasma_dynamic_call_3, plasma_dynamic_call_5, plasma_dynamic_call_9, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_pztrsmrv(), plasma_request_fail(), PLASMA_SUCCESS, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaRight, PlasmaUnit, PlasmaUpper, plasma_request_t::status, plasma_sequence_t::status, and W.

{
PLASMA_desc descA = *A;
PLASMA_desc descW = *W;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zgetri_Tile_Async", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zgetri_Tile_Async", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zgetri_Tile_Async", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zgetri_Tile_Async", "invalid descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (descA.nb != descA.mb) {
plasma_error("PLASMA_zgetri_Tile_Async", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Quick return */
if (max(descA.m, 0) == 0)
plasma_dynamic_call_5(plasma_pztrtri,
PLASMA_desc, descA,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, descA,
PLASMA_desc, descW,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* No need for barrier tile2row because of previous dependencies */
/* swap */
plasma_pzlaswpc,
PLASMA_desc, descA,
int *, IPIV,
int, -1,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
plasma_pzbarrier_row2tl,
PLASMA_desc, descA,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgetrs_incpiv_Tile_Async ( PLASMA_desc *  A,
PLASMA_desc *  L,
int *  IPIV,
PLASMA_desc *  B,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

PLASMA_zgetrs_incpiv_Tile_Async - Solves a system of linear equations using previously computed LU factorization. Non-blocking equivalent of PLASMA_zgetrs_incpiv_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zgetrs_incpiv
PLASMA_zgetrs_incpiv_Tile
PLASMA_cgetrs_incpiv_Tile_Async
PLASMA_dgetrs_incpiv_Tile_Async
PLASMA_sgetrs_incpiv_Tile_Async
PLASMA_zgetrf_incpiv_Tile_Async

Definition at line 255 of file zgetrs_incpiv.c.

References A, B, L, plasma_desc_t::mb, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_6, plasma_parallel_call_9, plasma_pztrsm(), plasma_pztrsmpl(), plasma_request_fail(), PLASMA_SUCCESS, PlasmaLeft, PlasmaNonUnit, PlasmaNoTrans, PlasmaUpper, plasma_request_t::status, and plasma_sequence_t::status.

{
PLASMA_desc descA = *A;
PLASMA_desc descL = *L;
PLASMA_desc descB = *B;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zgetrs_incpiv_Tile", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zgetrs_incpiv_Tile", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zgetrs_incpiv_Tile", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zgetrs_incpiv_Tile", "invalid first descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zgetrs_incpiv_Tile", "invalid second descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zgetrs_incpiv_Tile", "invalid third descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (descA.nb != descA.mb || descB.nb != descB.mb) {
plasma_error("PLASMA_zgetrs_incpiv_Tile", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Quick return */
/*
if (min(N, NRHS) == 0)
return PLASMA_SUCCESS;
*/
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_desc, descL,
int*, IPIV,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgetrs_Tile_Async ( PLASMA_enum  trans,
PLASMA_desc *  A,
int *  IPIV,
PLASMA_desc *  B,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

PLASMA_zgetrs_Tile_Async - Solves a system of linear equations using previously computed LU factorization. Non-blocking equivalent of PLASMA_zgetrs_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zgetrs
PLASMA_zgetrs_Tile
PLASMA_cgetrs_Tile_Async
PLASMA_dgetrs_Tile_Async
PLASMA_sgetrs_Tile_Async
PLASMA_zgetrf_Tile_Async

Definition at line 248 of file zgetrs.c.

References A, B, plasma_desc_t::mb, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), plasma_dynamic_call_3, plasma_dynamic_call_5, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_9, plasma_pztrsm(), plasma_request_fail(), PLASMA_SUCCESS, PlasmaLeft, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaUnit, PlasmaUpper, plasma_request_t::status, and plasma_sequence_t::status.

{
PLASMA_desc descA = *A;
PLASMA_desc descB = *B;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zgetrs_Tile", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zgetrs_Tile", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zgetrs_Tile", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zgetrs_Tile", "invalid first descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zgetrs_Tile", "invalid third descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (descA.nb != descA.mb || descB.nb != descB.mb) {
plasma_error("PLASMA_zgetrs_Tile", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Quick return */
/*
if (min(N, NRHS) == 0)
return PLASMA_SUCCESS;
*/
if ( trans == PlasmaNoTrans )
{
plasma_pzbarrier_tl2pnl,
PLASMA_desc, descB,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* swap */
plasma_pzlaswp,
PLASMA_desc, descB,
int *, IPIV,
int, 1,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}
else {
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
plasma_pzbarrier_tl2pnl,
PLASMA_desc, descB,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* swap */
plasma_pzlaswp,
PLASMA_desc, descB,
int *, IPIV,
int, -1,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
plasma_pzbarrier_pnl2tl,
PLASMA_desc, descB,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zheev_Tile_Async ( PLASMA_enum  jobz,
PLASMA_enum  uplo,
PLASMA_desc *  A,
double *  W,
PLASMA_desc *  T,
PLASMA_desc *  Q,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

PLASMA_zheev_Tile_Async - Computes all eigenvalues and, optionally, eigenvectors of a complex Hermitian matrix A using a two-stage approach: First stage: reduction to band tridiagonal form; Second stage: reduction from band to tridiagonal form.

May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zheev
PLASMA_zheev_Tile
PLASMA_cheev_Tile_Async
PLASMA_dsyev_Tile_Async
PLASMA_ssyev_Tile_Async

Definition at line 326 of file zheev.c.

References A, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::n, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), plasma_desc_submatrix(), plasma_dynamic_call_5, plasma_dynamic_call_7, plasma_dynamic_sync, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_request_fail(), plasma_shared_alloc(), plasma_shared_free(), PLASMA_SUCCESS, PlasmaLower, PlasmaNoVec, PlasmaRealDouble, PlasmaUpper, PlasmaVec, plasma_request_t::status, plasma_sequence_t::status, and T.

{
/*
 * Body of PLASMA_zheev_Tile_Async as shown in this listing.
 *
 * Visible steps, in order:
 *   1. fetch the PLASMA context and reject NULL context/sequence/request;
 *   2. propagate the sequence status into the request;
 *   3. validate jobz, uplo, and that A is square with square tiles;
 *   4. allocate the workspace E;
 *   5. call plasma_pzherbt, plasma_pzlaset2 and plasma_pzhbrdt on descA;
 *   6. call LAPACKE_dsterf on (W, E);
 *   7. free E.
 *
 * NOTE(review): the listing looks lossy -- `plasma` is assigned without a
 * visible declaration, the `else` after the sequence-status test has no
 * visible statement of its own, several `}` have no visible opening `if`,
 * the plasma_pzlaset2 argument list has no visible call head, and no final
 * `return` is visible. Verify against zheev.c before relying on the exact
 * control flow shown here.
 */
/* Local copies of the descriptors that A and T point to. */
PLASMA_desc descA = *A;
PLASMA_desc descT = *T;
/* Workspace allocated below; passed alongside W to plasma_pzhbrdt and LAPACKE_dsterf. */
double *E;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zheev_Tile_Async", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zheev_Tile_Async", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zheev_Tile_Async", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zheev_Tile_Async", "invalid descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zheev_Tile_Async", "invalid descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if (jobz == PlasmaVec){
plasma_error("PLASMA_zheev_Tile_Async", "invalid descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
}
/* Check input arguments */
if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
plasma_error("PLASMA_zheev_Tile_Async", "illegal value of jobz");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if (uplo != PlasmaLower && uplo != PlasmaUpper) {
plasma_error("PLASMA_zheev_Tile_Async", "illegal value of uplo");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if (descA.m != descA.n) {
plasma_error("PLASMA_zheev_Tile_Async", "matrix need to be square");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if (descA.nb != descA.mb) {
plasma_error("PLASMA_zheev_Tile_Async", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Eigenvector computation is rejected outright: jobz == PlasmaVec always fails here. */
if (jobz == PlasmaVec) {
plasma_error("PLASMA_zheev_Tile_Async", "computing the eigenvectors is not supported in this version");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* NOTE(review): as listed, this test cannot succeed because the previous
 * check already returned when jobz == PlasmaVec. */
if (jobz == PlasmaVec){
if (Q->nb != Q->mb) {
plasma_error("PLASMA_zheev_Tile_Async", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
}
/* Workspace request uses descA.n-1 and PlasmaRealDouble; no NULL check on
 * the result is visible in this listing. */
E = (double *)plasma_shared_alloc(plasma, descA.n-1, PlasmaRealDouble);
/* Currently NOT equivalent to LAPACK's
*/
/* Reduction to tridiagonal form
* with a two-stage approach.
*/
/* Reduction to BAND tridiagonal form
*/
plasma_dynamic_call_5(plasma_pzherbt,
PLASMA_desc, descA,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/*
* Build the Q of the first stage
*/
/* if (jobz == PlasmaVec){ */
/* /\* Initialize Q to Identity *\/ */
/* plasma_dynamic_call_6(plasma_pzlaset, */
/* PLASMA_enum, PlasmaUpperLower, */
/* PLASMA_Complex64_t, 0.0, */
/* PLASMA_Complex64_t, 1.0, */
/* PLASMA_desc, descQ, */
/* PLASMA_sequence*, sequence, */
/* PLASMA_request*, request); */
/* /\* Accumulate the transformations from the first stage *\/ */
/* plasma_dynamic_call_6(plasma_pzungtr, */
/* PLASMA_enum, uplo, */
/* PLASMA_desc, descA, */
/* PLASMA_desc, descQ, */
/* PLASMA_desc, descT, */
/* PLASMA_sequence*, sequence, */
/* PLASMA_request*, request); */
/* } */
/* Set the V's to zero before the 2nd stage (bulge chasing) */
/* The submatrix passed below skips the first tile row (uplo == PlasmaLower)
 * or the first tile column (otherwise) of descA.
 * NOTE(review): the head of this call is not visible in the listing. */
plasma_pzlaset2,
PLASMA_desc, uplo == PlasmaLower ? plasma_desc_submatrix(descA, descA.mb, 0, descA.m-descA.mb, descA.n-descA.nb) :
plasma_desc_submatrix(descA, 0, descA.nb, descA.m-descA.mb, descA.n-descA.nb),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Reduction from BAND tridiagonal to the final condensed form
*/
plasma_dynamic_call_7(plasma_pzhbrdt,
PLASMA_desc, descA,
double*, W,
double*, E,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/*
* For eigenvalues only, call DSTERF.
* For eigenvectors, first call ZUNGTR to generate the unitary matrix,
* then call ZSTEQR.
*/
/* Both branches currently issue the same LAPACKE_dsterf call; the
 * eigenvector-specific work in the else branch is commented out. */
if (jobz == PlasmaNoVec){
LAPACKE_dsterf(descA.n, W, E);
}else {
LAPACKE_dsterf(descA.n, W, E);
/* Accumulate the transformations from the second stage */
/*
plasma_dynamic_call_6(plasma_pzungtr,
PLASMA_enum, uplo,
PLASMA_desc, descA,
PLASMA_desc, descQ,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
LAPACKE_zsteqr(jobz, descA.n, W, E, Q->mat, Q->lm);
*/
}
/* If matrix was scaled, then rescale eigenvalues appropriately.
*/
/* Release the workspace obtained from plasma_shared_alloc above. */
plasma_shared_free(plasma, E);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zheevd_Tile_Async ( PLASMA_enum  jobz,
PLASMA_enum  uplo,
PLASMA_desc *  A,
double *  W,
PLASMA_desc *  T,
PLASMA_desc *  Q,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

PLASMA_zheevd_Tile_Async - Computes all eigenvalues and, optionally, eigenvectors of a complex Hermitian matrix A using a two-stage approach: First stage: reduction to band tridiagonal form; Second stage: reduction from band to tridiagonal form.

May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zheevd
PLASMA_zheevd_Tile
PLASMA_cheevd_Tile_Async
PLASMA_dsyevd_Tile_Async
PLASMA_ssyevd_Tile_Async

Definition at line 328 of file zheevd.c.

References A, lapack_const, plasma_desc_t::ln, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::n, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), plasma_desc_submatrix(), plasma_dynamic_call_5, plasma_dynamic_call_7, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), PLASMA_IB, PLASMA_NB, plasma_request_fail(), plasma_shared_alloc(), plasma_shared_free(), PLASMA_SUCCESS, PlasmaLower, PlasmaNoVec, PlasmaRealDouble, PlasmaVec, plasma_request_t::status, plasma_sequence_t::status, and T.

{
/*
 * Body of PLASMA_zheevd_Tile_Async as shown in this listing.
 *
 * Visible steps, in order:
 *   1. fetch the PLASMA context and reject NULL context/sequence/request;
 *   2. propagate the sequence status into the request;
 *   3. read the tile parameters PLASMA_NB / PLASMA_IB;
 *   4. validate jobz and that A is square with square tiles;
 *   5. allocate the workspace E;
 *   6. call plasma_pzherbt, plasma_pzlaset2 and plasma_pzhbrdt on descA;
 *   7. call LAPACKE_zstedc on (W, E);
 *   8. free E.
 *
 * NOTE(review): the listing looks lossy -- `plasma` is assigned without a
 * visible declaration, the `else` after the sequence-status test has no
 * visible statement of its own, several `}` have no visible opening `if`,
 * the plasma_pzlaset2 argument list has no visible call head, and no final
 * `return` is visible. Verify against zheevd.c before relying on the exact
 * control flow shown here.
 *
 * NOTE(review): unlike jobz, `uplo` is used below (to pick the submatrix)
 * without any visible range check.
 */
/* IBNB, NT and INFO are assigned below but never read in the visible listing. */
int NB, IB, IBNB, NT, INFO;
/* Local copies of the descriptors that A and T point to. */
PLASMA_desc descA = *A;
PLASMA_desc descT = *T;
/* Workspace allocated below; passed alongside W to plasma_pzhbrdt and LAPACKE_zstedc. */
double *E;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zheevd_Tile_Async", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zheevd_Tile_Async", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zheevd_Tile_Async", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Set NT & NTRHS */
NB = PLASMA_NB;
IB = PLASMA_IB;
IBNB = IB*NB;
/* NT = ceil(descA.ln / NB). */
NT = (descA.ln%NB==0) ? (descA.ln/NB) : (descA.ln/NB+1);
/* Check descriptors for correctness */
plasma_error("PLASMA_zheevd_Tile_Async", "invalid descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zheevd_Tile_Async", "invalid descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if (jobz == PlasmaVec){
plasma_error("PLASMA_zheevd_Tile_Async", "invalid descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
}
/* Check input arguments */
if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
plasma_error("PLASMA_zheevd_Tile_Async", "illegal value of jobz");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if (descA.m != descA.n) {
plasma_error("PLASMA_zheevd_Tile_Async", "matrix need to be square");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if (descA.nb != descA.mb) {
plasma_error("PLASMA_zheevd_Tile_Async", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Eigenvector computation is rejected outright: jobz == PlasmaVec always fails here. */
if (jobz == PlasmaVec) {
plasma_error("PLASMA_zheevd_Tile_Async", "computing the eigenvectors is not supported in this version");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* NOTE(review): as listed, this test cannot succeed because the previous
 * check already returned when jobz == PlasmaVec. */
if (jobz == PlasmaVec){
if (Q->nb != Q->mb) {
plasma_error("PLASMA_zheevd_Tile_Async", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
}
/* Workspace request uses descA.n-1 and PlasmaRealDouble; no NULL check on
 * the result is visible in this listing. */
E = (double *)plasma_shared_alloc(plasma, descA.n-1, PlasmaRealDouble);
/* Currently NOT equivalent to LAPACK's
*/
/* Reduction to tridiagonal form
* with a two-stage approach.
*/
/* Reduction to BAND tridiagonal form
*/
plasma_dynamic_call_5(plasma_pzherbt,
PLASMA_desc, descA,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/*
* Build the Q of the first stage
*/
/* if (jobz == PlasmaVec){ */
/* /\* Initialize Q to Identity *\/ */
/* plasma_dynamic_call_6(plasma_pzlaset, */
/* PLASMA_enum, PlasmaUpperLower, */
/* PLASMA_Complex64_t, 0.0, */
/* PLASMA_Complex64_t, 1.0, */
/* PLASMA_desc, descQ, */
/* PLASMA_sequence*, sequence, */
/* PLASMA_request*, request); */
/* /\* Accumulate the transformations from the first stage *\/ */
/* plasma_dynamic_call_6(plasma_pzungtr, */
/* PLASMA_enum, uplo, */
/* PLASMA_desc, descA, */
/* PLASMA_desc, descQ, */
/* PLASMA_desc, descT, */
/* PLASMA_sequence*, sequence, */
/* PLASMA_request*, request); */
/* } */
/* Set the V's to zero before the 2nd stage (bulge chasing) */
/* The submatrix passed below skips the first tile row (uplo == PlasmaLower)
 * or the first tile column (otherwise) of descA.
 * NOTE(review): the head of this call is not visible in the listing. */
plasma_pzlaset2,
PLASMA_desc, uplo == PlasmaLower ? plasma_desc_submatrix(descA, descA.mb, 0, descA.m-descA.mb, descA.n-descA.nb) :
plasma_desc_submatrix(descA, 0, descA.nb, descA.m-descA.mb, descA.n-descA.nb),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Reduction from BAND tridiagonal to the final condensed form
*/
plasma_dynamic_call_7(plasma_pzhbrdt,
PLASMA_desc, descA,
double*, W,
double*, E,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/*
* For eigenvalues only, call DSTERF.
* For eigenvectors, first call ZUNGTR to generate the unitary matrix,
* then call ZSTEQR.
*/
/* NOTE(review): the comment above mentions DSTERF/ZSTEQR, but the code
 * below calls LAPACKE_zstedc. Both branches issue the same call with
 * PlasmaNoVec, a NULL eigenvector pointer and a last argument of 1; the
 * value stored in INFO is not examined in the visible listing. */
if (jobz == PlasmaNoVec){
INFO = LAPACKE_zstedc( LAPACK_COL_MAJOR, lapack_const(PlasmaNoVec),
descA.n, W, E, NULL, 1);
}else {
INFO = LAPACKE_zstedc( LAPACK_COL_MAJOR, lapack_const(PlasmaNoVec),
descA.n, W, E, NULL, 1);
/* Accumulate the transformations from the second stage */
/*
plasma_dynamic_call_6(plasma_pzungtr,
PLASMA_enum, uplo,
PLASMA_desc, descA,
PLASMA_desc, descQ,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
LAPACKE_zsteqr(jobz, descA.n, W, E, Q->mat, Q->lm);
*/
}
/* If matrix was scaled, then rescale eigenvalues appropriately.
*/
/* Release the workspace obtained from plasma_shared_alloc above. */
plasma_shared_free(plasma, E);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zhegst_Tile_Async ( PLASMA_enum  itype,
PLASMA_enum  uplo,
PLASMA_desc *  A,
PLASMA_desc *  B,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

PLASMA_zhegst_Tile_Async - Reduces a complex Hermitian-definite generalized eigenproblem to standard form. If PlasmaItype == 1, the problem is A*x = lambda*B*x, and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H). If PlasmaItype == 2 or 3, the problem is A*B*x = lambda*x or B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L. B must have been previously factorized as U**H*U or L*L**H by PLASMA_ZPOTRF. ONLY PlasmaItype == 1 and PlasmaLower supported! Non-blocking equivalent of PLASMA_zhegst_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zhegst
PLASMA_zhegst_Tile
PLASMA_chegst_Tile_Async
PLASMA_dsygst_Tile_Async
PLASMA_ssygst_Tile_Async
PLASMA_zhegv_Tile_Async

Definition at line 291 of file zhegst.c.

References A, B, plasma_desc_t::mb, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), plasma_dynamic_call_6, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_request_fail(), PLASMA_SUCCESS, plasma_request_t::status, and plasma_sequence_t::status.

{
/*
 * Body of PLASMA_zhegst_Tile_Async as shown in this listing.
 *
 * Visible steps, in order:
 *   1. fetch the PLASMA context and reject NULL context/sequence/request;
 *   2. propagate the sequence status into the request;
 *   3. require square tiles on A;
 *   4. call plasma_pzhegst on (descA, descB) via plasma_dynamic_call_6.
 *
 * Every diagnostic names this function (PLASMA_zhegst_Tile_Async) so that
 * a reported error can be traced to the entry point that raised it.
 *
 * NOTE(review): the listing looks lossy -- `plasma` is assigned without a
 * visible declaration, the `else` after the sequence-status test has no
 * visible statement of its own, two `}` have no visible opening `if`, the
 * plasma_dynamic_call_6 invocation shows fewer type/value pairs than its
 * name suggests, and no final `return` is visible. Verify against zhegst.c
 * before relying on the exact control flow shown here.
 */
/* Local copies of the descriptors that A and B point to. */
PLASMA_desc descA = *A;
PLASMA_desc descB = *B;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zhegst_Tile_Async", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zhegst_Tile_Async", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zhegst_Tile_Async", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zhegst_Tile_Async", "invalid first descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zhegst_Tile_Async", "invalid second descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (descA.nb != descA.mb) {
plasma_error("PLASMA_zhegst_Tile_Async", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/*
* Transform Hermitian-definite generalized eigenproblem
* to standard form
*/
plasma_dynamic_call_6(plasma_pzhegst,
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zhegv_Tile_Async ( PLASMA_enum  itype,
PLASMA_enum  jobz,
PLASMA_enum  uplo,
PLASMA_desc *  A,
PLASMA_desc *  B,
double *  W,
PLASMA_desc *  T,
PLASMA_desc *  Q,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

PLASMA_zhegv_Tile_Async - Computes all eigenvalues and, optionally, eigenvectors of a complex generalized Hermitian-definite eigenproblem of the form: A*x=(lambda)*B*x, A*B*x=(lambda)*x, or B*A*x=(lambda)*x. Here A and B are assumed to be Hermitian and B is also positive definite.

Non-blocking equivalent of PLASMA_zhegv_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zhegv
PLASMA_zhegv_Tile
PLASMA_chegv_Tile_Async
PLASMA_dsygv_Tile_Async
PLASMA_ssygv_Tile_Async

Definition at line 424 of file zhegv.c.

References A, B, plasma_desc_t::ln, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::n, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), plasma_desc_submatrix(), plasma_dynamic_call_5, plasma_dynamic_call_6, plasma_dynamic_call_7, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_4, plasma_pzpotrf(), plasma_request_fail(), plasma_shared_alloc(), plasma_shared_free(), PLASMA_SUCCESS, PlasmaLower, PlasmaNoVec, PlasmaRealDouble, PlasmaUpper, PlasmaVec, plasma_request_t::status, plasma_sequence_t::status, and T.

{
/*
 * Body of PLASMA_zhegv_Tile_Async as shown in this listing.
 *
 * Visible steps, in order:
 *   1. fetch the PLASMA context and reject NULL context/sequence/request;
 *   2. propagate the sequence status into the request;
 *   3. validate itype, jobz, uplo and that A has square tiles;
 *   4. allocate the workspace E;
 *   5. run a step on descB whose call head is not visible, then bail out
 *      with descA.ln + status if the sequence reports a non-zero status;
 *   6. call plasma_pzhegst, plasma_pzherbt, plasma_pzlaset2 and
 *      plasma_pzhbrdt;
 *   7. call LAPACKE_dsterf on (W, E);
 *   8. free E.
 *
 * NOTE(review): the listing looks lossy -- `plasma` is assigned without a
 * visible declaration, the `else` after the sequence-status test has no
 * visible statement of its own, several `}` have no visible opening `if`,
 * two argument lists have no visible call head, and no final `return` is
 * visible. Verify against zhegv.c before relying on the exact control flow
 * shown here.
 */
int status;
/* Local copies of the descriptors that A, B and T point to. */
PLASMA_desc descA = *A;
PLASMA_desc descB = *B;
PLASMA_desc descT = *T;
/* Workspace allocated below; passed alongside W to plasma_pzhbrdt and LAPACKE_dsterf. */
double *E;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zhegv_Tile_Async", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zhegv_Tile_Async", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zhegv_Tile_Async", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zhegv_Tile_Async", "invalid first descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zhegv_Tile_Async", "invalid second descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zhegv_Tile_Async", "invalid descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if (jobz == PlasmaVec){
plasma_error("PLASMA_zhegv_Tile_Async", "invalid descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
}
/* Check input arguments */
if (itype != 1 && itype != 2 && itype != 3) {
plasma_error("PLASMA_zhegv_Tile_Async", "Illegal value of itype");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
plasma_error("PLASMA_zhegv_Tile_Async", "illegal value of jobz");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if (uplo != PlasmaLower && uplo != PlasmaUpper) {
/* Report under this function's own name so the diagnostic is traceable. */
plasma_error("PLASMA_zhegv_Tile_Async", "illegal value of uplo");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if (descA.nb != descA.mb) {
plasma_error("PLASMA_zhegv_Tile_Async", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Eigenvector computation is rejected outright: jobz == PlasmaVec always fails here. */
if (jobz == PlasmaVec) {
plasma_error("PLASMA_zhegv_Tile_Async", "computing the eigenvectors is not supported in this version");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* NOTE(review): as listed, this test cannot succeed because the previous
 * check already returned when jobz == PlasmaVec. */
if ( (jobz == PlasmaVec) && (Q->nb != Q->mb) ) {
plasma_error("PLASMA_zhegv_Tile_Async", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Workspace request uses descA.n-1 and PlasmaRealDouble; no NULL check on
 * the result is visible in this listing. */
E = (double *)plasma_shared_alloc(plasma, descA.n-1, PlasmaRealDouble);
/* Currently NOT equivalent to LAPACK's
*/
/* NOTE(review): the head of the call that owns the next three argument
 * lines is not visible in the listing. */
PLASMA_desc, descB,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
status = sequence->status;
if (status != 0){
/* E has not been handed to any later stage yet; release it here so this
 * early exit does not leak the workspace. */
plasma_shared_free(plasma, E);
status = descA.ln + status;
return status;
}
/*
* Transform problem to standard eigenvalue problem and solve
*/
plasma_dynamic_call_6(plasma_pzhegst,
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Reduction to tridiagonal form
* with a two-stage approach.
*/
/*
*Reduction to BAND tridiagonal form
*/
plasma_dynamic_call_5(plasma_pzherbt,
PLASMA_desc, descA,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/*
* Build the Q of the first stage
*/
/* if (jobz == PlasmaVec){ */
/* /\* Initialize Q to Identity *\/ */
/* plasma_dynamic_call_6(plasma_pzlaset, */
/* PLASMA_enum, PlasmaUpperLower, */
/* PLASMA_Complex64_t, 0.0, */
/* PLASMA_Complex64_t, 1.0, */
/* PLASMA_desc, descQ, */
/* PLASMA_sequence*, sequence, */
/* PLASMA_request*, request); */
/* /\* Accumulate the transformations from the first stage *\/ */
/* plasma_dynamic_call_6(plasma_pzungtr, */
/* PLASMA_enum, uplo, */
/* PLASMA_desc, descA, */
/* PLASMA_desc, descQ, */
/* PLASMA_desc, descT, */
/* PLASMA_sequence*, sequence, */
/* PLASMA_request*, request); */
/* } */
/* Set the V's to zero before the 2nd stage (bulge chasing) */
/* The submatrix passed below skips the first tile row (uplo == PlasmaLower)
 * or the first tile column (otherwise) of descA.
 * NOTE(review): the head of this call is not visible in the listing. */
plasma_pzlaset2,
PLASMA_desc, uplo == PlasmaLower ? plasma_desc_submatrix(descA, descA.mb, 0, descA.m-descA.mb, descA.n-descA.nb) :
plasma_desc_submatrix(descA, 0, descA.nb, descA.m-descA.mb, descA.n-descA.nb),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/*
* Reduction from BAND tridiagonal to the final condensed form
*/
plasma_dynamic_call_7(plasma_pzhbrdt,
PLASMA_desc, descA,
double*, W,
double*, E,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* For eigenvalues only, call DSTERF.
* For eigenvectors, first call ZUNGTR to generate the unitary matrix,
* then call ZSTEQR.
*/
/* Both branches currently issue the same LAPACKE_dsterf call; the
 * eigenvector-specific work in the else branch is commented out. */
if (jobz == PlasmaNoVec)
status = LAPACKE_dsterf(descA.n, W, E);
else {
status = LAPACKE_dsterf(descA.n, W, E);
/* Accumulate the transformations from the second stage */
/*
plasma_dynamic_call_6(plasma_pzungtr,
PLASMA_enum, uplo,
PLASMA_desc, descA,
PLASMA_desc, descQ,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
LAPACKE_zsteqr(jobz, descA.n, W, E, descQ.mat, descQ.lm);
*/
}
/* If matrix was scaled, then rescale eigenvalues appropriately.
*/
/* Release the workspace obtained from plasma_shared_alloc above. */
plasma_shared_free(plasma, E);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zhemm_Tile_Async ( PLASMA_enum  side,
PLASMA_enum  uplo,
PLASMA_Complex64_t  alpha,
PLASMA_desc *  A,
PLASMA_desc *  B,
PLASMA_Complex64_t  beta,
PLASMA_desc *  C,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

PLASMA_zhemm_Tile_Async - Performs Hermitian matrix multiplication. Non-blocking equivalent of PLASMA_zhemm_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zhemm
PLASMA_zhemm_Tile
PLASMA_chemm_Tile_Async
PLASMA_dhemm_Tile_Async
PLASMA_shemm_Tile_Async

Definition at line 303 of file zhemm.c.

References A, B, C, plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::n, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_9, plasma_pzhemm(), plasma_request_fail(), PLASMA_SUCCESS, PlasmaLeft, PlasmaLower, PlasmaRight, PlasmaUpper, plasma_request_t::status, and plasma_sequence_t::status.

{
/*
 * Body of PLASMA_zhemm_Tile_Async as shown in this listing.
 *
 * Visible steps, in order:
 *   1. fetch the PLASMA context and reject NULL context/sequence/request;
 *   2. propagate the sequence status into the request;
 *   3. validate side and uplo (these two failures use the literal codes
 *      -1 and -2 rather than PLASMA_ERR_ILLEGAL_VALUE);
 *   4. check that B and C agree in size and tile size, and that A is
 *      square, square-tiled, and matches B along the dimension selected
 *      by side;
 *   5. reject any descriptor whose i or j offset is non-zero;
 *   6. quick-return test, followed by the argument list of the compute
 *      call on descA/descB/descC.
 *
 * NOTE(review): the listing looks lossy -- `plasma` is assigned without a
 * visible declaration, the `else` after the sequence-status test has no
 * visible statement of its own, three `}` have no visible opening `if`,
 * the quick-return `if` has no visible body, the final argument list has
 * no visible call head, and no final `return` is visible. Verify against
 * zhemm.c before relying on the exact control flow shown here.
 */
/* Local copies of the descriptors that A, B and C point to. */
PLASMA_desc descA = *A;
PLASMA_desc descB = *B;
PLASMA_desc descC = *C;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zhemm_Tile_Async", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zhemm_Tile_Async", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zhemm_Tile_Async", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zhemm_Tile_Async", "invalid first descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zhemm_Tile_Async", "invalid second descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zhemm_Tile_Async", "invalid third descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if ( (side != PlasmaLeft) && (side != PlasmaRight) ){
plasma_error("PLASMA_zhemm_Tile_Async", "illegal value of side");
return plasma_request_fail(sequence, request, -1);
}
if ((uplo != PlasmaLower) && (uplo != PlasmaUpper)) {
plasma_error("PLASMA_zhemm_Tile_Async", "illegal value of uplo");
return plasma_request_fail(sequence, request, -2);
}
/* Check matrices sizes */
if ( (descB.m != descC.m) || (descB.n != descC.n) ) {
plasma_error("PLASMA_zhemm_Tile_Async", "B and C must have the same size");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* A must be square and its order must equal B's row count (left side)
 * or B's column count (right side). */
if ( (descA.m != descA.n) ||
( (side == PlasmaLeft) && (descA.m != descB.m ) ) ||
( (side == PlasmaRight) && (descA.m != descB.n ) ) ) {
plasma_error("PLASMA_zhemm_Tile_Async", "Matrix A must be square of size M or N regarding side.");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check tiles sizes */
if ( (descB.mb != descC.mb) || (descB.nb != descC.nb) ) {
plasma_error("PLASMA_zhemm_Tile_Async", "B and C must have the same tile sizes");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Same rule as the size check above, applied to tile dimensions. */
if ( (descA.mb != descA.nb) ||
( (side == PlasmaLeft) && (descA.mb != descB.mb ) ) ||
( (side == PlasmaRight) && (descA.mb != descB.nb ) ) ) {
plasma_error("PLASMA_zhemm_Tile_Async", "Matrix A must be square with square tiles wich fits the reagding tile size of B and C");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check submatrix starting point */
/* if ( (descB.i != descC.i) || (descB.j != descC.j) ) { */
/* plasma_error("PLASMA_zhemm_Tile_Async", "B and C submatrices doesn't match"); */
/* return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); */
/* } */
/* if ( (descA.i != descA.j) || */
/* ( (side == PlasmaLeft) && (descA.i != descB.i ) ) || */
/* ( (side == PlasmaRight) && (descA.i != descB.j ) ) ) { */
/* plasma_error("PLASMA_zhemm_Tile_Async", "Submatrix A must start on diagnonal and match submatrices B and C."); */
/* return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); */
/* } */
/* The finer-grained offset checks above are disabled; instead any
 * non-zero i/j offset on A, B or C is rejected. */
if( (descA.i != 0) || (descA.j != 0) ||
(descB.i != 0) || (descB.j != 0) ||
(descC.i != 0) || (descC.j != 0) ) {
plasma_error("PLASMA_zhemm_Tile_Async", "Submatrices are not supported for now");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Quick return */
/* Condition: C is empty, or alpha == 0 together with beta == 1.
 * NOTE(review): neither the body of this `if` nor the head of the call
 * that owns the argument lines below is visible in the listing. */
if (descC.m == 0 || descC.n == 0 ||
( (alpha == (PLASMA_Complex64_t)0.0) && (beta == (PLASMA_Complex64_t)1.0) ))
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_desc, descC,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zher2k_Tile_Async ( PLASMA_enum  uplo,
PLASMA_enum  trans,
PLASMA_Complex64_t  alpha,
PLASMA_desc *  A,
PLASMA_desc *  B,
double  beta,
PLASMA_desc *  C,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

PLASMA_zher2k_Tile_Async - Performs Hermitian rank-2k update. Non-blocking equivalent of PLASMA_zher2k_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zher2k
PLASMA_zher2k_Tile
PLASMA_cher2k_Tile_Async
PLASMA_dher2k_Tile_Async
PLASMA_sher2k_Tile_Async

Definition at line 299 of file zher2k.c.

References A, B, C, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::n, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_9, plasma_pzher2k(), plasma_request_fail(), PLASMA_SUCCESS, PlasmaConjTrans, PlasmaLower, PlasmaNoTrans, PlasmaUpper, plasma_request_t::status, and plasma_sequence_t::status.

{
/*
 * Body of PLASMA_zher2k_Tile_Async as shown in this listing.
 *
 * Visible steps, in order:
 *   1. fetch the PLASMA context and reject NULL context/sequence/request;
 *   2. propagate the sequence status into the request;
 *   3. validate uplo (failure code -1) and trans (failure code -2);
 *   4. derive Am/An/Amb from A according to trans;
 *   5. check that C is square with square tiles, that B matches A in
 *      size and tile size, and that Am/Amb match C;
 *   6. quick-return test, followed by the argument list of the compute
 *      call on descA/descB/beta/descC.
 *
 * Every diagnostic names this function (PLASMA_zher2k_Tile_Async) so that
 * a reported error can be traced to the entry point that raised it.
 *
 * NOTE(review): the listing looks lossy -- `plasma` is assigned without a
 * visible declaration, the `else` after the sequence-status test has no
 * visible statement of its own, several `}` have no visible opening `if`
 * (including the one guarding the trans diagnostic), the quick-return `if`
 * has no visible body, the final argument list has no visible call head,
 * and no final `return` is visible. Verify against zher2k.c before relying
 * on the exact control flow shown here.
 */
/* Local copies of the descriptors that A, B and C point to. */
PLASMA_desc descA = *A;
PLASMA_desc descB = *B;
PLASMA_desc descC = *C;
int N, K;
int Am, An, Amb;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zher2k_Tile_Async", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zher2k_Tile_Async", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zher2k_Tile_Async", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zher2k_Tile_Async", "invalid first descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zher2k_Tile_Async", "invalid second descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zher2k_Tile_Async", "invalid third descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if ((uplo != PlasmaUpper) && (uplo != PlasmaLower)) {
plasma_error("PLASMA_zher2k_Tile_Async", "illegal value of uplo");
return plasma_request_fail(sequence, request, -1);
}
plasma_error("PLASMA_zher2k_Tile_Async", "illegal value of trans");
return plasma_request_fail(sequence, request, -2);
}
/* Am/An/Amb are A's rows/columns/row-tile-size when trans is
 * PlasmaNoTrans, and the transposed counterparts otherwise. */
if ( trans == PlasmaNoTrans ) {
Am = descA.m;
An = descA.n;
Amb = descA.mb;
} else {
Am = descA.n;
An = descA.m;
Amb = descA.nb;
}
if (descC.mb != descC.nb) {
plasma_error("PLASMA_zher2k_Tile_Async", "only square tiles for C are supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if ( (descB.mb != descA.mb) || (descB.nb != descA.nb) || (Amb != descC.mb) ){
plasma_error("PLASMA_zher2k_Tile_Async", "tile sizes have to match");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if (descC.m != descC.n) {
plasma_error("PLASMA_zher2k_Tile_Async", "only square matrix C is supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if ( (descB.m != descA.m) || (descB.n != descA.n) || (Am != descC.m) ){
plasma_error("PLASMA_zher2k_Tile_Async", "sizes of matrices have to match");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
N = descC.m;
K = An;
/* Quick return */
/* Condition: N == 0, or (alpha == 0 or K == 0) together with beta == 1.
 * NOTE(review): neither the body of this `if` nor the head of the call
 * that owns the argument lines below is visible in the listing. */
if ( N == 0 ||
((alpha == (PLASMA_Complex64_t)0.0 || K == 0) && beta == (double)1.0))
PLASMA_desc, descA,
PLASMA_desc, descB,
double, beta,
PLASMA_desc, descC,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zherk_Tile_Async ( PLASMA_enum  uplo,
PLASMA_enum  trans,
double  alpha,
PLASMA_desc *  A,
double  beta,
PLASMA_desc *  C,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

PLASMA_zherk_Tile_Async - Performs Hermitian rank-k update. Non-blocking equivalent of PLASMA_zherk_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zherk
PLASMA_zherk_Tile
PLASMA_cherk_Tile_Async
PLASMA_dherk_Tile_Async
PLASMA_sherk_Tile_Async

Definition at line 276 of file zherk.c.

References A, C, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::n, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_8, plasma_pzherk(), plasma_request_fail(), PLASMA_SUCCESS, PlasmaConjTrans, PlasmaLower, PlasmaNoTrans, PlasmaUpper, plasma_request_t::status, and plasma_sequence_t::status.

{
/*
 * Body of PLASMA_zherk_Tile_Async as shown in this listing.
 *
 * Visible steps, in order:
 *   1. fetch the PLASMA context and reject NULL context/sequence/request;
 *   2. propagate the sequence status into the request;
 *   3. validate uplo (failure code -1) and trans (failure code -2);
 *   4. derive Am/An/Amb from A according to trans;
 *   5. check that C is square with square tiles and that Am/Amb match C;
 *   6. quick-return test, followed by the argument list of the compute
 *      call on alpha/descA/beta/descC.
 *
 * Every diagnostic names this function (PLASMA_zherk_Tile_Async), refers
 * to the `trans` parameter by its actual name, and numbers the two
 * descriptors (A, C) as first and second.
 *
 * NOTE(review): the listing looks lossy -- `plasma` is assigned without a
 * visible declaration, the `else` after the sequence-status test has no
 * visible statement of its own, several `}` have no visible opening `if`
 * (including the one guarding the trans diagnostic), the quick-return `if`
 * has no visible body, the final argument list has no visible call head,
 * and no final `return` is visible. Verify against zherk.c before relying
 * on the exact control flow shown here.
 */
/* Local copies of the descriptors that A and C point to. */
PLASMA_desc descA = *A;
PLASMA_desc descC = *C;
int N, K;
int Am, An, Amb;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zherk_Tile_Async", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zherk_Tile_Async", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zherk_Tile_Async", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zherk_Tile_Async", "invalid first descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zherk_Tile_Async", "invalid second descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if ((uplo != PlasmaUpper) && (uplo != PlasmaLower)) {
plasma_error("PLASMA_zherk_Tile_Async", "illegal value of uplo");
return plasma_request_fail(sequence, request, -1);
}
plasma_error("PLASMA_zherk_Tile_Async", "illegal value of trans");
return plasma_request_fail(sequence, request, -2);
}
/* Am/An/Amb are A's rows/columns/row-tile-size when trans is
 * PlasmaNoTrans, and the transposed counterparts otherwise. */
if ( trans == PlasmaNoTrans ) {
Am = descA.m;
An = descA.n;
Amb = descA.mb;
} else {
Am = descA.n;
An = descA.m;
Amb = descA.nb;
}
if (descC.mb != descC.nb) {
plasma_error("PLASMA_zherk_Tile_Async", "only square tiles are supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if (Amb != descC.mb) {
plasma_error("PLASMA_zherk_Tile_Async", "tile sizes have to match");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if (descC.m != descC.n) {
plasma_error("PLASMA_zherk_Tile_Async", "only square matrix C is supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if (Am != descC.m) {
plasma_error("PLASMA_zherk_Tile_Async", "sizes of matrices have to match");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
N = descC.m;
K = An;
/* Quick return */
/* Condition: N == 0, or (alpha == 0 or K == 0) together with beta == 1.
 * NOTE(review): neither the body of this `if` nor the head of the call
 * that owns the argument lines below is visible in the listing. */
if ( N == 0 ||
((alpha == (double)0.0 || K == 0) && beta == (double)1.0))
double, alpha,
PLASMA_desc, descA,
double, beta,
PLASMA_desc, descC,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zhetrd_Tile_Async ( PLASMA_enum  jobz,
PLASMA_enum  uplo,
PLASMA_desc A,
double *  D,
double *  E,
PLASMA_desc T,
PLASMA_desc Q,
PLASMA_sequence sequence,
PLASMA_request request 
)

PLASMA_zhetrd_Tile_Async - Reduces a complex Hermitian matrix A to tridiagonal form using a two-stage approach: First stage: reduction to band tridiagonal form; Second stage: reduction from band to tridiagonal form. Computing the eigenvectors (jobz = PlasmaVec) is not supported in this version.

May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence - Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request - Identifies this function call (for exception handling purposes).
See also:
PLASMA_zhetrd
PLASMA_zhetrd_Tile
PLASMA_chetrd_Tile_Async
PLASMA_dsytrd_Tile_Async
PLASMA_ssytrd_Tile_Async

Definition at line 331 of file zhetrd.c.

References A, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::n, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), plasma_desc_submatrix(), plasma_dynamic_call_5, plasma_dynamic_call_7, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_request_fail(), PLASMA_SUCCESS, PlasmaLower, PlasmaNoVec, PlasmaVec, plasma_request_t::status, plasma_sequence_t::status, and T.

{
/*
 * Body of PLASMA_zhetrd_Tile_Async: validates its arguments, then issues
 * plasma_pzherbt, plasma_pzlaset2 and plasma_pzhbrdt through the
 * plasma_dynamic_call_N macros, all on the caller's sequence/request.
 *
 * NOTE(review): this listing appears to have lost lines (several closing
 * braces have no visible opening `if`, and no final return is shown).
 * The comments below describe only what is visible here.
 */
/* Work on by-value copies of the descriptors that A and T point to. */
PLASMA_desc descA = *A;
PLASMA_desc descT = *T;
plasma = plasma_context_self();
/* A NULL context, sequence or request is reported via plasma_fatal_error. */
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zhetrd_Tile_Async", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zhetrd_Tile_Async", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zhetrd_Tile_Async", "NULL request");
}
/* Check sequence status */
/* The request is marked successful only while the sequence itself is. */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
/* NOTE(review): the conditions guarding the next two error branches are not
 * visible in this listing. */
plasma_error("PLASMA_zhetrd_Tile_Async", "invalid descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zhetrd_Tile_Async", "invalid descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Q is validated with plasma_desc_check only when jobz == PlasmaVec. */
if ( (jobz == PlasmaVec) && (plasma_desc_check(Q) != PLASMA_SUCCESS) ) {
plasma_error("PLASMA_zhetrd_Tile_Async", "invalid descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
plasma_error("PLASMA_zhetrd_Tile_Async", "illegal value of jobz");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* A must be square, and so must its tiles. */
if (descA.m != descA.n) {
plasma_error("PLASMA_zhetrd_Tile_Async", "matrix need to be square");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if (descA.nb != descA.mb) {
plasma_error("PLASMA_zhetrd_Tile_Async", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* PlasmaVec is rejected unconditionally here ... */
if (jobz == PlasmaVec) {
plasma_error("PLASMA_zhetrd_Tile_Async", "computing the eigenvectors is not supported in this version");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* ... so this PlasmaVec-only check on Q's tile shape can never fire as
 * written, because the branch above has already returned. */
if ( (jobz == PlasmaVec) && (Q->nb != Q->mb) ) {
plasma_error("PLASMA_zhetrd_Tile_Async", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Reduction to tridiagonal form
* with a two-stage approach.
*/
/* Reduction to BAND tridiagonal form
*/
/* NOTE(review): fewer type/value pairs are visible than the macro suffix
 * suggests; an argument line may be missing from this listing. */
plasma_dynamic_call_5(plasma_pzherbt,
PLASMA_desc, descA,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/*
* Build the Q of the first stage
*/
/* The block below is disabled (commented out); it refers to a descQ that
 * is not declared anywhere in the visible listing. */
/* if (jobz == PlasmaVec){ */
/* /\* Initialize Q to Identity *\/ */
/* plasma_dynamic_call_6(plasma_pzlaset, */
/* PLASMA_enum, PlasmaUpperLower, */
/* PLASMA_Complex64_t, 0.0, */
/* PLASMA_Complex64_t, 1.0, */
/* PLASMA_desc, descQ, */
/* PLASMA_sequence*, sequence, */
/* PLASMA_request*, request); */
/* /\* Accumulate the transformations from the first stage*\/ */
/* plasma_dynamic_call_6(plasma_pzungtr, */
/* PLASMA_enum, uplo, */
/* PLASMA_desc, descA, */
/* PLASMA_desc, descQ, */
/* PLASMA_desc, descT, */
/* PLASMA_sequence*, sequence, */
/* PLASMA_request*, request); */
/* } */
/* Set the V's to zero before the 2nd stage (bulge chasing) */
/*
*/
/* The submatrix passed depends on uplo: for PlasmaLower the offsets are
 * (descA.mb, 0), otherwise (0, descA.nb); in both cases the last two
 * arguments are descA.m-descA.mb and descA.n-descA.nb. */
plasma_dynamic_call_5(plasma_pzlaset2,
PLASMA_desc, uplo == PlasmaLower ? plasma_desc_submatrix(descA, descA.mb, 0, descA.m-descA.mb, descA.n-descA.nb)
: plasma_desc_submatrix(descA, 0, descA.nb, descA.m-descA.mb, descA.n-descA.nb),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Reduction from BAND tridiagonal to the final condensed form
*/
/* D and E are handed to plasma_pzhbrdt as double* arguments. */
plasma_dynamic_call_7(plasma_pzhbrdt,
PLASMA_desc, descA,
double*, D,
double*, E,
PLASMA_desc, descT,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zlacpy_Tile_Async ( PLASMA_enum  uplo,
PLASMA_desc A,
PLASMA_desc B,
PLASMA_sequence sequence,
PLASMA_request request 
)

PLASMA_zlacpy_Tile_Async - Non-blocking equivalent of PLASMA_zlacpy_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence - Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request - Identifies this function call (for exception handling purposes).
See also:
PLASMA_zlacpy
PLASMA_zlacpy_Tile
PLASMA_clacpy_Tile_Async
PLASMA_dlacpy_Tile_Async
PLASMA_slacpy_Tile_Async

Definition at line 227 of file zlacpy.c.

References A, B, plasma_desc_t::m, plasma_desc_t::mb, min, plasma_desc_t::n, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_5, plasma_pzlacpy(), plasma_request_fail(), PLASMA_SUCCESS, PlasmaLower, PlasmaUpper, PlasmaUpperLower, plasma_request_t::status, and plasma_sequence_t::status.

{
/*
 * Body of PLASMA_zlacpy_Tile_Async: argument validation followed by a
 * parallel call taking descA and descB.
 *
 * NOTE(review): this listing appears to have lost lines (a closing brace
 * without a visible `if`, an empty quick-return body, and a macro argument
 * list without the macro name). Comments describe only what is visible.
 */
/* Work on by-value copies of the descriptors that A and B point to. */
PLASMA_desc descA = *A;
PLASMA_desc descB = *B;
plasma = plasma_context_self();
/* A NULL context, sequence or request is reported via plasma_fatal_error. */
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zlacpy_Tile_Async", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zlacpy_Tile_Async", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zlacpy_Tile_Async", "NULL request");
}
/* Check sequence status */
/* The request is marked successful only while the sequence itself is. */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
/* NOTE(review): the condition guarding this error branch is not visible. */
plasma_error("PLASMA_zlacpy_Tile_Async", "invalid descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
/* Only A's tile shape is checked here; no check on descB is visible. */
if (descA.nb != descA.mb) {
plasma_error("PLASMA_zlacpy_Tile_Async", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if ( (uplo != PlasmaUpperLower) &&
(uplo != PlasmaUpper) &&
(uplo != PlasmaLower) ) {
plasma_error("PLASMA_zlacpy_Tile_Async", "illegal value of uplo");
/* NOTE(review): unlike the other error branches in this body, this one
 * returns -1 directly and does not go through plasma_request_fail. */
return -1;
}
/* Quick return */
/* Taken when either dimension of A is zero; the body is not visible here. */
if (min(descA.m, descA.n) == 0) {
}
/* NOTE(review): the macro invocation opening this argument list is missing. */
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zlange_Tile_Async ( PLASMA_enum  norm,
PLASMA_desc A,
double *  work,
double *  value,
PLASMA_sequence sequence,
PLASMA_request request 
)

PLASMA_zlange_Tile_Async - Non-blocking equivalent of PLASMA_zlange_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence - Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request - Identifies this function call (for exception handling purposes).
See also:
PLASMA_zlange
PLASMA_zlange_Tile
PLASMA_clange_Tile_Async
PLASMA_dlange_Tile_Async
PLASMA_slange_Tile_Async

Definition at line 238 of file zlange.c.

References A, plasma_desc_t::m, plasma_desc_t::mb, min, plasma_desc_t::n, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_6, plasma_pzlange(), plasma_request_fail(), PLASMA_SUCCESS, PlasmaFrobeniusNorm, PlasmaInfNorm, PlasmaMaxNorm, PlasmaOneNorm, plasma_request_t::status, and plasma_sequence_t::status.

{
/*
 * Body of PLASMA_zlange_Tile_Async: argument validation followed by a
 * parallel call taking descA, work and value.
 *
 * NOTE(review): this listing appears to have lost lines (closing braces
 * without a visible `if`, and a macro argument list without the macro
 * name). Comments describe only what is visible.
 * NOTE(review): messages are tagged "PLASMA_zlange_Tile" / "PLASMA_zlange"
 * although this page documents the body as PLASMA_zlange_Tile_Async.
 */
/* Work on a by-value copy of the descriptor that A points to. */
PLASMA_desc descA = *A;
plasma = plasma_context_self();
/* A NULL context, sequence or request is reported via plasma_fatal_error. */
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zlange_Tile", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zlange_Tile", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zlange_Tile", "NULL request");
}
/* Check sequence status */
/* The request is marked successful only while the sequence itself is. */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
/* NOTE(review): the condition guarding this error branch is not visible. */
plasma_error("PLASMA_zlange_Tile", "invalid descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (descA.nb != descA.mb) {
plasma_error("PLASMA_zlange_Tile", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* NOTE(review): the test on `norm` that guards this branch is not visible. */
plasma_error("PLASMA_zlange", "illegal value of norm");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Quick return */
/* When either dimension of A is zero, *value is set to 0.0. */
if (min(descA.m, descA.n) == 0) {
*value = 0.0;
}
/* NOTE(review): the macro invocation opening this argument list is missing. */
PLASMA_desc, descA,
double*, work,
double*, value,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zlanhe_Tile_Async ( PLASMA_enum  norm,
PLASMA_enum  uplo,
PLASMA_desc A,
double *  work,
double *  value,
PLASMA_sequence sequence,
PLASMA_request request 
)

PLASMA_zlanhe_Tile_Async - Non-blocking equivalent of PLASMA_zlanhe_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence - Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request - Identifies this function call (for exception handling purposes).
See also:
PLASMA_zlanhe
PLASMA_zlanhe_Tile
PLASMA_clanhe_Tile_Async
PLASMA_dlanhe_Tile_Async
PLASMA_slanhe_Tile_Async

Definition at line 240 of file zlanhe.c.

References A, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_7, plasma_pzlanhe(), plasma_request_fail(), PLASMA_SUCCESS, PlasmaFrobeniusNorm, PlasmaInfNorm, PlasmaLower, PlasmaMaxNorm, PlasmaOneNorm, PlasmaUpper, plasma_request_t::status, and plasma_sequence_t::status.

{
/*
 * Body of PLASMA_zlanhe_Tile_Async: argument validation followed by a
 * parallel call taking descA, work and value.
 *
 * NOTE(review): this listing appears to have lost lines (closing braces
 * without a visible `if`, and a macro argument list without the macro
 * name). Comments describe only what is visible.
 * NOTE(review): messages are tagged "PLASMA_zlanhe_Tile" although this page
 * documents the body as PLASMA_zlanhe_Tile_Async.
 */
/* Work on a by-value copy of the descriptor that A points to. */
PLASMA_desc descA = *A;
plasma = plasma_context_self();
/* A NULL context, sequence or request is reported via plasma_fatal_error. */
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zlanhe_Tile", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zlanhe_Tile", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zlanhe_Tile", "NULL request");
}
/* Check sequence status */
/* The request is marked successful only while the sequence itself is. */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
/* NOTE(review): the condition guarding this error branch is not visible. */
plasma_error("PLASMA_zlanhe_Tile", "invalid descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (descA.nb != descA.mb) {
plasma_error("PLASMA_zlanhe_Tile", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* NOTE(review): the test on `norm` that guards this branch is not visible. */
plasma_error("PLASMA_zlanhe_Tile", "illegal value of norm");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* uplo must be PlasmaUpper or PlasmaLower. */
if ( (uplo != PlasmaUpper) && (uplo != PlasmaLower) ) {
plasma_error("PLASMA_zlanhe_Tile", "illegal value of uplo");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Quick return */
/* When A has zero rows, *value is set to 0.0. */
if ( descA.m == 0) {
*value = 0.0;
}
/* NOTE(review): the macro invocation opening this argument list is missing. */
PLASMA_desc, descA,
double*, work,
double*, value,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zlansy_Tile_Async ( PLASMA_enum  norm,
PLASMA_enum  uplo,
PLASMA_desc A,
double *  work,
double *  value,
PLASMA_sequence sequence,
PLASMA_request request 
)

PLASMA_zlansy_Tile_Async - Non-blocking equivalent of PLASMA_zlansy_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence - Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request - Identifies this function call (for exception handling purposes).
See also:
PLASMA_zlansy
PLASMA_zlansy_Tile
PLASMA_clansy_Tile_Async
PLASMA_dlansy_Tile_Async
PLASMA_slansy_Tile_Async

Definition at line 240 of file zlansy.c.

References A, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_7, plasma_pzlansy(), plasma_request_fail(), PLASMA_SUCCESS, PlasmaFrobeniusNorm, PlasmaInfNorm, PlasmaLower, PlasmaMaxNorm, PlasmaOneNorm, PlasmaUpper, plasma_request_t::status, and plasma_sequence_t::status.

{
/*
 * Body of PLASMA_zlansy_Tile_Async: argument validation followed by a
 * parallel call taking descA, work and value.
 *
 * NOTE(review): this listing appears to have lost lines (closing braces
 * without a visible `if`, and a macro argument list without the macro
 * name). Comments describe only what is visible.
 * NOTE(review): messages are tagged "PLASMA_zlansy_Tile" although this page
 * documents the body as PLASMA_zlansy_Tile_Async.
 */
/* Work on a by-value copy of the descriptor that A points to. */
PLASMA_desc descA = *A;
plasma = plasma_context_self();
/* A NULL context, sequence or request is reported via plasma_fatal_error. */
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zlansy_Tile", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zlansy_Tile", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zlansy_Tile", "NULL request");
}
/* Check sequence status */
/* The request is marked successful only while the sequence itself is. */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
/* NOTE(review): the condition guarding this error branch is not visible. */
plasma_error("PLASMA_zlansy_Tile", "invalid descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (descA.nb != descA.mb) {
plasma_error("PLASMA_zlansy_Tile", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* NOTE(review): the test on `norm` that guards this branch is not visible. */
plasma_error("PLASMA_zlansy_Tile", "illegal value of norm");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* uplo must be PlasmaUpper or PlasmaLower. */
if ( (uplo != PlasmaUpper) && (uplo != PlasmaLower) ) {
plasma_error("PLASMA_zlansy_Tile", "illegal value of uplo");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Quick return */
/* When A has zero rows, *value is set to 0.0. */
if ( descA.m == 0) {
*value = 0.0;
}
/* NOTE(review): the macro invocation opening this argument list is missing. */
PLASMA_desc, descA,
double*, work,
double*, value,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zLapack_to_Tile_Async ( PLASMA_Complex64_t Af77,
int  LDA,
PLASMA_desc A,
PLASMA_sequence sequence,
PLASMA_request request 
)

PLASMA_zLapack_to_Tile_Async - Conversion from LAPACK layout to tile layout. Non-blocking equivalent of PLASMA_zLapack_to_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] Af77 - LAPACK matrix.
[in] LDA - The leading dimension of the matrix Af77.
[in,out] A - Descriptor of the PLASMA matrix in tile layout. If PLASMA_TRANSLATION_MODE is set to PLASMA_INPLACE, A->mat is not used and is set to Af77 on return; if PLASMA_TRANSLATION_MODE is set to PLASMA_OUTOFPLACE, A->mat has to be allocated beforehand.
[in] sequence - Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request - Identifies this function call (for exception handling purposes).
See also:
PLASMA_zTile_to_Lapack_Async
PLASMA_zLapack_to_Tile
PLASMA_cLapack_to_Tile_Async
PLASMA_dLapack_to_Tile_Async
PLASMA_sLapack_to_Tile_Async

Definition at line 128 of file ztile.c.

References A, plasma_context_self(), plasma_desc_check(), PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_5, plasma_pzlapack_to_tile(), and PLASMA_SUCCESS.

{
/*
 * Body of PLASMA_zLapack_to_Tile_Async: context and descriptor checks
 * followed by a parallel call taking LDA and descA.
 *
 * NOTE(review): this listing appears to have lost lines (a closing brace
 * without a visible `if`, and a macro argument list without the macro
 * name). Comments describe only what is visible.
 * NOTE(review): no NULL checks on sequence/request are visible in this
 * body; confirm against the full source whether they exist.
 * NOTE(review): messages are tagged "PLASMA_zLapack_to_Tile" although this
 * page documents the body as PLASMA_zLapack_to_Tile_Async.
 */
/* Work on a by-value copy of the descriptor that A points to. */
PLASMA_desc descA = *A;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zLapack_to_Tile", "PLASMA not initialized");
}
/* Check descriptor for correctness */
/* NOTE(review): the condition guarding this error branch is not visible. */
plasma_error("PLASMA_zLapack_to_Tile", "invalid descriptor");
}
/* NOTE(review): the macro invocation opening this argument list is missing. */
int, LDA,
PLASMA_desc, descA,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

int PLASMA_zlaset_Tile_Async ( PLASMA_enum  uplo,
PLASMA_Complex64_t  alpha,
PLASMA_Complex64_t  beta,
PLASMA_desc A,
PLASMA_sequence sequence,
PLASMA_request request 
)

PLASMA_zlaset_Tile_Async - Non-blocking equivalent of PLASMA_zlaset_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence - Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request - Identifies this function call (for exception handling purposes).
See also:
PLASMA_zlaset
PLASMA_zlaset_Tile
PLASMA_claset_Tile_Async
PLASMA_dlaset_Tile_Async
PLASMA_slaset_Tile_Async

Definition at line 218 of file zlaset.c.

References A, plasma_desc_t::m, plasma_desc_t::mb, min, plasma_desc_t::n, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), plasma_dynamic_call_6, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_request_fail(), PLASMA_SUCCESS, PlasmaLower, PlasmaUpper, PlasmaUpperLower, plasma_request_t::status, and plasma_sequence_t::status.

{
/*
 * Body of PLASMA_zlaset_Tile_Async: argument validation followed by a
 * plasma_dynamic_call_6 of plasma_pzlaset on descA.
 *
 * NOTE(review): this listing appears to have lost lines (a closing brace
 * without a visible `if`, an empty quick-return body, and fewer macro
 * arguments than the `_6` suffix suggests). Comments describe only what
 * is visible.
 */
/* Work on a by-value copy of the descriptor that A points to. */
PLASMA_desc descA = *A;
plasma = plasma_context_self();
/* A NULL context, sequence or request is reported via plasma_fatal_error. */
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zlaset_Tile_Async", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zlaset_Tile_Async", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zlaset_Tile_Async", "NULL request");
}
/* Check sequence status */
/* The request is marked successful only while the sequence itself is. */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
/* NOTE(review): the condition guarding this error branch is not visible. */
plasma_error("PLASMA_zlaset_Tile_Async", "invalid descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (descA.nb != descA.mb) {
plasma_error("PLASMA_zlaset_Tile_Async", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if ( (uplo != PlasmaUpperLower) &&
(uplo != PlasmaUpper) &&
(uplo != PlasmaLower) ) {
plasma_error("PLASMA_zlaset_Tile_Async", "illegal value of uplo");
/* NOTE(review): unlike the other error branches in this body, this one
 * returns -1 directly and does not go through plasma_request_fail. */
return -1;
}
/* Quick return */
/* Taken when either dimension of A is zero; the body is not visible here. */
if (min(descA.m, descA.n) == 0) {
}
/* NOTE(review): uplo, alpha and beta do not appear among the visible
 * arguments; those lines are presumably missing from this listing. */
plasma_dynamic_call_6(plasma_pzlaset,
PLASMA_desc, descA,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zlaswp_Tile_Async ( PLASMA_desc A,
int  K1,
int  K2,
int *  IPIV,
int  INCX,
PLASMA_sequence sequence,
PLASMA_request request 
)

PLASMA_zlaswp_Tile_Async - performs a series of row interchanges on the matrix A. One row interchange is initiated for each of rows K1 through K2 of A. Non-blocking equivalent of PLASMA_zlaswp_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence - Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request - Identifies this function call (for exception handling purposes).
See also:
PLASMA_zlaswp
PLASMA_zlaswp_Tile
PLASMA_claswp_Tile_Async
PLASMA_dlaswp_Tile_Async
PLASMA_slaswp_Tile_Async
PLASMA_zgetrf_Tile_Async

Definition at line 226 of file zlaswp.c.

References A, plasma_desc_t::m, plasma_context_self(), plasma_desc_check(), plasma_dynamic_call_3, plasma_dynamic_call_5, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_request_fail(), PLASMA_SUCCESS, plasma_request_t::status, and plasma_sequence_t::status.

{
/*
 * Body of PLASMA_zlaswp_Tile_Async: argument validation followed by three
 * calls in order: plasma_pzbarrier_tl2pnl, plasma_pzlaswp, then
 * plasma_pzbarrier_pnl2tl, all on descA and the caller's sequence/request.
 *
 * NOTE(review): this listing appears to have lost lines (a closing brace
 * without a visible `if`, and the macro names that open each of the three
 * argument lists). Comments describe only what is visible.
 * NOTE(review): messages are tagged "PLASMA_zlaswp_Tile" although this page
 * documents the body as PLASMA_zlaswp_Tile_Async.
 */
/* Work on a by-value copy of the descriptor that A points to. */
PLASMA_desc descA = *A;
plasma = plasma_context_self();
/* A NULL context, sequence or request is reported via plasma_fatal_error. */
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zlaswp_Tile", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zlaswp_Tile", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zlaswp_Tile", "NULL request");
}
/* Check sequence status */
/* The request is marked successful only while the sequence itself is. */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
/* NOTE(review): the condition guarding this error branch is not visible. */
plasma_error("PLASMA_zlaswp_Tile", "invalid first descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Only the full row range is accepted: K1 must be 1 and K2 must be descA.m. */
if ( (K1 != 1) || (K2 != descA.m) ) {
plasma_error("PLASMA_zlaswp_Tile", "invalid K1 or K2 (1..M is the only interval supported right now)");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* First call: plasma_pzbarrier_tl2pnl on descA. */
plasma_pzbarrier_tl2pnl,
PLASMA_desc, descA,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* swap */
/* Second call: plasma_pzlaswp with the pivot array IPIV and increment INCX. */
plasma_pzlaswp,
PLASMA_desc, descA,
int *, IPIV,
int, INCX,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Third call: plasma_pzbarrier_pnl2tl on descA. */
plasma_pzbarrier_pnl2tl,
PLASMA_desc, descA,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zlaswpc_Tile_Async ( PLASMA_desc A,
int  K1,
int  K2,
int *  IPIV,
int  INCX,
PLASMA_sequence sequence,
PLASMA_request request 
)

PLASMA_zlaswpc_Tile_Async - performs a series of row interchanges on the matrix A. One row interchange is initiated for each of rows K1 through K2 of A. Non-blocking equivalent of PLASMA_zlaswpc_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence - Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request - Identifies this function call (for exception handling purposes).
See also:
PLASMA_zlaswpc
PLASMA_zlaswpc_Tile
PLASMA_claswpc_Tile_Async
PLASMA_dlaswpc_Tile_Async
PLASMA_slaswpc_Tile_Async
PLASMA_zgetrf_Tile_Async

Definition at line 226 of file zlaswpc.c.

References A, plasma_desc_t::m, plasma_context_self(), plasma_desc_check(), plasma_dynamic_call_3, plasma_dynamic_call_5, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_request_fail(), PLASMA_SUCCESS, plasma_request_t::status, and plasma_sequence_t::status.

{
/*
 * Body of PLASMA_zlaswpc_Tile_Async: argument validation followed by three
 * calls in order: plasma_pzbarrier_tl2row, plasma_pzlaswpc, then
 * plasma_pzbarrier_row2tl, all on descA and the caller's sequence/request.
 *
 * NOTE(review): this listing appears to have lost lines (a closing brace
 * without a visible `if`, and the macro names that open each of the three
 * argument lists). Comments describe only what is visible.
 * NOTE(review): messages are tagged "PLASMA_zlaswpc_Tile" although this
 * page documents the body as PLASMA_zlaswpc_Tile_Async.
 */
/* Work on a by-value copy of the descriptor that A points to. */
PLASMA_desc descA = *A;
plasma = plasma_context_self();
/* A NULL context, sequence or request is reported via plasma_fatal_error. */
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zlaswpc_Tile", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zlaswpc_Tile", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zlaswpc_Tile", "NULL request");
}
/* Check sequence status */
/* The request is marked successful only while the sequence itself is. */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
/* NOTE(review): the condition guarding this error branch is not visible. */
plasma_error("PLASMA_zlaswpc_Tile", "invalid first descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Only the full range is accepted: K1 must be 1 and K2 must be descA.m. */
if ( (K1 != 1) || (K2 != descA.m) ) {
plasma_error("PLASMA_zlaswpc_Tile", "invalid K1 or K2 (1..M is the only interval supported right now)");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* First call: plasma_pzbarrier_tl2row on descA. */
plasma_pzbarrier_tl2row,
PLASMA_desc, descA,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* swap */
/* Second call: plasma_pzlaswpc with the pivot array IPIV and increment INCX. */
plasma_pzlaswpc,
PLASMA_desc, descA,
int *, IPIV,
int, INCX,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Third call: plasma_pzbarrier_row2tl on descA. */
plasma_pzbarrier_row2tl,
PLASMA_desc, descA,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zlauum_Tile_Async ( PLASMA_enum  uplo,
PLASMA_desc A,
PLASMA_sequence sequence,
PLASMA_request request 
)

PLASMA_zlauum_Tile_Async - Computes the product U * U' or L' * L, where the triangular factor U or L is stored in the upper or lower triangular part of the array A. Non-blocking equivalent of PLASMA_zlauum_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence - Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request - Identifies this function call (for exception handling purposes).
See also:
PLASMA_zlauum
PLASMA_zlauum_Tile
PLASMA_clauum_Tile_Async
PLASMA_dlauum_Tile_Async
PLASMA_slauum_Tile_Async
PLASMA_zpotri_Tile_Async

Definition at line 222 of file zlauum.c.

References A, plasma_desc_t::mb, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), plasma_dynamic_call_4, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_request_fail(), PLASMA_SUCCESS, PlasmaLower, PlasmaUpper, plasma_request_t::status, and plasma_sequence_t::status.

{
/*
 * Body of PLASMA_zlauum_Tile_Async: argument validation followed by a
 * plasma_dynamic_call_4 of plasma_pzlauum on descA.
 *
 * NOTE(review): this listing appears to have lost lines (a closing brace
 * without a visible `if`, and no final return). Comments describe only
 * what is visible.
 * NOTE(review): messages are tagged "PLASMA_zlauum_Tile" although this page
 * documents the body as PLASMA_zlauum_Tile_Async.
 */
/* Work on a by-value copy of the descriptor that A points to. */
PLASMA_desc descA = *A;
plasma = plasma_context_self();
/* A NULL context, sequence or request is reported via plasma_fatal_error. */
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zlauum_Tile", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zlauum_Tile", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zlauum_Tile", "NULL request");
}
/* Check sequence status */
/* The request is marked successful only while the sequence itself is. */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
/* NOTE(review): the condition guarding this error branch is not visible. */
plasma_error("PLASMA_zlauum_Tile", "invalid descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (descA.nb != descA.mb) {
plasma_error("PLASMA_zlauum_Tile", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* An illegal uplo fails the request with code -1 rather than
 * PLASMA_ERR_ILLEGAL_VALUE, unlike the other checks in this body. */
if (uplo != PlasmaUpper && uplo != PlasmaLower) {
plasma_error("PLASMA_zlauum_Tile", "illegal value of uplo");
return plasma_request_fail(sequence, request, -1);
}
/* Quick return */
/* The quick-return test below is disabled (commented out); N is not
 * declared anywhere in the visible listing. */
/*
if (max(N, 0) == 0)
return PLASMA_SUCCESS;
*/
/* NOTE(review): uplo does not appear among the visible arguments; that
 * line is presumably missing from this listing. */
plasma_dynamic_call_4(plasma_pzlauum,
PLASMA_desc, descA,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zplghe_Tile_Async ( double  bump,
PLASMA_desc A,
unsigned long long int  seed,
PLASMA_sequence sequence,
PLASMA_request request 
)

PLASMA_zplghe_Tile_Async - Generate a random Hermitian matrix by tiles. Non-blocking equivalent of PLASMA_zplghe_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence - Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request - Identifies this function call (for exception handling purposes).
See also:
PLASMA_zplghe
PLASMA_zplghe_Tile
PLASMA_cplghe_Tile_Async
PLASMA_dplghe_Tile_Async
PLASMA_splghe_Tile_Async
PLASMA_zplghe_Tile_Async
PLASMA_zplgsy_Tile_Async

Definition at line 203 of file zplghe.c.

References A, plasma_desc_t::m, plasma_desc_t::mb, min, plasma_desc_t::n, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_5, plasma_pzplghe(), plasma_request_fail(), PLASMA_SUCCESS, plasma_request_t::status, and plasma_sequence_t::status.

{
/*
 * Body of PLASMA_zplghe_Tile_Async: argument validation followed by a
 * parallel call taking bump, descA and seed.
 *
 * NOTE(review): this listing appears to have lost lines (a closing brace
 * without a visible `if`, the statement under the quick-return test, and
 * the macro name that opens the final argument list). Comments describe
 * only what is visible.
 * NOTE(review): messages are tagged "PLASMA_zplghe_Tile" although this page
 * documents the body as PLASMA_zplghe_Tile_Async.
 */
/* Work on a by-value copy of the descriptor that A points to. */
PLASMA_desc descA = *A;
plasma = plasma_context_self();
/* A NULL context, sequence or request is reported via plasma_fatal_error. */
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zplghe_Tile", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zplghe_Tile", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zplghe_Tile", "NULL request");
}
/* Check sequence status */
/* The request is marked successful only while the sequence itself is. */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
/* NOTE(review): the condition guarding this error branch is not visible. */
plasma_error("PLASMA_zplghe_Tile", "invalid descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (descA.nb != descA.mb) {
plasma_error("PLASMA_zplghe_Tile", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Quick return */
/* Tests for a zero dimension of A; the statement it guards is not visible. */
if (min( descA.m, descA.n ) == 0)
/* NOTE(review): the macro invocation opening this argument list is missing.
 * bump is passed as a double here. */
double, bump,
PLASMA_desc, descA,
unsigned long long int, seed,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zplgsy_Tile_Async ( PLASMA_Complex64_t  bump,
PLASMA_desc A,
unsigned long long int  seed,
PLASMA_sequence sequence,
PLASMA_request request 
)

PLASMA_zplgsy_Tile_Async - Generate a random symmetric matrix by tiles. Non-blocking equivalent of PLASMA_zplgsy_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence - Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request - Identifies this function call (for exception handling purposes).
See also:
PLASMA_zplgsy
PLASMA_zplgsy_Tile
PLASMA_cplgsy_Tile_Async
PLASMA_dplgsy_Tile_Async
PLASMA_splgsy_Tile_Async
PLASMA_zplgsy_Tile_Async
PLASMA_zplgsy_Tile_Async

Definition at line 203 of file zplgsy.c.

References A, plasma_desc_t::m, plasma_desc_t::mb, min, plasma_desc_t::n, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_5, plasma_pzplgsy(), plasma_request_fail(), PLASMA_SUCCESS, plasma_request_t::status, and plasma_sequence_t::status.

{
/*
 * Body of PLASMA_zplgsy_Tile_Async: argument validation followed by a
 * parallel call taking descA and seed.
 *
 * NOTE(review): this listing appears to have lost lines (a closing brace
 * without a visible `if`, the statement under the quick-return test, and
 * the start of the final macro invocation; `bump` is a parameter but is not
 * among the visible arguments). Comments describe only what is visible.
 * NOTE(review): messages are tagged "PLASMA_zplgsy_Tile" although this page
 * documents the body as PLASMA_zplgsy_Tile_Async.
 */
/* Work on a by-value copy of the descriptor that A points to. */
PLASMA_desc descA = *A;
plasma = plasma_context_self();
/* A NULL context, sequence or request is reported via plasma_fatal_error. */
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zplgsy_Tile", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zplgsy_Tile", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zplgsy_Tile", "NULL request");
}
/* Check sequence status */
/* The request is marked successful only while the sequence itself is. */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
/* NOTE(review): the condition guarding this error branch is not visible. */
plasma_error("PLASMA_zplgsy_Tile", "invalid descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (descA.nb != descA.mb) {
plasma_error("PLASMA_zplgsy_Tile", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Quick return */
/* Tests for a zero dimension of A; the statement it guards is not visible. */
if (min( descA.m, descA.n ) == 0)
/* NOTE(review): the macro invocation opening this argument list is missing. */
PLASMA_desc, descA,
unsigned long long int, seed,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zplrnt_Tile_Async ( PLASMA_desc A,
unsigned long long int  seed,
PLASMA_sequence sequence,
PLASMA_request request 
)

PLASMA_zplrnt_Tile_Async - Generate a random matrix by tiles. Non-blocking equivalent of PLASMA_zplrnt_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zplrnt
PLASMA_zplrnt_Tile
PLASMA_cplrnt_Tile_Async
PLASMA_dplrnt_Tile_Async
PLASMA_splrnt_Tile_Async
PLASMA_zplghe_Tile_Async
PLASMA_zplgsy_Tile_Async

Definition at line 201 of file zplrnt.c.

References A, plasma_desc_t::m, plasma_desc_t::mb, min, plasma_desc_t::n, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_4, plasma_pzplrnt(), plasma_request_fail(), PLASMA_SUCCESS, plasma_request_t::status, and plasma_sequence_t::status.

{
PLASMA_desc descA = *A;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zplrnt_Tile", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zplrnt_Tile", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zplrnt_Tile", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zplrnt_Tile", "invalid descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (descA.nb != descA.mb) {
plasma_error("PLASMA_zplrnt_Tile", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Quick return */
if (min( descA.m, descA.n ) == 0)
PLASMA_desc, descA,
unsigned long long int, seed,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zposv_Tile_Async ( PLASMA_enum  uplo,
PLASMA_desc A,
PLASMA_desc B,
PLASMA_sequence sequence,
PLASMA_request request 
)

PLASMA_zposv_Tile_Async - Solves a symmetric positive definite or Hermitian positive definite system of linear equations using the Cholesky factorization. Non-blocking equivalent of PLASMA_zposv_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zposv
PLASMA_zposv_Tile
PLASMA_cposv_Tile_Async
PLASMA_dposv_Tile_Async
PLASMA_sposv_Tile_Async

Definition at line 260 of file zposv.c.

References A, B, plasma_desc_t::mb, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_4, plasma_parallel_call_9, plasma_pzpotrf(), plasma_pztrsm(), plasma_request_fail(), PLASMA_SUCCESS, PlasmaConjTrans, PlasmaLeft, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaUpper, plasma_request_t::status, and plasma_sequence_t::status.

{
PLASMA_desc descA = *A;
PLASMA_desc descB = *B;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zposv_Tile", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zposv_Tile", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zposv_Tile", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zposv_Tile", "invalid first descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zposv_Tile", "invalid second descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (descA.nb != descA.mb || descB.nb != descB.mb) {
plasma_error("PLASMA_zposv_Tile", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if (uplo != PlasmaUpper && uplo != PlasmaLower) {
plasma_error("PLASMA_zposv_Tile", "illegal value of uplo");
return plasma_request_fail(sequence, request, -1);
}
/* Quick return - currently NOT equivalent to LAPACK's
* LAPACK does not have such check for DPOSV */
/*
if (min(N, NRHS) == 0)
return PLASMA_SUCCESS;
*/
PLASMA_desc, descA,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zpotrf_Tile_Async ( PLASMA_enum  uplo,
PLASMA_desc A,
PLASMA_sequence sequence,
PLASMA_request request 
)

PLASMA_zpotrf_Tile_Async - Computes the Cholesky factorization of a symmetric positive definite or Hermitian positive definite matrix. Non-blocking equivalent of PLASMA_zpotrf_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zpotrf
PLASMA_zpotrf_Tile
PLASMA_cpotrf_Tile_Async
PLASMA_dpotrf_Tile_Async
PLASMA_spotrf_Tile_Async
PLASMA_zpotrs_Tile_Async

Definition at line 232 of file zpotrf.c.

References A, plasma_desc_t::mb, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_4, plasma_pzpotrf(), plasma_request_fail(), PLASMA_SUCCESS, PlasmaLower, PlasmaUpper, plasma_request_t::status, and plasma_sequence_t::status.

{
PLASMA_desc descA = *A;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zpotrf_Tile", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zpotrf_Tile", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zpotrf_Tile", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zpotrf_Tile", "invalid descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (descA.nb != descA.mb) {
plasma_error("PLASMA_zpotrf_Tile", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if (uplo != PlasmaUpper && uplo != PlasmaLower) {
plasma_error("PLASMA_zpotrf_Tile", "illegal value of uplo");
return plasma_request_fail(sequence, request, -1);
}
/* Quick return */
/*
if (max(N, 0) == 0)
return PLASMA_SUCCESS;
*/
PLASMA_desc, descA,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zpotri_Tile_Async ( PLASMA_enum  uplo,
PLASMA_desc A,
PLASMA_sequence sequence,
PLASMA_request request 
)

PLASMA_zpotri_Tile_Async - Computes the inverse of a complex Hermitian positive definite matrix A using the Cholesky factorization A = U**H*U or A = L*L**H computed by PLASMA_zpotrf. Non-blocking equivalent of PLASMA_zpotri_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zpotri
PLASMA_zpotri_Tile
PLASMA_cpotri_Tile_Async
PLASMA_dpotri_Tile_Async
PLASMA_spotri_Tile_Async
PLASMA_zpotrf_Tile_Async

Definition at line 222 of file zpotri.c.

References A, plasma_desc_t::mb, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), plasma_dynamic_call_4, plasma_dynamic_call_5, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_request_fail(), PLASMA_SUCCESS, PlasmaLower, PlasmaNonUnit, PlasmaUpper, plasma_request_t::status, and plasma_sequence_t::status.

{
PLASMA_desc descA = *A;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zpotri_Tile_Async", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zpotri_Tile_Async", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zpotri_Tile_Async", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zpotri_Tile_Async", "invalid descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (descA.nb != descA.mb) {
plasma_error("PLASMA_zpotri_Tile_Async", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if (uplo != PlasmaUpper && uplo != PlasmaLower) {
plasma_error("PLASMA_zpotri_Tile_Async", "illegal value of uplo");
return plasma_request_fail(sequence, request, -1);
}
/* Quick return */
/*
if (max(N, 0) == 0)
return PLASMA_SUCCESS;
*/
plasma_dynamic_call_5(plasma_pztrtri,
PLASMA_desc, descA,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
plasma_dynamic_call_4(plasma_pzlauum,
PLASMA_desc, descA,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zpotrs_Tile_Async ( PLASMA_enum  uplo,
PLASMA_desc A,
PLASMA_desc B,
PLASMA_sequence sequence,
PLASMA_request request 
)

PLASMA_zpotrs_Tile_Async - Solves a system of linear equations using a previously computed Cholesky factorization. Non-blocking equivalent of PLASMA_zpotrs_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zpotrs
PLASMA_zpotrs_Tile
PLASMA_cpotrs_Tile_Async
PLASMA_dpotrs_Tile_Async
PLASMA_spotrs_Tile_Async
PLASMA_zpotrf_Tile_Async

Definition at line 236 of file zpotrs.c.

References A, B, plasma_desc_t::mb, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_9, plasma_pztrsm(), plasma_request_fail(), PLASMA_SUCCESS, PlasmaConjTrans, PlasmaLeft, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaUpper, plasma_request_t::status, and plasma_sequence_t::status.

{
PLASMA_desc descA = *A;
PLASMA_desc descB = *B;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zpotrs_Tile", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zpotrs_Tile", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zpotrs_Tile", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zpotrs_Tile", "invalid first descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zpotrs_Tile", "invalid second descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (descA.nb != descA.mb || descB.nb != descB.mb) {
plasma_error("PLASMA_zpotrs_Tile", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if (uplo != PlasmaUpper && uplo != PlasmaLower) {
plasma_error("PLASMA_zpotrs_Tile", "illegal value of uplo");
return plasma_request_fail(sequence, request, -1);
}
/* Quick return */
/*
if (min(N, NRHS) == 0)
return PLASMA_SUCCESS;
*/
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zsymm_Tile_Async ( PLASMA_enum  side,
PLASMA_enum  uplo,
PLASMA_Complex64_t  alpha,
PLASMA_desc A,
PLASMA_desc B,
PLASMA_Complex64_t  beta,
PLASMA_desc C,
PLASMA_sequence sequence,
PLASMA_request request 
)

PLASMA_zsymm_Tile_Async - Performs symmetric matrix multiplication. Non-blocking equivalent of PLASMA_zsymm_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zsymm
PLASMA_zsymm_Tile
PLASMA_csymm_Tile_Async
PLASMA_dsymm_Tile_Async
PLASMA_ssymm_Tile_Async

Definition at line 303 of file zsymm.c.

References A, B, C, plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::n, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_9, plasma_pzsymm(), plasma_request_fail(), PLASMA_SUCCESS, PlasmaLeft, PlasmaLower, PlasmaRight, PlasmaUpper, plasma_request_t::status, and plasma_sequence_t::status.

{
PLASMA_desc descA = *A;
PLASMA_desc descB = *B;
PLASMA_desc descC = *C;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zsymm_Tile_Async", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zsymm_Tile_Async", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zsymm_Tile_Async", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zsymm_Tile_Async", "invalid first descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zsymm_Tile_Async", "invalid second descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zsymm_Tile_Async", "invalid third descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if ( (side != PlasmaLeft) && (side != PlasmaRight) ){
plasma_error("PLASMA_zsymm_Tile_Async", "illegal value of side");
return plasma_request_fail(sequence, request, -1);
}
if ((uplo != PlasmaLower) && (uplo != PlasmaUpper)) {
plasma_error("PLASMA_zsymm_Tile_Async", "illegal value of uplo");
return plasma_request_fail(sequence, request, -2);
}
/* Check matrices sizes */
if ( (descB.m != descC.m) || (descB.n != descC.n) ) {
plasma_error("PLASMA_zsymm_Tile_Async", "B and C must have the same size");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if ( (descA.m != descA.n) ||
( (side == PlasmaLeft) && (descA.m != descB.m ) ) ||
( (side == PlasmaRight) && (descA.m != descB.n ) ) ) {
plasma_error("PLASMA_zsymm_Tile_Async", "Matrix A must be square of size M or N regarding side.");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check tiles sizes */
if ( (descB.mb != descC.mb) || (descB.nb != descC.nb) ) {
plasma_error("PLASMA_zsymm_Tile_Async", "B and C must have the same tile sizes");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if ( (descA.mb != descA.nb) ||
( (side == PlasmaLeft) && (descA.mb != descB.mb ) ) ||
( (side == PlasmaRight) && (descA.mb != descB.nb ) ) ) {
plasma_error("PLASMA_zsymm_Tile_Async", "Matrix A must be square with square tiles wich fits the reagding tile size of B and C");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check submatrix starting point */
/* if ( (descB.i != descC.i) || (descB.j != descC.j) ) { */
/* plasma_error("PLASMA_zsymm_Tile_Async", "B and C submatrices doesn't match"); */
/* return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); */
/* } */
/* if ( (descA.i != descA.j) || */
/* ( (side == PlasmaLeft) && (descA.i != descB.i ) ) || */
/* ( (side == PlasmaRight) && (descA.i != descB.j ) ) ) { */
/* plasma_error("PLASMA_zsymm_Tile_Async", "Submatrix A must start on diagnonal and match submatrices B and C."); */
/* return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); */
/* } */
if( (descA.i != 0) || (descA.j != 0) ||
(descB.i != 0) || (descB.j != 0) ||
(descC.i != 0) || (descC.j != 0) ) {
plasma_error("PLASMA_zhemm_Tile_Async", "Submatrices are not supported for now");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Quick return */
if (descC.m == 0 || descC.n == 0 ||
( (alpha == (PLASMA_Complex64_t)0.0) && (beta == (PLASMA_Complex64_t)1.0) ))
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_desc, descC,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zsyr2k_Tile_Async ( PLASMA_enum  uplo,
PLASMA_enum  trans,
PLASMA_Complex64_t  alpha,
PLASMA_desc A,
PLASMA_desc B,
PLASMA_Complex64_t  beta,
PLASMA_desc C,
PLASMA_sequence sequence,
PLASMA_request request 
)

PLASMA_zsyr2k_Tile_Async - Performs symmetric rank-2k update. Non-blocking equivalent of PLASMA_zsyr2k_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zsyr2k
PLASMA_zsyr2k_Tile
PLASMA_csyr2k_Tile_Async
PLASMA_dsyr2k_Tile_Async
PLASMA_ssyr2k_Tile_Async

Definition at line 299 of file zsyr2k.c.

References A, B, C, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::n, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_9, plasma_pzsyr2k(), plasma_request_fail(), PLASMA_SUCCESS, PlasmaLower, PlasmaNoTrans, PlasmaTrans, PlasmaUpper, plasma_request_t::status, and plasma_sequence_t::status.

{
PLASMA_desc descA = *A;
PLASMA_desc descB = *B;
PLASMA_desc descC = *C;
int N, K;
int Am, An, Amb;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zsyr2k_Tile_Async", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zsyr2k_Tile_Async", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zsyr2k_Tile_Async", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zsyr2k_Tile_Async", "invalid first descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zsyr2k_Tile_Async", "invalid second descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zsyr2k_Tile_Async", "invalid third descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if ((uplo != PlasmaUpper) && (uplo != PlasmaLower)) {
plasma_error("PLASMA_zsyr2k", "illegal value of uplo");
return plasma_request_fail(sequence, request, -1);
}
if ((trans != PlasmaNoTrans) && (trans != PlasmaTrans)) {
plasma_error("PLASMA_zsyr2k", "illegal value of trans");
return plasma_request_fail(sequence, request, -2);
}
if ( trans == PlasmaNoTrans ) {
Am = descA.m;
An = descA.n;
Amb = descA.mb;
} else {
Am = descA.n;
An = descA.m;
Amb = descA.nb;
}
if (descC.mb != descC.nb) {
plasma_error("PLASMA_zsyr2k_Tile_Async", "only square tiles for C are supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if ( (descB.mb != descA.mb) || (descB.nb != descA.nb) || (Amb != descC.mb) ){
plasma_error("PLASMA_zsyr2k_Tile_Async", "tile sizes have to match");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if (descC.m != descC.n) {
plasma_error("PLASMA_zsyr2k_Tile_Async", "only square matrix C is supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if ( (descB.m != descA.m) || (descB.n != descA.n) || (Am != descC.m) ){
plasma_error("PLASMA_zsyr2k_Tile_Async", "sizes of matrices have to match");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
N = descC.m;
K = An;
/* Quick return */
if ( N == 0 ||
((alpha == (PLASMA_Complex64_t)0.0 || K == 0) && beta == (PLASMA_Complex64_t)1.0))
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_desc, descC,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zsyrk_Tile_Async ( PLASMA_enum  uplo,
PLASMA_enum  trans,
PLASMA_Complex64_t  alpha,
PLASMA_desc A,
PLASMA_Complex64_t  beta,
PLASMA_desc C,
PLASMA_sequence sequence,
PLASMA_request request 
)

PLASMA_zsyrk_Tile_Async - Performs symmetric rank-k update. Non-blocking equivalent of PLASMA_zsyrk_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zsyrk
PLASMA_zsyrk_Tile
PLASMA_csyrk_Tile_Async
PLASMA_dsyrk_Tile_Async
PLASMA_ssyrk_Tile_Async

Definition at line 276 of file zsyrk.c.

References A, C, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::n, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_8, plasma_pzsyrk(), plasma_request_fail(), PLASMA_SUCCESS, PlasmaLower, PlasmaNoTrans, PlasmaTrans, PlasmaUpper, plasma_request_t::status, and plasma_sequence_t::status.

{
PLASMA_desc descA = *A;
PLASMA_desc descC = *C;
int N, K;
int Am, An, Amb;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zsyrk_Tile_Async", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_zsyrk_Tile_Async", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_zsyrk_Tile_Async", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_zsyrk_Tile_Async", "invalid first descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_zsyrk_Tile_Async", "invalid third descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if ((uplo != PlasmaUpper) && (uplo != PlasmaLower)) {
plasma_error("PLASMA_zsyrk", "illegal value of uplo");
return plasma_request_fail(sequence, request, -1);
}
if ((trans != PlasmaNoTrans) && (trans != PlasmaTrans)) {
plasma_error("PLASMA_zsyrk", "illegal value of transA");
return plasma_request_fail(sequence, request, -2);
}
if ( trans == PlasmaNoTrans ) {
Am = descA.m;
An = descA.n;
Amb = descA.mb;
} else {
Am = descA.n;
An = descA.m;
Amb = descA.nb;
}
if (descC.mb != descC.nb) {
plasma_error("PLASMA_zsyrk_Tile_Async", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if (Amb != descC.mb) {
plasma_error("PLASMA_zsyrk_Tile_Async", "tile sizes have to match");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if (descC.m != descC.n) {
plasma_error("PLASMA_zsyrk_Tile_Async", "only square matrix C is supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if (Am != descC.m) {
plasma_error("PLASMA_zsyrk_Tile_Async", "sizes of matrices have to match");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
N = descC.m;
K = An;
/* Quick return */
if ( N == 0 ||
((alpha == (PLASMA_Complex64_t)0.0 || K == 0) && beta == (PLASMA_Complex64_t)1.0))
PLASMA_desc, descA,
PLASMA_desc, descC,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zTile_to_Lapack_Async ( PLASMA_desc A,
PLASMA_Complex64_t Af77,
int  LDA,
PLASMA_sequence sequence,
PLASMA_request request 
)

PLASMA_zTile_to_Lapack_Async - Conversion from tile layout to LAPACK layout. Non-blocking equivalent of PLASMA_zTile_to_Lapack(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] A: Descriptor of the PLASMA matrix in tile layout.
[in,out] Af77: LAPACK matrix. If PLASMA_TRANSLATION_MODE is set to PLASMA_INPLACE, Af77 has to be A->mat; otherwise, if PLASMA_TRANSLATION_MODE is set to PLASMA_OUTOFPLACE, Af77 has to be allocated beforehand.
[in] LDA: The leading dimension of the matrix Af77.
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_zLapack_to_Tile_Async
PLASMA_zTile_to_Lapack
PLASMA_cTile_to_Lapack_Async
PLASMA_dTile_to_Lapack_Async
PLASMA_sTile_to_Lapack_Async

Definition at line 264 of file ztile.c.

References A, plasma_context_self(), plasma_desc_check(), PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), plasma_pztile_to_lapack(), plasma_static_call_5, and PLASMA_SUCCESS.

{
PLASMA_desc descA = *A;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zTile_to_Lapack", "PLASMA not initialized");
}
/* Check descriptor for correctness */
plasma_error("PLASMA_zTile_to_Lapack", "invalid descriptor");
}
PLASMA_desc, descA,
int, LDA,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

int PLASMA_ztrmm_Tile_Async ( PLASMA_enum  side,
PLASMA_enum  uplo,
PLASMA_enum  transA,
PLASMA_enum  diag,
PLASMA_Complex64_t  alpha,
PLASMA_desc A,
PLASMA_desc B,
PLASMA_sequence sequence,
PLASMA_request request 
)

PLASMA_ztrmm_Tile_Async - Performs triangular matrix multiplication. Non-blocking equivalent of PLASMA_ztrmm_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_ztrmm
PLASMA_ztrmm_Tile
PLASMA_ctrmm_Tile_Async
PLASMA_dtrmm_Tile_Async
PLASMA_strmm_Tile_Async

Definition at line 298 of file ztrmm.c.

References A, B, plasma_desc_t::mb, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), plasma_dynamic_call_9, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_pztrmm(), plasma_request_fail(), PLASMA_SUCCESS, PlasmaConjTrans, PlasmaLeft, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaRight, PlasmaTrans, PlasmaUnit, PlasmaUpper, plasma_request_t::status, and plasma_sequence_t::status.

{
PLASMA_desc descA = *A;
PLASMA_desc descB = *B;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_ztrmm_Tile", "PLASMA not initialized");
}
if (sequence == NULL) {
plasma_fatal_error("PLASMA_ztrmm_Tile", "NULL sequence");
}
if (request == NULL) {
plasma_fatal_error("PLASMA_ztrmm_Tile", "NULL request");
}
/* Check sequence status */
if (sequence->status == PLASMA_SUCCESS)
request->status = PLASMA_SUCCESS;
else
/* Check descriptors for correctness */
plasma_error("PLASMA_ztrmm_Tile", "invalid first descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
plasma_error("PLASMA_ztrmm_Tile", "invalid second descriptor");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (descA.nb != descA.mb || descB.nb != descB.mb) {
plasma_error("PLASMA_ztrmm_Tile", "only square tiles supported");
return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
}
if (side != PlasmaLeft && side != PlasmaRight) {
plasma_error("PLASMA_ztrmm_Tile", "illegal value of side");
return plasma_request_fail(sequence, request, -1);
}
if (uplo != PlasmaUpper && uplo != PlasmaLower) {
plasma_error("PLASMA_ztrmm_Tile", "illegal value of uplo");
return plasma_request_fail(sequence, request, -2);
}
if (transA != PlasmaConjTrans && transA != PlasmaNoTrans && transA != PlasmaTrans) {
plasma_error("PLASMA_ztrmm_Tile", "illegal value of transA");
return plasma_request_fail(sequence, request, -3);
}
plasma_error("PLASMA_ztrmm_Tile", "illegal value of diag");
return plasma_request_fail(sequence, request, -4);
}
/* Quick return */
PLASMA_enum, transA,
PLASMA_desc, descA,
PLASMA_desc, descB,
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_ztrsm_Tile_Async ( PLASMA_enum  side,
PLASMA_enum  uplo,
PLASMA_enum  transA,
PLASMA_enum  diag,
PLASMA_Complex64_t  alpha,
PLASMA_desc *  A,
PLASMA_desc *  B,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

PLASMA_ztrsm_Tile_Async - Computes triangular solve. Non-blocking equivalent of PLASMA_ztrsm_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence - Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request - Identifies this function call (for exception handling purposes).
See also:
PLASMA_ztrsm
PLASMA_ztrsm_Tile
PLASMA_ctrsm_Tile_Async
PLASMA_dtrsm_Tile_Async
PLASMA_strsm_Tile_Async

Definition at line 298 of file ztrsm.c.

References A, B, plasma_desc_t::mb, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_9, plasma_pztrsm(), plasma_request_fail(), PLASMA_SUCCESS, PlasmaConjTrans, PlasmaLeft, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaRight, PlasmaTrans, PlasmaUnit, PlasmaUpper, plasma_request_t::status, and plasma_sequence_t::status.

{
    /*
     * Checks the PLASMA context, the sequence/request handles, both tile
     * descriptors and the side/uplo/transA/diag arguments, then submits
     * plasma_pztrsm through plasma_parallel_call_9.
     * NOTE(review): statements missing from the extracted listing were
     * restored from the identifiers named in the References paragraph and
     * from the statements that survived; confirm against ztrsm.c.
     */
    PLASMA_desc descA = *A;   /* by-value copies of the caller's descriptors */
    PLASMA_desc descB = *B;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_ztrsm_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_ztrsm_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_ztrsm_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }
    /* Check sequence status */
    if (sequence->status == PLASMA_SUCCESS)
        request->status = PLASMA_SUCCESS;
    else
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);

    /* Check descriptors for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_ztrsm_Tile", "invalid first descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_ztrsm_Tile", "invalid second descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    /* Check input arguments */
    if (descA.nb != descA.mb || descB.nb != descB.mb) {
        plasma_error("PLASMA_ztrsm_Tile", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    /* Negative codes below are the 1-based position of the offending argument. */
    if (side != PlasmaLeft && side != PlasmaRight) {
        plasma_error("PLASMA_ztrsm_Tile", "illegal value of side");
        return plasma_request_fail(sequence, request, -1);
    }
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_ztrsm_Tile", "illegal value of uplo");
        return plasma_request_fail(sequence, request, -2);
    }
    if (transA != PlasmaConjTrans && transA != PlasmaNoTrans && transA != PlasmaTrans) {
        plasma_error("PLASMA_ztrsm_Tile", "illegal value of transA");
        return plasma_request_fail(sequence, request, -3);
    }
    if (diag != PlasmaUnit && diag != PlasmaNonUnit) {
        plasma_error("PLASMA_ztrsm_Tile", "illegal value of diag");
        return plasma_request_fail(sequence, request, -4);
    }
    /* Quick return */
    plasma_parallel_call_9(plasma_pztrsm,
        PLASMA_enum, side,
        PLASMA_enum, uplo,
        PLASMA_enum, transA,
        PLASMA_enum, diag,
        PLASMA_Complex64_t, alpha,
        PLASMA_desc, descA,
        PLASMA_desc, descB,
        PLASMA_sequence*, sequence,
        PLASMA_request*, request);

    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_ztrsmpl_Tile_Async ( PLASMA_desc *  A,
PLASMA_desc *  L,
int *  IPIV,
PLASMA_desc *  B,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

PLASMA_ztrsmpl_Tile_Async - Performs the forward substitution step of solving a system of linear equations after the tile LU factorization of the matrix. Non-blocking equivalent of PLASMA_ztrsmpl_Tile(). Returns control to the user thread before worker threads finish the computation to allow for pipelined execution of different routines.

Parameters:
[in] sequence - Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request - Identifies this function call (for exception handling purposes).
See also:
PLASMA_ztrsmpl
PLASMA_ztrsmpl_Tile
PLASMA_ctrsmpl_Tile_Async
PLASMA_dtrsmpl_Tile_Async
PLASMA_strsmpl_Tile_Async
PLASMA_zgetrf_incpiv_Tile_Async

Definition at line 240 of file ztrsmpl.c.

References A, B, L, plasma_desc_t::mb, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_6, plasma_pztrsmpl(), plasma_request_fail(), PLASMA_SUCCESS, plasma_request_t::status, and plasma_sequence_t::status.

{
    /*
     * Checks the PLASMA context, the sequence/request handles and the three
     * tile descriptors, then submits plasma_pztrsmpl through
     * plasma_parallel_call_6.
     * NOTE(review): statements missing from the extracted listing were
     * restored from the identifiers named in the References paragraph and
     * from the statements that survived; the first/second/third descriptor
     * checks are assumed to follow the parameter order A, L, B. Confirm
     * against ztrsmpl.c.
     */
    PLASMA_desc descA = *A;   /* by-value copies of the caller's descriptors */
    PLASMA_desc descL = *L;
    PLASMA_desc descB = *B;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_ztrsmpl_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_ztrsmpl_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_ztrsmpl_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }
    /* Check sequence status */
    if (sequence->status == PLASMA_SUCCESS)
        request->status = PLASMA_SUCCESS;
    else
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);

    /* Check descriptors for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_ztrsmpl_Tile", "invalid first descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if (plasma_desc_check(&descL) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_ztrsmpl_Tile", "invalid second descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_ztrsmpl_Tile", "invalid third descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    /* Check input arguments */
    if (descA.nb != descA.mb || descB.nb != descB.mb) {
        plasma_error("PLASMA_ztrsmpl_Tile", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    /* Quick return */
/*
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
*/
    plasma_parallel_call_6(plasma_pztrsmpl,
        PLASMA_desc, descA,
        PLASMA_desc, descB,
        PLASMA_desc, descL,
        int*, IPIV,
        PLASMA_sequence*, sequence,
        PLASMA_request*, request);

    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_ztrsmrv_Tile_Async ( PLASMA_enum  side,
PLASMA_enum  uplo,
PLASMA_enum  transA,
PLASMA_enum  diag,
PLASMA_Complex64_t  alpha,
PLASMA_desc *  A,
PLASMA_desc *  B,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

PLASMA_ztrsmrv_Tile_Async - Computes triangular solve. Non-blocking equivalent of PLASMA_ztrsmrv_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence - Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request - Identifies this function call (for exception handling purposes).
See also:
PLASMA_ztrsmrv
PLASMA_ztrsmrv_Tile
PLASMA_ctrsmrv_Tile_Async
PLASMA_dtrsmrv_Tile_Async
PLASMA_strsmrv_Tile_Async

Definition at line 298 of file ztrsmrv.c.

References A, B, plasma_desc_t::mb, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), plasma_dynamic_call_9, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_pztrsmrv(), plasma_request_fail(), PLASMA_SUCCESS, PlasmaConjTrans, PlasmaLeft, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaRight, PlasmaTrans, PlasmaUnit, PlasmaUpper, plasma_request_t::status, and plasma_sequence_t::status.

{
    /*
     * Checks the PLASMA context, the sequence/request handles, both tile
     * descriptors and the side/uplo/transA/diag arguments, rejects every
     * mode combination other than Right/Lower/NoTrans/Unit, then submits
     * plasma_pztrsmrv through plasma_dynamic_call_9.
     * NOTE(review): statements missing from the extracted listing were
     * restored from the identifiers named in the References paragraph and
     * from the statements that survived; confirm against ztrsmrv.c.
     */
    PLASMA_desc descA = *A;   /* by-value copies of the caller's descriptors */
    PLASMA_desc descB = *B;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_ztrsmrv_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_ztrsmrv_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_ztrsmrv_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }
    /* Check sequence status */
    if (sequence->status == PLASMA_SUCCESS)
        request->status = PLASMA_SUCCESS;
    else
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);

    /* Check descriptors for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_ztrsmrv_Tile", "invalid first descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_ztrsmrv_Tile", "invalid second descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    /* Check input arguments */
    if (descA.nb != descA.mb || descB.nb != descB.mb) {
        plasma_error("PLASMA_ztrsmrv_Tile", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    /* Negative codes below are the 1-based position of the offending argument. */
    if (side != PlasmaLeft && side != PlasmaRight) {
        plasma_error("PLASMA_ztrsmrv_Tile", "illegal value of side");
        return plasma_request_fail(sequence, request, -1);
    }
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_ztrsmrv_Tile", "illegal value of uplo");
        return plasma_request_fail(sequence, request, -2);
    }
    if (transA != PlasmaConjTrans && transA != PlasmaNoTrans && transA != PlasmaTrans) {
        plasma_error("PLASMA_ztrsmrv_Tile", "illegal value of transA");
        return plasma_request_fail(sequence, request, -3);
    }
    if (diag != PlasmaUnit && diag != PlasmaNonUnit) {
        plasma_error("PLASMA_ztrsmrv_Tile", "illegal value of diag");
        return plasma_request_fail(sequence, request, -4);
    }

    /* Legal values that this routine nevertheless does not handle. */
    if ( side != PlasmaRight ) {
        plasma_error("PLASMA_ztrsmrv_Tile", "PlasmaLeft is not supported");
        return plasma_request_fail(sequence, request, -1);
    }
    if ( uplo != PlasmaLower ) {
        plasma_error("PLASMA_ztrsmrv_Tile", "PlasmaUpper is not supported");
        return plasma_request_fail(sequence, request, -2);
    }
    if ( transA != PlasmaNoTrans ) {
        plasma_error("PLASMA_ztrsmrv_Tile", "Only PlasmaNoTrans is supported");
        return plasma_request_fail(sequence, request, -3);
    }
    if ( diag != PlasmaUnit ) {
        plasma_error("PLASMA_ztrsmrv_Tile", "PlasmaNonUnit is not supported");
        return plasma_request_fail(sequence, request, -4);
    }
    /* Quick return */
    plasma_dynamic_call_9(plasma_pztrsmrv,
        PLASMA_enum, side,
        PLASMA_enum, uplo,
        PLASMA_enum, transA,
        PLASMA_enum, diag,
        PLASMA_Complex64_t, alpha,
        PLASMA_desc, descA,
        PLASMA_desc, descB,
        PLASMA_sequence*, sequence,
        PLASMA_request*, request);

    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_ztrtri_Tile_Async ( PLASMA_enum  uplo,
PLASMA_enum  diag,
PLASMA_desc *  A,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

PLASMA_ztrtri_Tile_Async - Computes the inverse of a complex upper or lower triangular matrix A. Non-blocking equivalent of PLASMA_ztrtri_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence - Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request - Identifies this function call (for exception handling purposes).
See also:
PLASMA_ztrtri
PLASMA_ztrtri_Tile
PLASMA_ctrtri_Tile_Async
PLASMA_dtrtri_Tile_Async
PLASMA_strtri_Tile_Async
PLASMA_zpotri_Tile_Async

Definition at line 240 of file ztrtri.c.

References A, plasma_desc_t::mb, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), plasma_dynamic_call_5, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), plasma_request_fail(), PLASMA_SUCCESS, PlasmaLower, PlasmaNonUnit, PlasmaUnit, PlasmaUpper, plasma_request_t::status, and plasma_sequence_t::status.

{
    /*
     * Checks the PLASMA context, the sequence/request handles, the tile
     * descriptor and the uplo/diag arguments, then submits plasma_pztrtri
     * through plasma_dynamic_call_5.
     * NOTE(review): statements missing from the extracted listing were
     * restored from the identifiers named in the References paragraph and
     * from the statements that survived; confirm against ztrtri.c.
     */
    PLASMA_desc descA = *A;   /* by-value copy of the caller's descriptor */
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_ztrtri_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_ztrtri_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_ztrtri_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }
    /* Check sequence status */
    if (sequence->status == PLASMA_SUCCESS)
        request->status = PLASMA_SUCCESS;
    else
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);

    /* Check descriptors for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_ztrtri_Tile", "invalid descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    /* Check input arguments */
    if (descA.nb != descA.mb) {
        plasma_error("PLASMA_ztrtri_Tile", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    /* Negative codes below are the 1-based position of the offending argument. */
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_ztrtri_Tile", "illegal value of uplo");
        return plasma_request_fail(sequence, request, -1);
    }
    if (diag != PlasmaUnit && diag != PlasmaNonUnit) {
        plasma_error("PLASMA_ztrtri_Tile", "illegal value of diag");
        return plasma_request_fail(sequence, request, -2);
    }
    /* Quick return */
/*
    if (max(N, 0) == 0)
        return PLASMA_SUCCESS;
*/
    plasma_dynamic_call_5(plasma_pztrtri,
        PLASMA_enum, uplo,
        PLASMA_enum, diag,
        PLASMA_desc, descA,
        PLASMA_sequence*, sequence,
        PLASMA_request*, request);

    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zunglq_Tile_Async ( PLASMA_desc *  A,
PLASMA_desc *  T,
PLASMA_desc *  Q,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

Non-blocking equivalent of PLASMA_zunglq_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence - Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request - Identifies this function call (for exception handling purposes).
See also:
PLASMA_zunglq
PLASMA_zunglq_Tile
PLASMA_cunglq_Tile_Async
PLASMA_dorglq_Tile_Async
PLASMA_sorglq_Tile_Async
PLASMA_zgelqf_Tile_Async

Definition at line 249 of file zunglq.c.

References A, plasma_context_struct::householder, plasma_desc_t::mb, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), plasma_dynamic_call_5, plasma_dynamic_call_6, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, plasma_pzunglq(), plasma_request_fail(), PLASMA_RHBLK, PLASMA_SUCCESS, Q, plasma_request_t::status, plasma_sequence_t::status, and T.

{
    /*
     * Checks the PLASMA context, the sequence/request handles and the three
     * tile descriptors, then submits either plasma_pzunglq (flat Householder
     * mode) or plasma_pzunglqrh (otherwise) to the dynamic scheduler.
     * NOTE(review): statements missing from the extracted listing were
     * restored from the identifiers named in the References paragraph and
     * from the statements that survived. The descriptor checks are assumed
     * to follow the parameter order A, T, Q, and the sixth argument of the
     * plasma_pzunglqrh call is assumed to be PLASMA_RHBLK passed as a
     * PLASMA_enum. Confirm against zunglq.c.
     */
    PLASMA_desc descA = *A;   /* by-value copies of the caller's descriptors */
    PLASMA_desc descT = *T;
    PLASMA_desc descQ = *Q;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zunglq_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_zunglq_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_zunglq_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }
    /* Check sequence status */
    if (sequence->status == PLASMA_SUCCESS)
        request->status = PLASMA_SUCCESS;
    else
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);

    /* Check descriptors for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zunglq_Tile", "invalid first descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if (plasma_desc_check(&descT) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zunglq_Tile", "invalid second descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if (plasma_desc_check(&descQ) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zunglq_Tile", "invalid third descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    /* Check input arguments */
    if (descA.nb != descA.mb || descQ.nb != descQ.mb) {
        plasma_error("PLASMA_zunglq_Tile", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    /* Quick return - currently NOT equivalent to LAPACK's:
     * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, Q, LDQ ) */
/*
    if (min(M, N) == 0)
        return PLASMA_SUCCESS;
*/
    if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
        plasma_dynamic_call_5(plasma_pzunglq,
            PLASMA_desc, descA,
            PLASMA_desc, descQ,
            PLASMA_desc, descT,
            PLASMA_sequence*, sequence,
            PLASMA_request*, request);
    }
    else {
        plasma_dynamic_call_6(plasma_pzunglqrh,
            PLASMA_desc, descA,
            PLASMA_desc, descQ,
            PLASMA_desc, descT,
            PLASMA_enum, PLASMA_RHBLK,
            PLASMA_sequence*, sequence,
            PLASMA_request*, request);
    }

    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zungqr_Tile_Async ( PLASMA_desc *  A,
PLASMA_desc *  T,
PLASMA_desc *  Q,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

Non-blocking equivalent of PLASMA_zungqr_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence - Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request - Identifies this function call (for exception handling purposes).
See also:
PLASMA_zungqr
PLASMA_zungqr_Tile
PLASMA_cungqr_Tile_Async
PLASMA_dorgqr_Tile_Async
PLASMA_sorgqr_Tile_Async
PLASMA_zgeqrf_Tile_Async

Definition at line 247 of file zungqr.c.

References A, plasma_context_struct::householder, plasma_desc_t::mb, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), plasma_dynamic_call_5, plasma_dynamic_call_6, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, plasma_pzungqr(), plasma_pzungqrrh(), plasma_request_fail(), PLASMA_RHBLK, PLASMA_SUCCESS, Q, plasma_request_t::status, plasma_sequence_t::status, and T.

{
    /*
     * Checks the PLASMA context, the sequence/request handles and the three
     * tile descriptors, then submits either plasma_pzungqr (flat Householder
     * mode) or plasma_pzungqrrh (otherwise) to the dynamic scheduler.
     * NOTE(review): statements missing from the extracted listing were
     * restored from the identifiers named in the References paragraph and
     * from the statements that survived. The descriptor checks are assumed
     * to follow the parameter order A, T, Q, and the sixth argument of the
     * plasma_pzungqrrh call is assumed to be PLASMA_RHBLK passed as a
     * PLASMA_enum. Confirm against zungqr.c.
     */
    PLASMA_desc descA = *A;   /* by-value copies of the caller's descriptors */
    PLASMA_desc descT = *T;
    PLASMA_desc descQ = *Q;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zungqr_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_zungqr_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_zungqr_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }
    /* Check sequence status */
    if (sequence->status == PLASMA_SUCCESS)
        request->status = PLASMA_SUCCESS;
    else
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);

    /* Check descriptors for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zungqr_Tile", "invalid first descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if (plasma_desc_check(&descT) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zungqr_Tile", "invalid second descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if (plasma_desc_check(&descQ) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zungqr_Tile", "invalid third descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    /* Check input arguments */
    if (descA.nb != descA.mb || descQ.nb != descQ.mb) {
        plasma_error("PLASMA_zungqr_Tile", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    /* Quick return */
/*
    if (N <= 0)
        return PLASMA_SUCCESS;
*/
    if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
        plasma_dynamic_call_5(plasma_pzungqr,
            PLASMA_desc, descA,
            PLASMA_desc, descQ,
            PLASMA_desc, descT,
            PLASMA_sequence*, sequence,
            PLASMA_request*, request);
    }
    else {
        plasma_dynamic_call_6(plasma_pzungqrrh,
            PLASMA_desc, descA,
            PLASMA_desc, descQ,
            PLASMA_desc, descT,
            PLASMA_enum, PLASMA_RHBLK,
            PLASMA_sequence*, sequence,
            PLASMA_request*, request);
    }

    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zunmlq_Tile_Async ( PLASMA_enum  side,
PLASMA_enum  trans,
PLASMA_desc *  A,
PLASMA_desc *  T,
PLASMA_desc *  B,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

Non-blocking equivalent of PLASMA_zunmlq_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence - Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request - Identifies this function call (for exception handling purposes).
See also:
PLASMA_zunmlq
PLASMA_zunmlq_Tile
PLASMA_cunmlq_Tile_Async
PLASMA_dormlq_Tile_Async
PLASMA_sormlq_Tile_Async
PLASMA_zgelqf_Tile_Async

Definition at line 295 of file zunmlq.c.

References A, B, plasma_context_struct::householder, plasma_desc_t::mb, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), plasma_dynamic_call_7, plasma_dynamic_call_8, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, plasma_parallel_call_7, plasma_pzunmlq(), plasma_request_fail(), PLASMA_RHBLK, PLASMA_SUCCESS, PlasmaConjTrans, PlasmaLeft, PlasmaNoTrans, PlasmaRight, plasma_request_t::status, plasma_sequence_t::status, and T.

{
    /*
     * Checks the PLASMA context, the sequence/request handles, the three
     * tile descriptors and the side/trans arguments, then submits
     * plasma_pzunmlq (flat Householder mode) or plasma_pzunmlqrh
     * (otherwise).
     * NOTE(review): statements missing from the extracted listing were
     * restored from the identifiers named in the References paragraph and
     * from the statements that survived. Assumptions to confirm against
     * zunmlq.c: descriptor checks follow the parameter order A, T, B; the
     * ConjTrans/Left case uses plasma_parallel_call_7 and every other flat
     * case plasma_dynamic_call_7; the extra argument of the plasma_pzunmlqrh
     * call is PLASMA_RHBLK passed as a PLASMA_enum.
     */
    PLASMA_desc descA = *A;   /* by-value copies of the caller's descriptors */
    PLASMA_desc descT = *T;
    PLASMA_desc descB = *B;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zunmlq_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_zunmlq_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_zunmlq_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }
    /* Check sequence status */
    if (sequence->status == PLASMA_SUCCESS)
        request->status = PLASMA_SUCCESS;
    else
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);

    /* Check descriptors for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zunmlq_Tile", "invalid first descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if (plasma_desc_check(&descT) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zunmlq_Tile", "invalid second descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zunmlq_Tile", "invalid third descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    /* Check input arguments */
    if (descA.nb != descA.mb || descB.nb != descB.mb) {
        plasma_error("PLASMA_zunmlq_Tile", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if ( (side != PlasmaLeft) && (side != PlasmaRight) ) {
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if ( (trans != PlasmaConjTrans) && (trans != PlasmaNoTrans) ) {
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    /* Quick return - currently NOT equivalent to LAPACK's:
     * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) */
/*
    if (min(M, min(N, K)) == 0)
        return PLASMA_SUCCESS;
*/
    if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
        if ( (trans == PlasmaConjTrans) &&
             (side == PlasmaLeft) ) {
            plasma_parallel_call_7(plasma_pzunmlq,
                PLASMA_enum, side,
                PLASMA_enum, trans,
                PLASMA_desc, descA,
                PLASMA_desc, descB,
                PLASMA_desc, descT,
                PLASMA_sequence*, sequence,
                PLASMA_request*, request);
        } else {
            plasma_dynamic_call_7(plasma_pzunmlq,
                PLASMA_enum, side,
                PLASMA_enum, trans,
                PLASMA_desc, descA,
                PLASMA_desc, descB,
                PLASMA_desc, descT,
                PLASMA_sequence*, sequence,
                PLASMA_request*, request);
        }
    }
    else {
        plasma_dynamic_call_8(plasma_pzunmlqrh,
            PLASMA_enum, side,
            PLASMA_enum, trans,
            PLASMA_desc, descA,
            PLASMA_desc, descB,
            PLASMA_desc, descT,
            PLASMA_enum, PLASMA_RHBLK,
            PLASMA_sequence*, sequence,
            PLASMA_request*, request);
    }

    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zunmqr_Tile_Async ( PLASMA_enum  side,
PLASMA_enum  trans,
PLASMA_desc *  A,
PLASMA_desc *  T,
PLASMA_desc *  B,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

Non-blocking equivalent of PLASMA_zunmqr_Tile(). May return before the computation is finished. Allows for pipelining of operations at runtime.

Parameters:
[in] sequence - Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request - Identifies this function call (for exception handling purposes).
See also:
PLASMA_zunmqr
PLASMA_zunmqr_Tile
PLASMA_cunmqr_Tile_Async
PLASMA_dormqr_Tile_Async
PLASMA_sormqr_Tile_Async
PLASMA_zgeqrf_Tile_Async

Definition at line 298 of file zunmqr.c.

References A, B, plasma_context_struct::householder, plasma_desc_t::mb, plasma_desc_t::nb, plasma_context_self(), plasma_desc_check(), plasma_dynamic_call_7, plasma_dynamic_call_8, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_SEQUENCE_FLUSHED, PLASMA_ERR_UNALLOCATED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, plasma_parallel_call_7, plasma_pzunmqr(), plasma_request_fail(), PLASMA_RHBLK, PLASMA_SUCCESS, PlasmaConjTrans, PlasmaLeft, PlasmaNoTrans, PlasmaRight, plasma_request_t::status, plasma_sequence_t::status, and T.

{
    /*
     * Checks the PLASMA context, the sequence/request handles, the three
     * tile descriptors and the side/trans arguments, then submits
     * plasma_pzunmqr (flat Householder mode) or plasma_pzunmqrrh
     * (otherwise).
     * NOTE(review): statements missing from the extracted listing were
     * restored from the identifiers named in the References paragraph and
     * from the statements that survived. Assumptions to confirm against
     * zunmqr.c: descriptor checks follow the parameter order A, T, B; the
     * ConjTrans/Left case uses plasma_parallel_call_7 and every other flat
     * case plasma_dynamic_call_7; the extra argument of the plasma_pzunmqrrh
     * call is PLASMA_RHBLK passed as a PLASMA_enum.
     */
    PLASMA_desc descA = *A;   /* by-value copies of the caller's descriptors */
    PLASMA_desc descT = *T;
    PLASMA_desc descB = *B;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zunmqr_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_zunmqr_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_zunmqr_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }
    /* Check sequence status */
    if (sequence->status == PLASMA_SUCCESS)
        request->status = PLASMA_SUCCESS;
    else
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);

    /* Check descriptors for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zunmqr_Tile", "invalid first descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if (plasma_desc_check(&descT) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zunmqr_Tile", "invalid second descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zunmqr_Tile", "invalid third descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    /* Check input arguments */
    if (descA.nb != descA.mb || descB.nb != descB.mb) {
        plasma_error("PLASMA_zunmqr_Tile", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if ((side != PlasmaLeft) && (side != PlasmaRight)) {
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if ((trans != PlasmaConjTrans) && (trans != PlasmaNoTrans)) {
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    /* Quick return - currently NOT equivalent to LAPACK's:
     * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) */
/*
    if (min(M, min(N, K)) == 0)
        return PLASMA_SUCCESS;
*/
    if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
        if ( (trans == PlasmaConjTrans) &&
             (side == PlasmaLeft) ) {
            plasma_parallel_call_7(plasma_pzunmqr,
                PLASMA_enum, side,
                PLASMA_enum, trans,
                PLASMA_desc, descA,
                PLASMA_desc, descB,
                PLASMA_desc, descT,
                PLASMA_sequence*, sequence,
                PLASMA_request*, request);
        }
        else {
            plasma_dynamic_call_7(plasma_pzunmqr,
                PLASMA_enum, side,
                PLASMA_enum, trans,
                PLASMA_desc, descA,
                PLASMA_desc, descB,
                PLASMA_desc, descT,
                PLASMA_sequence*, sequence,
                PLASMA_request*, request);
        }
    }
    else {
        plasma_dynamic_call_8(plasma_pzunmqrrh,
            PLASMA_enum, side,
            PLASMA_enum, trans,
            PLASMA_desc, descA,
            PLASMA_desc, descB,
            PLASMA_desc, descT,
            PLASMA_enum, PLASMA_RHBLK,
            PLASMA_sequence*, sequence,
            PLASMA_request*, request);
    }

    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function: