Functions
int	PLASMA_zcgels (PLASMA_enum trans, int M, int N, int NRHS, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB, PLASMA_Complex64_t *X, int LDX, int *ITER)
int	PLASMA_zcgesv (int N, int NRHS, PLASMA_Complex64_t *A, int LDA, int *IPIV, PLASMA_Complex64_t *B, int LDB, PLASMA_Complex64_t *X, int LDX, int *ITER)
int	PLASMA_zcposv (PLASMA_enum uplo, int N, int NRHS, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB, PLASMA_Complex64_t *X, int LDX, int *ITER)
int	PLASMA_zcungesv (PLASMA_enum trans, int N, int NRHS, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB, PLASMA_Complex64_t *X, int LDX, int *ITER)
int	PLASMA_zgebrd (PLASMA_enum jobu, PLASMA_enum jobvt, int M, int N, PLASMA_Complex64_t A, int LDA, double D, double E, PLASMA_Complex64_t U, int LDU, PLASMA_Complex64_t VT, int LDVT, PLASMA_desc descT)
int	PLASMA_zgelqf (int M, int N, PLASMA_Complex64_t A, int LDA, PLASMA_Complex64_t T)
int	PLASMA_zgelqs (int M, int N, int NRHS, PLASMA_Complex64_t A, int LDA, PLASMA_Complex64_t T, PLASMA_Complex64_t *B, int LDB)
int	PLASMA_zgels (PLASMA_enum trans, int M, int N, int NRHS, PLASMA_Complex64_t A, int LDA, PLASMA_Complex64_t T, PLASMA_Complex64_t *B, int LDB)
int	PLASMA_zgemm (PLASMA_enum transA, PLASMA_enum transB, int M, int N, int K, PLASMA_Complex64_t alpha, PLASMA_Complex64_t A, int LDA, PLASMA_Complex64_t B, int LDB, PLASMA_Complex64_t beta, PLASMA_Complex64_t *C, int LDC)
int	PLASMA_zgeqrf (int M, int N, PLASMA_Complex64_t A, int LDA, PLASMA_Complex64_t T)
int	PLASMA_zgeqrs (int M, int N, int NRHS, PLASMA_Complex64_t A, int LDA, PLASMA_Complex64_t T, PLASMA_Complex64_t *B, int LDB)
int	PLASMA_zgesv (int N, int NRHS, PLASMA_Complex64_t A, int LDA, int IPIV, PLASMA_Complex64_t *B, int LDB)
int	PLASMA_zgesv_incpiv (int N, int NRHS, PLASMA_Complex64_t A, int LDA, PLASMA_Complex64_t L, int IPIV, PLASMA_Complex64_t B, int LDB)
int	PLASMA_zgesvd (PLASMA_enum jobu, PLASMA_enum jobvt, int M, int N, PLASMA_Complex64_t A, int LDA, double S, PLASMA_Complex64_t U, int LDU, PLASMA_Complex64_t VT, int LDVT, PLASMA_desc *descT)
int	PLASMA_zgetrf (int M, int N, PLASMA_Complex64_t A, int LDA, int IPIV)
int	PLASMA_zgetrf_incpiv (int M, int N, PLASMA_Complex64_t A, int LDA, PLASMA_Complex64_t L, int *IPIV)
int	PLASMA_zgetri (int N, PLASMA_Complex64_t A, int LDA, int IPIV)
int	PLASMA_zgetrs (PLASMA_enum trans, int N, int NRHS, PLASMA_Complex64_t A, int LDA, int IPIV, PLASMA_Complex64_t *B, int LDB)
int	PLASMA_zgetrs_incpiv (PLASMA_enum trans, int N, int NRHS, PLASMA_Complex64_t A, int LDA, PLASMA_Complex64_t L, int IPIV, PLASMA_Complex64_t B, int LDB)
int	PLASMA_zheev (PLASMA_enum jobz, PLASMA_enum uplo, int N, PLASMA_Complex64_t A, int LDA, double W, PLASMA_desc descT, PLASMA_Complex64_t Q, int LDQ)
int	PLASMA_zheevd (PLASMA_enum jobz, PLASMA_enum uplo, int N, PLASMA_Complex64_t A, int LDA, double W, PLASMA_desc T, PLASMA_Complex64_t Q, int LDQ)
int	PLASMA_zhegst (PLASMA_enum itype, PLASMA_enum uplo, int N, PLASMA_Complex64_t A, int LDA, PLASMA_Complex64_t B, int LDB)
int	PLASMA_zhegv (PLASMA_enum itype, PLASMA_enum jobz, PLASMA_enum uplo, int N, PLASMA_Complex64_t A, int LDA, PLASMA_Complex64_t B, int LDB, double W, PLASMA_desc descT, PLASMA_Complex64_t *Q, int LDQ)
int	PLASMA_zhemm (PLASMA_enum side, PLASMA_enum uplo, int M, int N, PLASMA_Complex64_t alpha, PLASMA_Complex64_t A, int LDA, PLASMA_Complex64_t B, int LDB, PLASMA_Complex64_t beta, PLASMA_Complex64_t *C, int LDC)
int	PLASMA_zher2k (PLASMA_enum uplo, PLASMA_enum trans, int N, int K, PLASMA_Complex64_t alpha, PLASMA_Complex64_t A, int LDA, PLASMA_Complex64_t B, int LDB, double beta, PLASMA_Complex64_t *C, int LDC)
int	PLASMA_zherk (PLASMA_enum uplo, PLASMA_enum trans, int N, int K, double alpha, PLASMA_Complex64_t A, int LDA, double beta, PLASMA_Complex64_t C, int LDC)
int	PLASMA_zhetrd (PLASMA_enum jobz, PLASMA_enum uplo, int N, PLASMA_Complex64_t A, int LDA, double D, double E, PLASMA_desc descT, PLASMA_Complex64_t *Q, int LDQ)
int	PLASMA_zlacpy (PLASMA_enum uplo, int M, int N, PLASMA_Complex64_t A, int LDA, PLASMA_Complex64_t B, int LDB)
double	PLASMA_zlange (PLASMA_enum norm, int M, int N, PLASMA_Complex64_t A, int LDA, double work)
double	PLASMA_zlanhe (PLASMA_enum norm, PLASMA_enum uplo, int N, PLASMA_Complex64_t A, int LDA, double work)
double	PLASMA_zlansy (PLASMA_enum norm, PLASMA_enum uplo, int N, PLASMA_Complex64_t A, int LDA, double work)
int	PLASMA_zlaset (PLASMA_enum uplo, int M, int N, PLASMA_Complex64_t alpha, PLASMA_Complex64_t beta, PLASMA_Complex64_t *A, int LDA)
int	PLASMA_zlaswp (int N, PLASMA_Complex64_t A, int LDA, int K1, int K2, int IPIV, int INCX)
int	PLASMA_zlaswpc (int N, PLASMA_Complex64_t A, int LDA, int K1, int K2, int IPIV, int INCX)
int	PLASMA_zlauum (PLASMA_enum uplo, int N, PLASMA_Complex64_t *A, int LDA)
int	PLASMA_zplghe (double bump, int N, PLASMA_Complex64_t *A, int LDA, unsigned long long int seed)
int	PLASMA_zplgsy (PLASMA_Complex64_t bump, int N, PLASMA_Complex64_t *A, int LDA, unsigned long long int seed)
int	PLASMA_zplrnt (int M, int N, PLASMA_Complex64_t *A, int LDA, unsigned long long int seed)
int	PLASMA_zposv (PLASMA_enum uplo, int N, int NRHS, PLASMA_Complex64_t A, int LDA, PLASMA_Complex64_t B, int LDB)
int	PLASMA_zpotrf (PLASMA_enum uplo, int N, PLASMA_Complex64_t *A, int LDA)
int	PLASMA_zpotri (PLASMA_enum uplo, int N, PLASMA_Complex64_t *A, int LDA)
int	PLASMA_zpotrs (PLASMA_enum uplo, int N, int NRHS, PLASMA_Complex64_t A, int LDA, PLASMA_Complex64_t B, int LDB)
int	PLASMA_zsymm (PLASMA_enum side, PLASMA_enum uplo, int M, int N, PLASMA_Complex64_t alpha, PLASMA_Complex64_t A, int LDA, PLASMA_Complex64_t B, int LDB, PLASMA_Complex64_t beta, PLASMA_Complex64_t *C, int LDC)
int	PLASMA_zsyr2k (PLASMA_enum uplo, PLASMA_enum trans, int N, int K, PLASMA_Complex64_t alpha, PLASMA_Complex64_t A, int LDA, PLASMA_Complex64_t B, int LDB, PLASMA_Complex64_t beta, PLASMA_Complex64_t *C, int LDC)
int	PLASMA_zsyrk (PLASMA_enum uplo, PLASMA_enum trans, int N, int K, PLASMA_Complex64_t alpha, PLASMA_Complex64_t A, int LDA, PLASMA_Complex64_t beta, PLASMA_Complex64_t C, int LDC)
int	PLASMA_ztrmm (PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum transA, PLASMA_enum diag, int N, int NRHS, PLASMA_Complex64_t alpha, PLASMA_Complex64_t A, int LDA, PLASMA_Complex64_t B, int LDB)
int	PLASMA_ztrsm (PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum transA, PLASMA_enum diag, int N, int NRHS, PLASMA_Complex64_t alpha, PLASMA_Complex64_t A, int LDA, PLASMA_Complex64_t B, int LDB)
int	PLASMA_ztrsmpl (int N, int NRHS, PLASMA_Complex64_t A, int LDA, PLASMA_Complex64_t L, int IPIV, PLASMA_Complex64_t B, int LDB)
int	PLASMA_ztrsmrv (PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum transA, PLASMA_enum diag, int N, int NRHS, PLASMA_Complex64_t alpha, PLASMA_Complex64_t A, int LDA, PLASMA_Complex64_t B, int LDB)
int	PLASMA_ztrtri (PLASMA_enum uplo, PLASMA_enum diag, int N, PLASMA_Complex64_t *A, int LDA)
int	PLASMA_zunglq (int M, int N, int K, PLASMA_Complex64_t A, int LDA, PLASMA_Complex64_t T, PLASMA_Complex64_t *Q, int LDQ)
int	PLASMA_zungqr (int M, int N, int K, PLASMA_Complex64_t A, int LDA, PLASMA_Complex64_t T, PLASMA_Complex64_t *Q, int LDQ)
int	PLASMA_zunmlq (PLASMA_enum side, PLASMA_enum trans, int M, int N, int K, PLASMA_Complex64_t A, int LDA, PLASMA_Complex64_t T, PLASMA_Complex64_t *B, int LDB)
int	PLASMA_zunmqr (PLASMA_enum side, PLASMA_enum trans, int M, int N, int K, PLASMA_Complex64_t A, int LDA, PLASMA_Complex64_t T, PLASMA_Complex64_t *B, int LDB)
int	PLASMA_zLapack_to_Tile (PLASMA_Complex64_t Af77, int LDA, PLASMA_desc A)
int	PLASMA_zTile_to_Lapack (PLASMA_desc A, PLASMA_Complex64_t Af77, int LDA)

Detailed Description

This is the group of double complex functions using the simple user interface.

Function Documentation

int PLASMA_zcgels	(	PLASMA_enum	trans,
		int	M,
		int	N,
		int	NRHS,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t *	B,
		int	LDB,
		PLASMA_Complex64_t *	X,
		int	LDX,
		int *	ITER
	)

PLASMA_zcgels - Solves overdetermined or underdetermined linear systems involving an M-by-N matrix A using the QR or the LQ factorization of A. It is assumed that A has full rank. The following options are provided:

trans = PlasmaNoTrans and M >= N: find the least squares solution of an overdetermined system, i.e., solve the least squares problem: minimize || B - A*X ||.

trans = PlasmaNoTrans and M < N: find the minimum norm solution of an underdetermined system A * X = B.

Several right hand side vectors B and solution vectors X can be handled in a single call; they are stored as the columns of the M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix X.

PLASMA_zcgels first attempts to factorize the matrix in COMPLEX and use this factorization within an iterative refinement procedure to produce a solution with COMPLEX*16 normwise backward error quality (see below). If the approach fails the method switches to a COMPLEX*16 factorization and solve.

The iterative refinement is not going to be a winning strategy if the ratio COMPLEX performance over COMPLEX*16 performance is too small. A reasonable strategy should take the number of right-hand sides and the size of the matrix into account. This might be done with a call to ILAENV in the future. Up to now, we always try iterative refinement.

The iterative refinement process is stopped if ITER > ITERMAX or for all the RHS we have: RNRM < N*XNRM*ANRM*EPS*BWDMAX where:

ITER is the number of the current iteration in the iterative refinement process
RNRM is the infinity-norm of the residual
XNRM is the infinity-norm of the solution
ANRM is the infinity-operator-norm of the matrix A
EPS is the machine epsilon returned by DLAMCH('Epsilon').

Actually, in its current state (PLASMA 2.1.0), the test is slightly relaxed.

The values ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 respectively.

We follow Bjorck's algorithm proposed in "Iterative Refinement of Linear Least Squares solutions I", BIT, 7:257-278, 1967.

Parameters:

[in]	trans	Intended usage: = PlasmaNoTrans: the linear system involves A; = PlasmaConjTrans: the linear system involves A**H. Currently only PlasmaNoTrans is supported.
[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrices B and X. NRHS >= 0.
[in]	A	The M-by-N matrix A. This matrix is not modified.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[in]	B	The M-by-NRHS matrix B of right hand side vectors, stored columnwise. Not modified.
[in]	LDB	The leading dimension of the array B. LDB >= MAX(1,M,N).
[out]	X	If return value = 0, the solution vectors, stored columnwise. If M >= N, rows 1 to N of X contain the least squares solution vectors; the residual sum of squares for the solution in each column is given by the sum of squares of the modulus of elements N+1 to M in that column. If M < N, rows 1 to N of X contain the minimum norm solution vectors.
[in]	LDX	The leading dimension of the array X. LDX >= MAX(1,M,N).
[out]	ITER	The number of the current iteration in the iterative refinement process

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also: PLASMA_zcgels_Tile; PLASMA_zcgels_Tile_Async; PLASMA_dsgels; PLASMA_zgels

Definition at line 166 of file zcgels.c.

References max, min, PLASMA_Alloc_Workspace_zgels_Tile(), plasma_context_self(), PLASMA_Dealloc_Handle_Tile(), plasma_desc_init(), plasma_desc_mat_alloc(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_NOT_SUPPORTED, PLASMA_ERR_OUT_OF_RESOURCES, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZCGELS, PLASMA_NB, plasma_parallel_call_5, plasma_pzlapack_to_tile(), plasma_pztile_to_lapack(), PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, plasma_tune(), PLASMA_zcgels_Tile_Async(), PlasmaComplexDouble, and PlasmaNoTrans.

{
    /*
     * PLASMA_zcgels, LAPACK-layout entry point.
     *
     * Checks the arguments, copies A and B into tile descriptors, calls
     * PLASMA_zcgels_Tile_Async() and, when that call succeeds, copies the
     * tile solution back into X.
     *
     * Returns PLASMA_SUCCESS on success, a PLASMA_ERR_* code when the
     * library is not initialized, trans is unsupported or tile storage
     * cannot be allocated, the plasma_tune() status when tuning fails,
     * or -i when the i-th argument has an illegal value.
     */
    int i, j;
    int NB, NBNB, maxMN;
    int status;
    PLASMA_desc  descA;
    PLASMA_desc  descB;
    PLASMA_desc *descT;
    PLASMA_desc  descX;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zcgels", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments; -i reports the 1-based position of the
     * offending argument in (trans, M, N, NRHS, A, LDA, B, LDB, X, LDX, ITER). */
    if (trans != PlasmaNoTrans) {
        plasma_error("PLASMA_zcgels", "only PlasmaNoTrans supported");
        return PLASMA_ERR_NOT_SUPPORTED;
    }
    if (M < 0) {
        plasma_error("PLASMA_zcgels", "illegal value of M");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_zcgels", "illegal value of N");
        return -3;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_zcgels", "illegal value of NRHS");
        return -4;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_zcgels", "illegal value of LDA");
        return -6;
    }
    if (LDB < max(1, max(M, N))) {
        plasma_error("PLASMA_zcgels", "illegal value of LDB");
        return -8;  /* LDB is the 8th argument */
    }
    if (LDX < max(1, max(M, N))) {
        plasma_error("PLASMA_zcgels", "illegal value of LDX");
        return -10;
    }

    /* Quick return: nothing to solve, so the solution is zero.
     * B is an input that must stay untouched; the zeros go to the output X.
     * LDX >= max(1, M, N) was verified above, so the writes stay in bounds. */
    if (min(M, min(N, NRHS)) == 0) {
        for (i = 0; i < max(M, N); i++)
            for (j = 0; j < NRHS; j++)
                X[j*LDX+i] = 0.0;
        return PLASMA_SUCCESS;
    }

    /* Tune NB & IB depending on M, N & NRHS */
    status = plasma_tune(PLASMA_FUNC_ZCGELS, M, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zcgels", "plasma_tune() failed");
        return status;
    }

    NB    = PLASMA_NB;
    NBNB  = NB*NB;
    maxMN = max(M, N);

    plasma_sequence_create(plasma, &sequence);

    descA = plasma_desc_init(
                PlasmaComplexDouble,
                NB, NB, NBNB,
                M, N, 0, 0, M, N);
    /* B and X both get max(M, N) rows, whichever of M and N is larger. */
    descB = plasma_desc_init(
                PlasmaComplexDouble,
                NB, NB, NBNB,
                maxMN, NRHS, 0, 0, maxMN, NRHS);
    descX = plasma_desc_init(
                PlasmaComplexDouble,
                NB, NB, NBNB,
                maxMN, NRHS, 0, 0, maxMN, NRHS);

    /* DOUBLE PRECISION INITIALIZATION */
    /* Allocate memory for matrices in block layout */
    if (plasma_desc_mat_alloc(&descA) || plasma_desc_mat_alloc(&descB) || plasma_desc_mat_alloc(&descX)) {
        plasma_error("PLASMA_zcgels", "plasma_shared_alloc() failed");
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
        plasma_desc_mat_free(&descX);
        /* The sequence was already created; release it on this exit too. */
        plasma_sequence_destroy(plasma, sequence);
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }

    /* Copy A and B from LAPACK layout into their tile descriptors. */
    plasma_parallel_call_5(plasma_pzlapack_to_tile,
        PLASMA_Complex64_t*, A,
        int, LDA,
        PLASMA_desc, descA,
        PLASMA_sequence*, sequence,
        PLASMA_request*, &request);
    plasma_parallel_call_5(plasma_pzlapack_to_tile,
        PLASMA_Complex64_t*, B,
        int, LDB,
        PLASMA_desc, descB,
        PLASMA_sequence*, sequence,
        PLASMA_request*, &request);

    /* Allocate workspace */
    PLASMA_Alloc_Workspace_zgels_Tile(M, N, &descT);

    /* Call the native interface */
    status = PLASMA_zcgels_Tile_Async(PlasmaNoTrans, &descA, descT, &descB, &descX, ITER,
                                      sequence, &request);

    /* Copy the solution back to LAPACK layout only when the solve succeeded. */
    if (status == PLASMA_SUCCESS) {
        plasma_parallel_call_5(plasma_pztile_to_lapack,
            PLASMA_desc, descX,
            PLASMA_Complex64_t*, X,
            int, LDX,
            PLASMA_sequence*, sequence,
            PLASMA_request*, &request);
    }

    /* Synchronize before releasing the workspace and the tile storage. */
    plasma_dynamic_sync();
    PLASMA_Dealloc_Handle_Tile(&descT);
    plasma_sequence_destroy(plasma, sequence);
    plasma_desc_mat_free(&descA);
    plasma_desc_mat_free(&descB);
    plasma_desc_mat_free(&descX);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zcgesv	(	int	N,
		int	NRHS,
		PLASMA_Complex64_t *	A,
		int	LDA,
		int *	IPIV,
		PLASMA_Complex64_t *	B,
		int	LDB,
		PLASMA_Complex64_t *	X,
		int	LDX,
		int *	ITER
	)

PLASMA_zcgesv - Computes the solution to a system of linear equations A * X = B, where A is an N-by-N matrix and X and B are N-by-NRHS matrices.

PLASMA_zcgesv first attempts to factorize the matrix in COMPLEX and use this factorization within an iterative refinement procedure to produce a solution with COMPLEX*16 normwise backward error quality (see below). If the approach fails the method switches to a COMPLEX*16 factorization and solve.

The iterative refinement is not going to be a winning strategy if the ratio COMPLEX performance over COMPLEX*16 performance is too small. A reasonable strategy should take the number of right-hand sides and the size of the matrix into account. This might be done with a call to ILAENV in the future. Up to now, we always try iterative refinement.

The iterative refinement process is stopped if ITER > ITERMAX or for all the RHS we have: RNRM < N*XNRM*ANRM*EPS*BWDMAX where:

ITER is the number of the current iteration in the iterative refinement process
RNRM is the infinity-norm of the residual
XNRM is the infinity-norm of the solution
ANRM is the infinity-operator-norm of the matrix A
EPS is the machine epsilon returned by DLAMCH('Epsilon').

Actually, in its current state (PLASMA 2.1.0), the test is slightly relaxed.

The values ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 respectively.

Parameters:

[in]	N	The number of linear equations, i.e., the order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]	A	The N-by-N coefficient matrix A. This matrix is not modified.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[out]	IPIV	On exit, the pivot indices that define the permutations.
[in]	B	The N-by-NRHS right hand side matrix B.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).
[out]	X	If return value = 0, the N-by-NRHS solution matrix X.
[in]	LDX	The leading dimension of the array X. LDX >= max(1,N).
[out]	ITER	The number of the current iteration in the iterative refinement process

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, so the solution could not be computed.

See also: PLASMA_zcgesv_Tile; PLASMA_zcgesv_Tile_Async; PLASMA_dsgesv; PLASMA_zgesv

Definition at line 227 of file zcgesv.c.

References plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZCGESV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zcgesv_Tile_Async(), plasma_zdesc_alloc, plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaComplexDouble, and plasma_sequence_t::status.

{
    /*
     * PLASMA_zcgesv, LAPACK-layout entry point.
     *
     * Checks the arguments, exposes A, B and X as tile descriptors (copied
     * or converted in place, depending on PLASMA_TRANSLATION), calls
     * PLASMA_zcgesv_Tile_Async() and converts the results back when that
     * call succeeds.
     *
     * Returns PLASMA_ERR_NOT_INITIALIZED when the library is not
     * initialized, -i when the i-th argument has an illegal value, the
     * plasma_tune() status when tuning fails, and otherwise the status
     * recorded in the sequence.
     */
    int NB;
    int status;
    PLASMA_desc  descA;
    PLASMA_desc  descB;
    PLASMA_desc  descX;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zcgesv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments; -i reports the 1-based position of the
     * offending argument in (N, NRHS, A, LDA, IPIV, B, LDB, X, LDX, ITER). */
    if (N < 0) {
        plasma_error("PLASMA_zcgesv", "illegal value of N");
        return -1;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_zcgesv", "illegal value of NRHS");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zcgesv", "illegal value of LDA");
        return -4;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_zcgesv", "illegal value of LDB");
        return -7;  /* LDB is the 7th argument */
    }
    if (LDX < max(1, N)) {
        plasma_error("PLASMA_zcgesv", "illegal value of LDX");
        return -9;  /* LDX is the 9th argument */
    }

    /* Quick return */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;

    /* Tune NB & IB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZCGESV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zcgesv", "plasma_tune() failed");
        return status;
    }
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    /* DOUBLE PRECISION INITIALIZATION */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* NOTE(review): the trailing macro argument is presumably the cleanup
         * executed when the tile allocation fails -- confirm in the macro. */
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)) );
        plasma_zdesc_alloc(  descX, NB, NB, N, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descX)) );
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
        /* In-place mode: the descriptor for X points at the caller's array. */
        descX = plasma_desc_init(
            PlasmaComplexDouble, NB, NB, (NB*NB),
            LDX, NRHS, 0, 0, N, NRHS);
        descX.mat = X;
    }

    /* Call the native interface */
    status = PLASMA_zcgesv_Tile_Async(&descA, IPIV, &descB, &descX, ITER, sequence, &request);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* Copy the solution out only when the solve succeeded. */
        if (status == PLASMA_SUCCESS) {
            plasma_zooptile2lap( descX, X, NB, NB, LDX, NRHS );
        }
        /* The tile copies are owned by this function: synchronize and free
         * them on every path so a failed solve does not leak them. */
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
        plasma_desc_mat_free(&descX);
    } else {
        if (status == PLASMA_SUCCESS) {
            plasma_ziptile2lap( descA, A, NB, NB, LDA, N    );
            plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
            plasma_ziptile2lap( descX, X, NB, NB, LDX, NRHS );
        }
        /* Synchronize before the sequence is destroyed. */
        plasma_dynamic_sync();
    }

    /* The sequence status is what gets reported to the caller. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zcposv	(	PLASMA_enum	uplo,
		int	N,
		int	NRHS,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t *	B,
		int	LDB,
		PLASMA_Complex64_t *	X,
		int	LDX,
		int *	ITER
	)

PLASMA_zcposv - Computes the solution to a system of linear equations A * X = B, where A is an N-by-N symmetric positive definite (or Hermitian positive definite in the complex case) matrix and X and B are N-by-NRHS matrices. The Cholesky decomposition is used to factor A as

A = U**H * U, if uplo = PlasmaUpper, or A = L * L**H, if uplo = PlasmaLower,

where U is an upper triangular matrix and L is a lower triangular matrix. The factored form of A is then used to solve the system of equations A * X = B.

PLASMA_zcposv first attempts to factorize the matrix in COMPLEX and use this factorization within an iterative refinement procedure to produce a solution with COMPLEX*16 normwise backward error quality (see below). If the approach fails the method switches to a COMPLEX*16 factorization and solve.

The iterative refinement is not going to be a winning strategy if the ratio COMPLEX performance over COMPLEX*16 performance is too small. A reasonable strategy should take the number of right-hand sides and the size of the matrix into account. This might be done with a call to ILAENV in the future. Up to now, we always try iterative refinement.

The iterative refinement process is stopped if ITER > ITERMAX or for all the RHS we have: RNRM < N*XNRM*ANRM*EPS*BWDMAX where:

ITER is the number of the current iteration in the iterative refinement process
RNRM is the infinity-norm of the residual
XNRM is the infinity-norm of the solution
ANRM is the infinity-operator-norm of the matrix A
EPS is the machine epsilon returned by DLAMCH('Epsilon').

Actually, in its current state (PLASMA 2.1.0), the test is slightly relaxed.

The values ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 respectively.

Parameters:

[in]	uplo	Specifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The number of linear equations, i.e., the order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]	A	The N-by-N symmetric positive definite (or Hermitian) coefficient matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. This matrix is not modified.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in]	B	The N-by-NRHS right hand side matrix B.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).
[out]	X	If return value = 0, the N-by-NRHS solution matrix X.
[in]	LDX	The leading dimension of the array X. LDX >= max(1,N).
[out]	ITER	The number of the current iteration in the iterative refinement process

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, the leading minor of order i of A is not positive definite, so the factorization could not be completed, and the solution has not been computed.

See also: PLASMA_zcposv_Tile; PLASMA_zcposv_Tile_Async; PLASMA_dsposv; PLASMA_zposv

Definition at line 171 of file zcposv.c.

References plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZCPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zcposv_Tile_Async(), plasma_zdesc_alloc, plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaComplexDouble, PlasmaLower, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * PLASMA_zcposv, LAPACK-layout entry point.
     *
     * Checks the arguments, exposes A, B and X as tile descriptors (copied
     * or converted in place, depending on PLASMA_TRANSLATION), calls
     * PLASMA_zcposv_Tile_Async() and converts the results back when that
     * call succeeds.
     *
     * Returns PLASMA_ERR_NOT_INITIALIZED when the library is not
     * initialized, -i when the i-th argument has an illegal value, the
     * plasma_tune() status when tuning fails, and otherwise the status
     * recorded in the sequence.
     */
    int NB;
    int status;
    PLASMA_desc  descA;
    PLASMA_desc  descB;
    PLASMA_desc  descX;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zcposv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments; -i reports the 1-based position of the
     * offending argument in (uplo, N, NRHS, A, LDA, B, LDB, X, LDX, ITER). */
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_zcposv", "illegal value of uplo");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_zcposv", "illegal value of N");
        return -2;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_zcposv", "illegal value of NRHS");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zcposv", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_zcposv", "illegal value of LDB");
        return -7;
    }
    if (LDX < max(1, N)) {
        plasma_error("PLASMA_zcposv", "illegal value of LDX");
        return -9;  /* LDX is the 9th argument */
    }

    /* Quick return - currently NOT equivalent to LAPACK's
     * LAPACK does not have such check for ZCPOSV */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;

    /* Tune NB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_ZCPOSV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zcposv", "plasma_tune() failed");
        return status;
    }
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    /* DOUBLE PRECISION INITIALIZATION */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* NOTE(review): the trailing macro argument is presumably the cleanup
         * executed when the tile allocation fails -- confirm in the macro. */
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)) );
        plasma_zdesc_alloc(  descX, NB, NB, N, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descX)) );
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
        /* In-place mode: the descriptor for X points at the caller's array. */
        descX = plasma_desc_init(
            PlasmaComplexDouble, NB, NB, (NB*NB),
            LDX, NRHS, 0, 0, N, NRHS);
        descX.mat = X;
    }

    /* Call the native interface */
    status = PLASMA_zcposv_Tile_Async(uplo, &descA, &descB, &descX, ITER, sequence, &request);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* Copy the solution out only when the solve succeeded. */
        if (status == PLASMA_SUCCESS) {
            plasma_zooptile2lap( descX, X, NB, NB, LDX, NRHS );
        }
        /* The tile copies are owned by this function: synchronize and free
         * them on every path so a failed solve does not leak them. */
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
        plasma_desc_mat_free(&descX);
    } else {
        if (status == PLASMA_SUCCESS) {
            plasma_ziptile2lap( descA, A, NB, NB, LDA, N    );
            plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
            plasma_ziptile2lap( descX, X, NB, NB, LDX, NRHS );
        }
        /* Synchronize before the sequence is destroyed. */
        plasma_dynamic_sync();
    }

    /* The sequence status is what gets reported to the caller. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zcungesv	(	PLASMA_enum	trans,
		int	N,
		int	NRHS,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t *	B,
		int	LDB,
		PLASMA_Complex64_t *	X,
		int	LDX,
		int *	ITER
	)

PLASMA_zcungesv - Solves overdetermined or underdetermined linear systems involving an M-by-N matrix A using the QR or the LQ factorization of A. It is assumed that A has full rank. The following options are provided:

trans = PlasmaNoTrans and M >= N: find the least squares solution of an overdetermined

system, i.e., solve the least squares problem: minimize || B - A*X ||.

trans = PlasmaNoTrans and M < N: find the minimum norm solution of an underdetermined

system A * X = B.

Several right hand side vectors B and solution vectors X can be handled in a single call; they are stored as the columns of the M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix X.

PLASMA_zcungesv first attempts to factorize the matrix in COMPLEX and use this factorization within an iterative refinement procedure to produce a solution with COMPLEX*16 normwise backward error quality (see below). If the approach fails the method switches to a COMPLEX*16 factorization and solve.

The iterative refinement is not going to be a winning strategy if the ratio COMPLEX performance over COMPLEX*16 performance is too small. A reasonable strategy should take the number of right-hand sides and the size of the matrix into account. This might be done with a call to ILAENV in the future. Up to now, we always try iterative refinement.

The iterative refinement process is stopped if ITER > ITERMAX or for all the RHS we have: RNRM < N*XNRM*ANRM*EPS*BWDMAX where:

ITER is the number of the current iteration in the iterative refinement process
RNRM is the infinity-norm of the residual
XNRM is the infinity-norm of the solution
ANRM is the infinity-operator-norm of the matrix A
EPS is the machine epsilon returned by DLAMCH('Epsilon').

Actually, in its current state (PLASMA 2.1.0), the test is slightly relaxed.

The values ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 respectively.

We follow Bjorck's algorithm proposed in "Iterative Refinement of Linear Least Squares solutions I", BIT, 7:257-278, 1967.

Parameters:

[in]	trans	Intended usage: = PlasmaNoTrans: the linear system involves A; = PlasmaConjTrans: the linear system involves A**H. Currently only PlasmaNoTrans is supported.
[in]	N	The number of columns of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrices B and X. NRHS >= 0.
[in]	A	The M-by-N matrix A. This matrix is not modified.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[in]	B	The M-by-NRHS matrix B of right hand side vectors, stored columnwise. Not modified.
[in]	LDB	The leading dimension of the array B. LDB >= MAX(1,M,N).
[out]	X	If return value = 0, the solution vectors, stored columnwise. if M >= N, rows 1 to N of B contain the least squares solution vectors; the residual sum of squares for the solution in each column is given by the sum of squares of the modulus of elements N+1 to M in that column; if M < N, rows 1 to N of B contain the minimum norm solution vectors;
[in]	LDX	The leading dimension of the array X. LDX >= MAX(1,M,N).
[out]	ITER	The number of the current iteration in the iterative refinement process

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_zcungesv_Tile; PLASMA_zcungesv_Tile_Async; PLASMA_dsungesv; PLASMA_zgels

Definition at line 163 of file zcungesv.c.

References plasma_desc_t::mat, max, PLASMA_Alloc_Workspace_zgels_Tile(), plasma_context_self(), PLASMA_Dealloc_Handle_Tile(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_NOT_SUPPORTED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZCGELS, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zcungesv_Tile_Async(), plasma_zdesc_alloc, plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaComplexDouble, and PlasmaNoTrans.

{
    /*
     * LAPACK-layout wrapper around PLASMA_zcungesv_Tile_Async(): validates
     * the arguments, sets up tile descriptors for A, B and X according to
     * PLASMA_TRANSLATION, runs the tile solver and moves the data back to
     * the caller's arrays.
     *
     * Returns PLASMA_SUCCESS, -i when the i-th argument is illegal, or a
     * PLASMA error code.
     */
    int NB;
    int status;
    PLASMA_desc  descA;
    PLASMA_desc  descB;
    PLASMA_desc *descT;
    PLASMA_desc  descX;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zcungesv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (trans != PlasmaNoTrans) {
        plasma_error("PLASMA_zcungesv", "only PlasmaNoTrans supported");
        return PLASMA_ERR_NOT_SUPPORTED;
    }
    if (N < 0) {
        plasma_error("PLASMA_zcungesv", "illegal value of N");
        return -2;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_zcungesv", "illegal value of NRHS");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zcungesv", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_zcungesv", "illegal value of LDB");
        /* LDB is the 7th argument (trans, N, NRHS, A, LDA, B, LDB, ...) */
        return -7;
    }
    if (LDX < max(1, N)) {
        plasma_error("PLASMA_zcungesv", "illegal value of LDX");
        return -9;
    }
    /* Quick return */
    if ( N == 0 )
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZCGELS, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zcungesv", "plasma_tune() failed");
        return status;
    }
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /* DOUBLE PRECISION INITIALIZATION */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* The last macro argument is the cleanup code used if the setup fails */
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)) );
        plasma_zdesc_alloc(  descX, NB, NB, N, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descX)) );
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
        /* In place: the caller's X array is used directly as descX storage */
        descX = plasma_desc_init(
            PlasmaComplexDouble, NB, NB, (NB*NB), 
            LDX, NRHS, 0, 0, N, NRHS);
        descX.mat = X;
    }
    /* Allocate workspace */
    /* NOTE(review): the return value of the allocation is not checked here */
    PLASMA_Alloc_Workspace_zgels_Tile(N, N, &descT);
    /* Call the native interface */
    status = PLASMA_zcungesv_Tile_Async(PlasmaNoTrans, &descA, descT, &descB, &descX, ITER,
                                        sequence, &request);
    if (status == PLASMA_SUCCESS) {
        if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
            plasma_zooptile2lap( descX, X, NB, NB, LDX, NRHS );
            plasma_dynamic_sync();
            plasma_desc_mat_free(&descA);
            plasma_desc_mat_free(&descB);
            plasma_desc_mat_free(&descX);
        } else {
            plasma_ziptile2lap( descA, A, NB, NB, LDA, N    );
            plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
            plasma_ziptile2lap( descX, X, NB, NB, LDX, NRHS );
            plasma_dynamic_sync();
        }
        /* Report the status recorded on the sequence once the work has been
         * synchronized, as the sibling LAPACK-layout drivers do. */
        status = sequence->status;
    } else if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* The tile call failed: release the tile storage set up above so it
         * does not leak; X is left untouched. */
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
        plasma_desc_mat_free(&descX);
    }
    PLASMA_Dealloc_Handle_Tile(&descT);
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgebrd	(	PLASMA_enum	jobu,
		PLASMA_enum	jobvt,
		int	M,
		int	N,
		PLASMA_Complex64_t *	A,
		int	LDA,
		double *	D,
		double *	E,
		PLASMA_Complex64_t *	U,
		int	LDU,
		PLASMA_Complex64_t *	VT,
		int	LDVT,
		PLASMA_desc *	descT
	)

PLASMA_zgebrd - computes the singular value decomposition (SVD) of a complex M-by-N matrix A, optionally computing the left and/or right singular vectors. The SVD is written

 A = U * SIGMA * transpose(V)

where SIGMA is an M-by-N matrix which is zero except for its min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA are the singular values of A; they are real and non-negative, and are returned in descending order. The first min(m,n) columns of U and V are the left and right singular vectors of A.

Note that the routine returns V**T, not V. Not LAPACK Compliant for now! Note: Only PlasmaNoVec supported!

Parameters:

[in]	jobu	Specifies options for computing all or part of the matrix U. Intended usage: = PlasmaVec: all M columns of U are returned in array U; = PlasmaNoVec: no columns of U (no left singular vectors) are computed. Note: Only PlasmaNoVec supported!
[in]	jobvt	Specifies options for computing all or part of the matrix V**H. Intended usage: = PlasmaVec: all N rows of V**H are returned in array VT; = PlasmaNoVec: no rows of V**H (no right singular vectors) are computed. Note: Only PlasmaNoVec supported!
[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in,out]	A	On entry, the M-by-N matrix A. On exit, if JOBU = 'O', A is overwritten with the first min(m,n) columns of U (the left singular vectors, stored columnwise); if JOBVT = 'O', A is overwritten with the first min(m,n) rows of V**H (the right singular vectors, stored rowwise); if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A are destroyed.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[out]	S	The double precision singular values of A, sorted so that S(i) >= S(i+1).
[out]	U	(LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. If JOBU = 'A', U contains the M-by-M unitary matrix U; if JOBU = 'S', U contains the first min(m,n) columns of U (the left singular vectors, stored columnwise); if JOBU = 'N' or 'O', U is not referenced.
[in]	LDU	The leading dimension of the array U. LDU >= 1; if JOBU = 'S' or 'A', LDU >= M.
[out]	VT	If JOBVT = 'A', VT contains the N-by-N unitary matrix VH; if JOBVT = 'S', VT contains the first min(m,n) rows of VH (the right singular vectors, stored rowwise); if JOBVT = 'N' or 'O', VT is not referenced.
[in]	LDVT	The leading dimension of the array VT. LDVT >= 1; if JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
[in,out]	descT	On entry, descriptor as returned by PLASMA_Alloc_Workspace_zgesvd. On exit, contains auxiliary factorization data.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_zgebrd_Tile; PLASMA_zgebrd_Tile_Async; PLASMA_cgebrd; PLASMA_dgebrd; PLASMA_sgebrd

Definition at line 122 of file zgebrd.c.

References plasma_desc_t::m, max, min, plasma_desc_t::n, plasma_context_self(), plasma_desc_check(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGEBRD, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zgebrd_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaNoVec, PlasmaVec, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout wrapper around PLASMA_zgebrd_Tile_Async(): validates the
     * arguments (including the caller-supplied T descriptor), converts A to
     * tile layout, runs the tile routine and converts A back.
     *
     * Returns PLASMA_SUCCESS, -i when the i-th argument is illegal, or the
     * status recorded on the sequence.
     */
    int NB, IB, MT, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    /* NOTE(review): descU and descVT are only filled in when the matching
     * job flag is PlasmaVec, which is rejected below; they are passed to the
     * tile routine uninitialized otherwise. */
    PLASMA_desc descA, descU, descVT;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zgebrd", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    
    /* Tune NB & IB depending on M & N; Set NBNB */
    /* Tuning comes first because the descT size check below needs NB and IB */
    status = plasma_tune(PLASMA_FUNC_ZGEBRD, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgebrd", "plasma_tune() failed");
        return status;
    }
    /* Set MT, NT */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    MT = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);
    /* Check input arguments.
     * Argument order: jobu, jobvt, M, N, A, LDA, D, E, U, LDU, VT, LDVT, descT */
    if (jobu != PlasmaNoVec  && jobu !=PlasmaVec) {
        plasma_error("PLASMA_zgebrd", "illegal value of jobu");
        return -1;
    }
    if (jobvt != PlasmaNoVec && jobvt != PlasmaVec) {
        plasma_error("PLASMA_zgebrd", "illegal value of jobvt");
        return -2;
    }
    if (M < 0) {
        plasma_error("PLASMA_zgebrd", "illegal value of M");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_zgebrd", "illegal value of N");
        return -4;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_zgebrd", "illegal value of LDA");
        return -6;
    }
    if (LDU < 1) {
        plasma_error("PLASMA_zgebrd", "illegal value of LDU");
        return -10;  /* LDU is the 10th argument */
    }
    if (LDVT < 1) {
        plasma_error("PLASMA_zgebrd", "illegal value of LDVT");
        return -12;  /* LDVT is the 12th argument */
    }
    if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) || 
         ( descT->m != MT*IB ) || (descT->n != NT*NB) ) {
        plasma_error("PLASMA_zgebrd", "invalid T descriptor");
        return -13;  /* descT is the 13th argument */
    }
    /* Quick return */
    if (min(M, N) == 0) {
        return PLASMA_SUCCESS;
    }
    /* Singular vectors are not implemented: reject PlasmaVec for either side */
    if (jobu == PlasmaVec) {
        plasma_error("PLASMA_zgebrd", "computing the singular vectors is not supported in this version");
        return -1;
    }
    if (jobvt == PlasmaVec) {
        plasma_error("PLASMA_zgebrd", "computing the singular vectors is not supported in this version");
        return -2;
    }
    plasma_sequence_create(plasma, &sequence);
    /* Convert the inputs to tile layout; the PlasmaVec branches below are
     * currently unreachable because of the checks above. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA,   A, NB, NB,  LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
        if (jobu == PlasmaVec){
            plasma_zooplap2tile( descU,   U, NB, NB,  LDU, M, 0, 0, M, M, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descU)));
        }
        if (jobvt == PlasmaVec){
            plasma_zooplap2tile( descVT, VT, NB, NB, LDVT, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descU)); plasma_desc_mat_free(&(descVT)));
        }
    } else {
        plasma_ziplap2tile( descA,   A, NB, NB,  LDA, N, 0, 0, M, N);
        if (jobu == PlasmaVec){
            plasma_ziplap2tile( descU,   U, NB, NB,  LDU, M, 0, 0, M, M);
        }
        if (jobvt == PlasmaVec){
            plasma_ziplap2tile( descVT, VT, NB, NB, LDVT, N, 0, 0, N, N);
        }
    }
    /* Call the tile interface */
    PLASMA_zgebrd_Tile_Async(jobu, jobvt, &descA, D, E, &descU, &descVT, descT, sequence, &request);
    /* Convert back to the caller's layout, wait for completion, then release
     * any tile storage set up for the out-of-place path. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descA,   A, NB, NB,  LDA, N );
        if (jobu == PlasmaVec){
            plasma_zooptile2lap( descU,   U, NB, NB,  LDU, M );
        }
        if (jobvt == PlasmaVec){
            plasma_zooptile2lap( descVT, VT, NB, NB, LDVT, N );
        }
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        if (jobu == PlasmaVec){
            plasma_desc_mat_free(&descU);
        }
        if (jobvt == PlasmaVec){
            plasma_desc_mat_free(&descVT);
        }
    } else {
        plasma_ziptile2lap( descA,   A, NB, NB,  LDA, N );
        if (jobu == PlasmaVec){
            plasma_ziptile2lap( descU,   U, NB, NB,  LDU, M );
        }
        if (jobvt == PlasmaVec){
            plasma_ziptile2lap( descVT, VT, NB, NB, LDVT, N );
        }
        plasma_dynamic_sync();
    }
    /* The final result is whatever status was recorded on the sequence */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgelqf	(	int	M,
		int	N,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t *	T
	)

PLASMA_zgelqf - Computes the tile LQ factorization of a complex M-by-N matrix A: A = L * Q.

Parameters:

[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in,out]	A	On entry, the M-by-N matrix A. On exit, the elements on and below the diagonal of the array contain the m-by-min(M,N) lower trapezoidal matrix L (L is lower triangular if M <= N); the elements above the diagonal represent the unitary matrix Q as a product of elementary reflectors, stored by tiles.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[out]	T	On exit, auxiliary factorization data, required by PLASMA_zgelqs to solve the system of equations.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_zgelqf_Tile; PLASMA_zgelqf_Tile_Async; PLASMA_cgelqf; PLASMA_dgelqf; PLASMA_sgelqf; PLASMA_zgelqs

Definition at line 62 of file zgelqf.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_ZGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zgelqf_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaComplexDouble, plasma_sequence_t::status, and T.

{
    /*
     * LAPACK-layout entry point for the tile LQ factorization.  Checks the
     * arguments, wraps the caller's T array in a descriptor, converts A to
     * tile layout, calls PLASMA_zgelqf_Tile_Async() and converts A back.
     *
     * Returns PLASMA_SUCCESS, -i when the i-th argument is illegal, or the
     * status recorded on the sequence.
     */
    int NB, IB, MT, NT;
    int widthT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descT;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zgelqf", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Argument validation: the return value is minus the argument position */
    if (M < 0) {
        plasma_error("PLASMA_zgelqf", "illegal value of M");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_zgelqf", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_zgelqf", "illegal value of LDA");
        return -4;
    }

    /* Nothing to factorize when either dimension is zero */
    if (min(M, N) == 0)
        return PLASMA_SUCCESS;

    /* Select the blocking parameters for this problem size */
    status = plasma_tune(PLASMA_FUNC_ZGELS, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgelqf", "plasma_tune() failed");
        return status;
    }

    /* Tile counts: number of NB-sized tiles needed to cover M and N */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    MT = M/NB + ((M%NB != 0) ? 1 : 0);
    NT = N/NB + ((N%NB != 0) ? 1 : 0);

    plasma_sequence_create(plasma, &sequence);

    /* T is NT*NB columns wide for flat Householder; any other setting uses
     * twice that to accommodate the tree reduction phase. */
    widthT = NT*NB;
    if (plasma->householder != PLASMA_FLAT_HOUSEHOLDER) {
        widthT = 2*widthT;
    }
    descT = plasma_desc_init(
        PlasmaComplexDouble,
        IB, NB, IB*NB,
        MT*IB, widthT, 0, 0, MT*IB, widthT);
    descT.mat = T;

    /* Bring A into tile layout */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N);
    }

    /* Call the tile interface */
    PLASMA_zgelqf_Tile_Async(&descA, &descT, sequence, &request);

    /* Convert A back, wait for completion, then drop the out-of-place copy */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
    }
    plasma_dynamic_sync();
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_desc_mat_free(&descA);
    }

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgelqs	(	int	M,
		int	N,
		int	NRHS,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t *	T,
		PLASMA_Complex64_t *	B,
		int	LDB
	)

PLASMA_zgelqs - Compute a minimum-norm solution min || A*X - B || using the LQ factorization A = L*Q computed by PLASMA_zgelqf.

Parameters:

[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= M >= 0.
[in]	NRHS	The number of columns of B. NRHS >= 0.
[in]	A	Details of the LQ factorization of the original matrix A as returned by PLASMA_zgelqf.
[in]	LDA	The leading dimension of the array A. LDA >= M.
[in]	T	Auxiliary factorization data, computed by PLASMA_zgelqf.
[in,out]	B	On entry, the M-by-NRHS right hand side matrix B. On exit, the N-by-NRHS solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= N.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_zgelqs_Tile; PLASMA_zgelqs_Tile_Async; PLASMA_cgelqs; PLASMA_dgelqs; PLASMA_sgelqs; PLASMA_zgelqf

Definition at line 67 of file zgelqs.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_ZGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zgelqs_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaComplexDouble, plasma_sequence_t::status, and T.

{
    /*
     * LAPACK-layout wrapper around PLASMA_zgelqs_Tile_Async(): validates the
     * arguments, wraps the caller's T array in a descriptor, converts A and B
     * to tile layout, runs the tile solve and converts both back.
     *
     * Returns PLASMA_SUCCESS, -i when the i-th argument is illegal, or the
     * status recorded on the sequence.
     */
    int NB, IB, IBNB, MT, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB, descT;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zgelqs", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (M < 0) {
        plasma_error("PLASMA_zgelqs", "illegal value of M");
        return -1;
    }
    /* This routine requires N >= M */
    if (N < 0 || M > N) {
        plasma_error("PLASMA_zgelqs", "illegal value of N");
        return -2;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_zgelqs", "illegal value of NRHS");
        return -3;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_zgelqs", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_zgelqs", "illegal value of LDB");
        return -8;
    }
    /* Quick return */
    if (min(M, min(N, NRHS)) == 0) {
        return PLASMA_SUCCESS;
    }
    /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_ZGELS, M, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgelqs", "plasma_tune() failed");
        return status;
    }
    /* Set MT & NT: number of NB-sized tiles needed to cover M and N */
    NB    = PLASMA_NB;
    IB    = PLASMA_IB;
    IBNB  = IB*NB;
    MT    = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT    = (N%NB==0) ? (N/NB) : (N/NB+1);
    plasma_sequence_create(plasma, &sequence);
    if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
        descT = plasma_desc_init(
            PlasmaComplexDouble,
            IB, NB, IBNB,
            MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);
    }
    else {
        /* Double the size of T to accommodate the tree reduction phase */
        descT = plasma_desc_init(
            PlasmaComplexDouble,
            IB, NB, IBNB,
            MT*IB, 2*NT*NB, 0, 0, MT*IB, 2*NT*NB);
    }
    /* The caller's T array is used directly as the descriptor storage */
    descT.mat = T;
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* The last macro argument is the cleanup code used if the setup fails */
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, M, N   , plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N,    0, 0, M, N   );
        plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
    }
    /* Call the tile interface */
    PLASMA_zgelqs_Tile_Async(&descA, &descT, &descB, sequence, &request);
    /* Convert A and B back, wait for completion, then release the tile
     * storage set up for the out-of-place path. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_zooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgels	(	PLASMA_enum	trans,
		int	M,
		int	N,
		int	NRHS,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t *	T,
		PLASMA_Complex64_t *	B,
		int	LDB
	)

PLASMA_zgels - solves overdetermined or underdetermined linear systems involving an M-by-N matrix A using the QR or the LQ factorization of A. It is assumed that A has full rank. The following options are provided:

trans = PlasmaNoTrans and M >= N: find the least squares solution of an overdetermined

system, i.e., solve the least squares problem: minimize || B - A*X ||.

trans = PlasmaNoTrans and M < N: find the minimum norm solution of an underdetermined

system A * X = B.

Several right hand side vectors B and solution vectors X can be handled in a single call; they are stored as the columns of the M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix X.

Parameters:

[in]	trans	Intended usage: = PlasmaNoTrans: the linear system involves A; = PlasmaConjTrans: the linear system involves A**H. Currently only PlasmaNoTrans is supported.
[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrices B and X. NRHS >= 0.
[in,out]	A	On entry, the M-by-N matrix A. On exit, if M >= N, A is overwritten by details of its QR factorization as returned by PLASMA_zgeqrf; if M < N, A is overwritten by details of its LQ factorization as returned by PLASMA_zgelqf.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[out]	T	On exit, auxiliary factorization data.
[in,out]	B	On entry, the M-by-NRHS matrix B of right hand side vectors, stored columnwise; On exit, if return value = 0, B is overwritten by the solution vectors, stored columnwise: if M >= N, rows 1 to N of B contain the least squares solution vectors; the residual sum of squares for the solution in each column is given by the sum of squares of the modulus of elements N+1 to M in that column; if M < N, rows 1 to N of B contain the minimum norm solution vectors;
[in]	LDB	The leading dimension of the array B. LDB >= MAX(1,M,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_zgels_Tile; PLASMA_zgels_Tile_Async; PLASMA_cgels; PLASMA_dgels; PLASMA_sgels

Definition at line 94 of file zgels.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_NOT_SUPPORTED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_ZGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zgels_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaComplexDouble, PlasmaNoTrans, plasma_sequence_t::status, and T.

{
    /*
     * LAPACK-layout wrapper around PLASMA_zgels_Tile_Async().  Validates the
     * arguments, wraps the caller's T array in a descriptor, converts A and B
     * to tile layout, runs the tile solver and converts both back.
     *
     * Returns PLASMA_SUCCESS, PLASMA_ERR_NOT_SUPPORTED for any trans other
     * than PlasmaNoTrans, -i when the i-th argument is illegal, or the status
     * recorded on the sequence.
     */
    int row, col;
    int NB, IB, MT, NT;
    int rowsB;
    int widthT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB, descT;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zgels", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Argument validation */
    if (trans != PlasmaNoTrans) {
        plasma_error("PLASMA_zgels", "only PlasmaNoTrans supported");
        return PLASMA_ERR_NOT_SUPPORTED;
    }
    if (M < 0) {
        plasma_error("PLASMA_zgels", "illegal value of M");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_zgels", "illegal value of N");
        return -3;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_zgels", "illegal value of NRHS");
        return -4;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_zgels", "illegal value of LDA");
        return -6;
    }
    if (LDB < max(1, max(M, N))) {
        plasma_error("PLASMA_zgels", "illegal value of LDB");
        return -9;
    }

    /* Degenerate problem: zero the first max(M,N) rows of every column of B */
    if (min(M, min(N, NRHS)) == 0) {
        for (col = 0; col < NRHS; col++) {
            for (row = 0; row < max(M, N); row++) {
                B[col*LDB+row] = 0.0;
            }
        }
        return PLASMA_SUCCESS;
    }

    /* Select the blocking parameters for this problem size */
    status = plasma_tune(PLASMA_FUNC_ZGELS, M, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgels", "plasma_tune() failed");
        return status;
    }

    /* Tile counts: number of NB-sized tiles needed to cover M and N */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    NT = N/NB + ((N%NB != 0) ? 1 : 0);
    MT = M/NB + ((M%NB != 0) ? 1 : 0);

    plasma_sequence_create(plasma, &sequence);

    /* T is NT*NB columns wide for flat Householder; any other setting uses
     * twice that to accommodate the tree reduction phase. */
    widthT = NT*NB;
    if (plasma->householder != PLASMA_FLAT_HOUSEHOLDER) {
        widthT = 2*widthT;
    }
    descT = plasma_desc_init(
        PlasmaComplexDouble,
        IB, NB, IB*NB,
        MT*IB, widthT, 0, 0, MT*IB, widthT);
    descT.mat = T;

    /* B is described with M rows when M >= N and with N rows otherwise */
    rowsB = (M >= N) ? M : N;
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, M, N   , plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, rowsB, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N,    0, 0, M, N   );
        plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, rowsB, NRHS);
    }

    /* Call the tile interface */
    PLASMA_zgels_Tile_Async(PlasmaNoTrans, &descA, &descT, &descB, sequence, &request);

    /* Convert A and B back, wait for completion, then release the tile
     * storage set up for the out-of-place path. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_zooptile2lap( descB, B, NB, NB, LDB, NRHS );
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
    }
    plasma_dynamic_sync();
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    }

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgemm	(	PLASMA_enum	transA,
		PLASMA_enum	transB,
		int	M,
		int	N,
		int	K,
		PLASMA_Complex64_t	alpha,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t *	B,
		int	LDB,
		PLASMA_Complex64_t	beta,
		PLASMA_Complex64_t *	C,
		int	LDC
	)

PLASMA_zgemm - Performs one of the matrix-matrix operations

$C = \alpha [op( A )\times op( B )] + \beta C$

,

where op( X ) is one of

op( X ) = X or op( X ) = X' or op( X ) = conjg( X' )

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters:

[in]	transA	Specifies whether the matrix A is transposed, not transposed or conjugate transposed: = PlasmaNoTrans: A is not transposed; = PlasmaTrans: A is transposed; = PlasmaConjTrans: A is conjugate transposed.
[in]	transB	Specifies whether the matrix B is transposed, not transposed or conjugate transposed: = PlasmaNoTrans: B is not transposed; = PlasmaTrans: B is transposed; = PlasmaConjTrans: B is conjugate transposed.
[in]	M	M specifies the number of rows of the matrix op( A ) and of the matrix C. M >= 0.
[in]	N	N specifies the number of columns of the matrix op( B ) and of the matrix C. N >= 0.
[in]	K	K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K >= 0.
[in]	alpha	alpha specifies the scalar alpha
[in]	A	A is a LDA-by-ka matrix, where ka is K when transA = PlasmaNoTrans, and is M otherwise.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M) when transA = PlasmaNoTrans, and LDA >= max(1,K) otherwise.
[in]	B	B is a LDB-by-kb matrix, where kb is N when transB = PlasmaNoTrans, and is K otherwise.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,K) when transB = PlasmaNoTrans, and LDB >= max(1,N) otherwise.
[in]	beta	beta specifies the scalar beta
[in,out]	C	C is a LDC-by-N matrix. On exit, the array is overwritten by the M by N matrix ( alpha*op( A )*op( B ) + beta*C )
[in]	LDC	The leading dimension of the array C. LDC >= max(1,M).

Returns:

Return values:

PLASMA_SUCCESS successful exit

See also:: PLASMA_zgemm_Tile; PLASMA_cgemm; PLASMA_dgemm; PLASMA_sgemm

Definition at line 96 of file zgemm.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGEMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zgemm_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaConjTrans, PlasmaNoTrans, PlasmaTrans, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for PLASMA_zgemm.
     *
     * Validates the arguments, hands A, B and C to the tile interface
     * (PLASMA_zgemm_Tile_Async) and returns the status stored in the sequence.
     *
     * Return values produced here:
     *   PLASMA_ERR_NOT_INITIALIZED  plasma_context_self() returned NULL
     *   -i                          the i-th argument had an illegal value
     *   status of plasma_tune()     when tuning fails
     *   PLASMA_SUCCESS              quick return (nothing to compute)
     *   sequence->status            otherwise
     */
    int NB;
    int Am, An, Bm, Bn;
    int status;
    PLASMA_desc descA, descB, descC;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zgemm", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments; the negative return value is the argument position */
    if ((transA != PlasmaNoTrans) && (transA != PlasmaTrans) && (transA != PlasmaConjTrans)) {
        plasma_error("PLASMA_zgemm", "illegal value of transA");
        return -1;
    }
    if ((transB != PlasmaNoTrans) && (transB != PlasmaTrans) && (transB != PlasmaConjTrans)) {
        plasma_error("PLASMA_zgemm", "illegal value of transB");
        return -2;
    }
    /* Am x An and Bm x Bn are the dimensions of A and B as stored,
     * i.e. before op() is applied */
    if ( transA == PlasmaNoTrans ) {
        Am = M; An = K;
    } else {
        Am = K; An = M;
    }
    if ( transB == PlasmaNoTrans ) {
        Bm = K; Bn = N;
    } else {
        Bm = N; Bn = K;
    }
    if (M < 0) {
        plasma_error("PLASMA_zgemm", "illegal value of M");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_zgemm", "illegal value of N");
        return -4;
    }
    if (K < 0) {
        /* Fixed: the message used to name N although K is the argument checked */
        plasma_error("PLASMA_zgemm", "illegal value of K");
        return -5;
    }
    if (LDA < max(1, Am)) {
        plasma_error("PLASMA_zgemm", "illegal value of LDA");
        return -8;
    }
    if (LDB < max(1, Bm)) {
        plasma_error("PLASMA_zgemm", "illegal value of LDB");
        return -10;
    }
    if (LDC < max(1, M)) {
        plasma_error("PLASMA_zgemm", "illegal value of LDC");
        return -13;
    }
    /* Quick return: C is empty, or the product term vanishes (alpha == 0 or
     * K == 0) while beta == 1, so C would be left unchanged */
    if (M == 0 || N == 0 ||
        ((alpha == (PLASMA_Complex64_t)0.0 || K == 0) && beta == (PLASMA_Complex64_t)1.0))
        return PLASMA_SUCCESS;
    /* Tune for the ZGEMM kernel from M and N (third size argument is 0) */
    status = plasma_tune(PLASMA_FUNC_ZGEMM, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgemm", "plasma_tune() failed");
        return status;
    }
    /* Tile size selected by the tuning step */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /*
     * NOTE(review): plasma_zooplap2tile / plasma_ziplap2tile are macros defined
     * elsewhere in the project. They appear to set up the descriptor and convert
     * the LAPACK-layout array to tile layout, and the trailing argument of the
     * out-of-place variant looks like cleanup code for a failed conversion --
     * confirm against the macro definitions.
     */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, Bn, 0, 0, Bm, Bn, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
        plasma_zooplap2tile( descC, C, NB, NB, LDC, N,  0, 0, M,  N,  plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descC)));
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An );
        plasma_ziplap2tile( descB, B, NB, NB, LDB, Bn, 0, 0, Bm, Bn );
        plasma_ziplap2tile( descC, C, NB, NB, LDC, N,  0, 0, M,  N  );
    }
    /* Call the tile interface */
    PLASMA_zgemm_Tile_Async(
        transA, transB, alpha, &descA, &descB, beta, &descC, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* Only C is copied back; all three tile copies are freed after the sync */
        plasma_zooptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
        plasma_desc_mat_free(&descC);
    } else {
        /* In-place mode: A, B and C are all converted back */
        plasma_ziptile2lap( descA, A, NB, NB, LDA, An );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, Bn );
        plasma_ziptile2lap( descC, C, NB, NB, LDC, N  );
        plasma_dynamic_sync();
    }
    /* Read the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgeqrf	(	int	M,
		int	N,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t *	T
	)

PLASMA_zgeqrf - Computes the tile QR factorization of a complex M-by-N matrix A: A = Q * R.

Parameters:

[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in,out]	A	On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if M >= N); the elements below the diagonal represent the unitary matrix Q as a product of elementary reflectors stored by tiles.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[out]	T	On exit, auxiliary factorization data, required by PLASMA_zgeqrs to solve the system of equations.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_zgeqrf_Tile; PLASMA_zgeqrf_Tile_Async; PLASMA_cgeqrf; PLASMA_dgeqrf; PLASMA_sgeqrf; PLASMA_zgeqrs

Definition at line 61 of file zgeqrf.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_ZGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zgeqrf_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaComplexDouble, plasma_sequence_t::status, and T.

{
    /*
     * LAPACK-layout driver for PLASMA_zgeqrf.
     *
     * Validates the arguments, describes the caller-provided T storage with a
     * descriptor, hands A to the tile interface (PLASMA_zgeqrf_Tile_Async) and
     * returns the status stored in the sequence.
     *
     * Return values produced here:
     *   PLASMA_ERR_NOT_INITIALIZED  plasma_context_self() returned NULL
     *   -i                          the i-th argument had an illegal value
     *   status of plasma_tune()     when tuning fails
     *   PLASMA_SUCCESS              quick return (min(M, N) == 0)
     *   sequence->status            otherwise
     */
    int NB, IB, IBNB, MT, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descT;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zgeqrf", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments; the negative return value is the argument position */
    if (M < 0) {
        plasma_error("PLASMA_zgeqrf", "illegal value of M");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_zgeqrf", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_zgeqrf", "illegal value of LDA");
        return -4;
    }
    /* Quick return */
    if (min(M, N) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB from M and N; note that the ZGELS tuning entry is used here */
    status = plasma_tune(PLASMA_FUNC_ZGELS, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgeqrf", "plasma_tune() failed");
        return status;
    }
    /* Set MT & NT: number of NB-sized tile rows/columns, rounded up */
    NB   = PLASMA_NB;
    IB   = PLASMA_IB;
    IBNB = IB*NB;
    MT   = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT   = (N%NB==0) ? (N/NB) : (N/NB+1);
    plasma_sequence_create(plasma, &sequence);
 
     /* T is described as IB-by-NB tiles: MT*IB rows and NT*NB columns for the
      * flat Householder mode, twice as many columns otherwise */
     if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
        descT = plasma_desc_init(
            PlasmaComplexDouble,
            IB, NB, IBNB,
            MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);
    }
    else {
        /* Double the size of T to accommodate the tree reduction phase */
        descT = plasma_desc_init(
            PlasmaComplexDouble,
            IB, NB, IBNB,
            MT*IB, 2*NT*NB, 0, 0, MT*IB, 2*NT*NB);
    }
    /* descT only describes the caller's T array; nothing is allocated for it here */
    descT.mat = T;
    
    /*
     * NOTE(review): plasma_zooplap2tile / plasma_ziplap2tile are macros defined
     * elsewhere in the project. They appear to set up descA and convert A from
     * LAPACK layout to tile layout; the trailing argument of the out-of-place
     * variant looks like cleanup code for a failed conversion -- confirm.
     */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N);
    }
    /* Call the tile interface */
    PLASMA_zgeqrf_Tile_Async(&descA, &descT, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* Copy the result back, then free the tile copy once the sync is done */
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    /* Read the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgeqrs	(	int	M,
		int	N,
		int	NRHS,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t *	T,
		PLASMA_Complex64_t *	B,
		int	LDB
	)

PLASMA_zgeqrs - Computes a least-squares solution min || A*X - B || using the QR factorization A = Q*R computed by PLASMA_zgeqrf.

Parameters:

[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. M >= N >= 0.
[in]	NRHS	The number of columns of B. NRHS >= 0.
[in,out]	A	Details of the QR factorization of the original matrix A as returned by PLASMA_zgeqrf.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[in]	T	Auxiliary factorization data, computed by PLASMA_zgeqrf.
[in,out]	B	On entry, the m-by-nrhs right hand side matrix B. On exit, the n-by-nrhs solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,M).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_zgeqrs_Tile; PLASMA_zgeqrs_Tile_Async; PLASMA_cgeqrs; PLASMA_dgeqrs; PLASMA_sgeqrs; PLASMA_zgeqrf

Definition at line 67 of file zgeqrs.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_ZGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zgeqrs_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaComplexDouble, plasma_sequence_t::status, and T.

{
    /*
     * LAPACK-layout driver for PLASMA_zgeqrs.
     *
     * Validates the arguments, describes the caller-provided T storage with a
     * descriptor, hands A and B to the tile interface
     * (PLASMA_zgeqrs_Tile_Async) and returns the status stored in the sequence.
     *
     * Return values produced here:
     *   PLASMA_ERR_NOT_INITIALIZED  plasma_context_self() returned NULL
     *   -i                          the i-th argument had an illegal value
     *   status of plasma_tune()     when tuning fails
     *   PLASMA_SUCCESS              quick return (M, N or NRHS is 0)
     *   sequence->status            otherwise
     */
    int NB, IB, IBNB, MT, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB, descT;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zgeqrs", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments; the negative return value is the argument position */
    if (M < 0) {
        plasma_error("PLASMA_zgeqrs", "illegal value of M");
        return -1;
    }
    /* Only M >= N is accepted */
    if (N < 0 || N > M) {
        plasma_error("PLASMA_zgeqrs", "illegal value of N");
        return -2;
    }
    if (NRHS < 0) {
        /* Fixed: the message used to name N although NRHS is the argument checked */
        plasma_error("PLASMA_zgeqrs", "illegal value of NRHS");
        return -3;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_zgeqrs", "illegal value of LDA");
        return -5;
    }
    /* Same bound as the former max(1, max(1, M)), without the redundant max */
    if (LDB < max(1, M)) {
        plasma_error("PLASMA_zgeqrs", "illegal value of LDB");
        return -8;
    }
    /* Quick return */
    if (min(M, min(N, NRHS)) == 0) {
        return PLASMA_SUCCESS;
    }
    /* Tune NB & IB depending on M, N & NRHS; the ZGELS tuning entry is used here */
    status = plasma_tune(PLASMA_FUNC_ZGELS, M, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgeqrs", "plasma_tune() failed");
        return status;
    }
    /* Set MT & NT: number of NB-sized tile rows/columns, rounded up */
    NB   = PLASMA_NB;
    IB   = PLASMA_IB;
    IBNB = IB*NB;
    MT   = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT   = (N%NB==0) ? (N/NB) : (N/NB+1);
    plasma_sequence_create(plasma, &sequence);
    /* T is described as IB-by-NB tiles: MT*IB rows and NT*NB columns for the
     * flat Householder mode, twice as many columns otherwise */
    if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
        descT = plasma_desc_init(
            PlasmaComplexDouble,
            IB, NB, IBNB,
            MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);
    }
    else {
        /* Double the size of T to accommodate the tree reduction phase */
        descT = plasma_desc_init(
            PlasmaComplexDouble,
            IB, NB, IBNB,
            MT*IB, 2*NT*NB, 0, 0, MT*IB, 2*NT*NB);
    }
    /* descT only describes the caller's T array; nothing is allocated for it here */
    descT.mat = T;
    /*
     * NOTE(review): plasma_zooplap2tile / plasma_ziplap2tile are macros defined
     * elsewhere in the project. They appear to set up the descriptor and convert
     * the LAPACK-layout array to tile layout; the trailing argument of the
     * out-of-place variant looks like cleanup code for a failed conversion --
     * confirm against the macro definitions.
     */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, M, N   , plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, M, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N,    0, 0, M, N   );
        plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, M, NRHS);
    }
    /* Call the tile interface */
    PLASMA_zgeqrs_Tile_Async(&descA, &descT, &descB, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* Copy A and B back, then free the tile copies once the sync is done */
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_zooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }

    /* Read the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgesv	(	int	N,
		int	NRHS,
		PLASMA_Complex64_t *	A,
		int	LDA,
		int *	IPIV,
		PLASMA_Complex64_t *	B,
		int	LDB
	)

PLASMA_zgesv - Computes the solution to a system of linear equations A * X = B, where A is an N-by-N matrix and X and B are N-by-NRHS matrices. The tile LU decomposition with partial tile pivoting and row interchanges is used to factor A. The factored form of A is then used to solve the system of equations A * X = B.

Parameters:

[in]	N	The number of linear equations, i.e., the order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in,out]	A	On entry, the N-by-N coefficient matrix A. On exit, the tile L and U factors from the factorization.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[out]	IPIV	On exit, the pivot indices that define the permutations.
[in,out]	B	On entry, the N-by-NRHS matrix of right hand side matrix B. On exit, if return value = 0, the N-by-NRHS solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, so the solution could not be computed.

See also:: PLASMA_zgesv_Tile; PLASMA_zgesv_Tile_Async; PLASMA_cgesv; PLASMA_dgesv; PLASMA_sgesv

Definition at line 70 of file zgesv.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), PLASMA_FUNC_ZGESV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zgesv_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for PLASMA_zgesv.
     *
     * Validates the arguments, hands A and B to the tile interface
     * (PLASMA_zgesv_Tile_Async) together with IPIV, and returns the status
     * stored in the sequence.
     *
     * Return values produced here:
     *   PLASMA_ERR_NOT_INITIALIZED  plasma_context_self() returned NULL
     *   -i                          the i-th argument had an illegal value
     *   status of plasma_tune()     when tuning fails
     *   PLASMA_SUCCESS              quick return (N or NRHS is 0)
     *   sequence->status            otherwise
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_error("PLASMA_zgesv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments; the negative return value is the argument position
     * in (N, NRHS, A, LDA, IPIV, B, LDB) */
    if (N < 0) {
        plasma_error("PLASMA_zgesv", "illegal value of N");
        return -1;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_zgesv", "illegal value of NRHS");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zgesv", "illegal value of LDA");
        return -4;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_zgesv", "illegal value of LDB");
        /* Fixed: LDB is the 7th argument of this routine; -8 was returned before */
        return -7;
    }
    /* Quick return */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune depending on N & NRHS */
    status = plasma_tune(PLASMA_FUNC_ZGESV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgesv", "plasma_tune() failed");
        return status;
    }
    /* Tile size selected by the tuning step */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /*
     * NOTE(review): plasma_zooplap2tile / plasma_ziplap2tile are macros defined
     * elsewhere in the project. They appear to set up the descriptor and convert
     * the LAPACK-layout array to tile layout; the trailing argument of the
     * out-of-place variant looks like cleanup code for a failed conversion --
     * confirm against the macro definitions.
     */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
    }
    /* Call the tile interface */
    PLASMA_zgesv_Tile_Async(&descA, IPIV, &descB, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* Copy A and B back, then free the tile copies once the sync is done */
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_zooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }

    /* Read the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgesv_incpiv	(	int	N,
		int	NRHS,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t *	L,
		int *	IPIV,
		PLASMA_Complex64_t *	B,
		int	LDB
	)

PLASMA_zgesv_incpiv - Computes the solution to a system of linear equations A * X = B, where A is an N-by-N matrix and X and B are N-by-NRHS matrices. The tile LU decomposition with partial tile pivoting and row interchanges is used to factor A. The factored form of A is then used to solve the system of equations A * X = B.

Parameters:

[in]	N	The number of linear equations, i.e., the order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in,out]	A	On entry, the N-by-N coefficient matrix A. On exit, the tile L and U factors from the factorization (not equivalent to LAPACK).
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[out]	L	On exit, auxiliary factorization data, related to the tile L factor, necessary to solve the system of equations.
[out]	IPIV	On exit, the pivot indices that define the permutations (not equivalent to LAPACK).
[in,out]	B	On entry, the N-by-NRHS matrix of right hand side matrix B. On exit, if return value = 0, the N-by-NRHS solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, so the solution could not be computed.

See also:: PLASMA_zgesv_incpiv_Tile; PLASMA_zgesv_incpiv_Tile_Async; PLASMA_cgesv_incpiv; PLASMA_dgesv_incpiv; PLASMA_sgesv_incpiv

Definition at line 73 of file zgesv_incpiv.c.

References L, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), PLASMA_FUNC_ZGESV, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zgesv_incpiv_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaComplexDouble, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for PLASMA_zgesv_incpiv.
     *
     * Validates the arguments, describes the caller-provided L storage with a
     * descriptor, hands A and B to the tile interface
     * (PLASMA_zgesv_incpiv_Tile_Async) together with IPIV, and returns the
     * status stored in the sequence.
     *
     * Return values produced here:
     *   PLASMA_ERR_NOT_INITIALIZED  plasma_context_self() returned NULL
     *   -i                          the i-th argument had an illegal value
     *   status of plasma_tune()     when tuning fails
     *   PLASMA_SUCCESS              quick return (N or NRHS is 0)
     *   sequence->status            otherwise
     */
    int NB, IB, IBNB, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB, descL;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_error("PLASMA_zgesv_incpiv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments; the negative return value is the argument position
     * in (N, NRHS, A, LDA, L, IPIV, B, LDB) */
    if (N < 0) {
        plasma_error("PLASMA_zgesv_incpiv", "illegal value of N");
        return -1;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_zgesv_incpiv", "illegal value of NRHS");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zgesv_incpiv", "illegal value of LDA");
        return -4;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_zgesv_incpiv", "illegal value of LDB");
        return -8;
    }
    /* Quick return */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on N & NRHS */
    status = plasma_tune(PLASMA_FUNC_ZGESV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgesv_incpiv", "plasma_tune() failed");
        return status;
    }
    /* Set NT: number of NB-sized tile rows/columns of A, rounded up */
    NB    = PLASMA_NB;
    IB    = PLASMA_IB;
    IBNB  = IB*NB;
    NT    = (N%NB==0) ? (N/NB) : (N/NB+1);
    plasma_sequence_create(plasma, &sequence);
    /* L is described as IB-by-NB tiles, NT*IB rows by NT*NB columns */
    descL = plasma_desc_init(
        PlasmaComplexDouble,
        IB, NB, IBNB,
        NT*IB, NT*NB, 0, 0, NT*IB, NT*NB);
    /* descL only describes the caller's L array; nothing is allocated for it here */
    descL.mat = L;
    /*
     * NOTE(review): plasma_zooplap2tile / plasma_ziplap2tile are macros defined
     * elsewhere in the project. They appear to set up the descriptor and convert
     * the LAPACK-layout array to tile layout; the trailing argument of the
     * out-of-place variant looks like cleanup code for a failed conversion --
     * confirm against the macro definitions.
     */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
    }
    /* Call the tile interface */
    PLASMA_zgesv_incpiv_Tile_Async(&descA, &descL, IPIV, &descB, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* Copy A and B back, then free the tile copies once the sync is done */
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_zooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    
    /* Read the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgesvd	(	PLASMA_enum	jobu,
		PLASMA_enum	jobvt,
		int	M,
		int	N,
		PLASMA_Complex64_t *	A,
		int	LDA,
		double *	S,
		PLASMA_Complex64_t *	U,
		int	LDU,
		PLASMA_Complex64_t *	VT,
		int	LDVT,
		PLASMA_desc *	descT
	)

PLASMA_zgesvd - computes the singular value decomposition (SVD) of a complex M-by-N matrix A, optionally computing the left and/or right singular vectors. The SVD is written

 A = U * SIGMA * conjugate-transpose(V)

where SIGMA is an M-by-N matrix which is zero except for its min(m,n) diagonal elements, U is an M-by-M unitary matrix, and V is an N-by-N unitary matrix. The diagonal elements of SIGMA are the singular values of A; they are real and non-negative, and are returned in descending order. The first min(m,n) columns of U and V are the left and right singular vectors of A.

Note that the routine returns V**H, not V. Not LAPACK compliant for now! Note: Only PlasmaNoVec supported!

Parameters:

[in]	jobu	Specifies options for computing all or part of the matrix U. Intended usage: = PlasmaVec: all M columns of U are returned in array U; = PlasmaNoVec: no columns of U (no left singular vectors) are computed. Note: Only PlasmaNoVec supported!
[in]	jobvt	Specifies options for computing all or part of the matrix V**H. Intended usage: = PlasmaVec: all N rows of V**H are returned in the array VT; = PlasmaNoVec: no rows of V**H (no right singular vectors) are computed. Note: Only PlasmaNoVec supported!
[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in,out]	A	On entry, the M-by-N matrix A. On exit, if JOBU = 'O', A is overwritten with the first min(m,n) columns of U (the left singular vectors, stored columnwise); if JOBVT = 'O', A is overwritten with the first min(m,n) rows of V**H (the right singular vectors, stored rowwise); if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A are destroyed.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[out]	S	The double precision singular values of A, sorted so that S(i) >= S(i+1).
[out]	U	(LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. If JOBU = 'A', U contains the M-by-M unitary matrix U; if JOBU = 'S', U contains the first min(m,n) columns of U (the left singular vectors, stored columnwise); if JOBU = 'N' or 'O', U is not referenced.
[in]	LDU	The leading dimension of the array U. LDU >= 1; if JOBU = 'S' or 'A', LDU >= M.
[out]	VT	If JOBVT = 'A', VT contains the N-by-N unitary matrix VH; if JOBVT = 'S', VT contains the first min(m,n) rows of VH (the right singular vectors, stored rowwise); if JOBVT = 'N' or 'O', VT is not referenced.
[in]	LDVT	The leading dimension of the array VT. LDVT >= 1; if JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
[in,out]	descT	On entry, descriptor as returned by PLASMA_Alloc_Workspace_zgesvd. On exit, contains auxiliary factorization data.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_zgesvd_Tile; PLASMA_zgesvd_Tile_Async; PLASMA_cgesvd; PLASMA_dgesvd; PLASMA_sgesvd

Definition at line 123 of file zgesvd.c.

References plasma_desc_t::m, max, min, plasma_desc_t::n, plasma_context_self(), plasma_desc_check(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGESVD, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zgesvd_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaNoVec, PlasmaVec, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for PLASMA_zgesvd.
     *
     * Tunes the tile sizes, validates the arguments (including the dimensions
     * of the caller-provided descT), hands A to the tile interface
     * (PLASMA_zgesvd_Tile_Async) and returns the status stored in the sequence.
     *
     * Return values produced here:
     *   PLASMA_ERR_NOT_INITIALIZED  plasma_context_self() returned NULL
     *   status of plasma_tune()     when tuning fails
     *   -i                          the i-th argument had an illegal value
     *                               (-1 / -2 also when PlasmaVec is requested)
     *   PLASMA_SUCCESS              quick return (min(M, N) == 0)
     *   sequence->status            otherwise
     */
    int NB, IB, MT, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descU, descVT;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zgesvd", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    
    /* Tune NB & IB depending on M & N. This runs before the argument checks
     * because NB and IB are needed below to validate the size of descT; as a
     * consequence M and N have not been range-checked yet at this point. */
    status = plasma_tune(PLASMA_FUNC_ZGESVD, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgesvd", "plasma_tune() failed");
        return status;
    }
    /* Set MT, NT: number of NB-sized tile rows/columns, rounded up */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    MT = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);
    /* Check input arguments; the negative return value is the argument position */
    if (jobu != PlasmaNoVec  && jobu !=PlasmaVec) {
        plasma_error("PLASMA_zgesvd", "illegal value of jobu");
        return -1;
    }
    if (jobvt != PlasmaNoVec && jobvt != PlasmaVec) {
        plasma_error("PLASMA_zgesvd", "illegal value of jobvt");
        return -2;
    }
    if (M < 0) {
        plasma_error("PLASMA_zgesvd", "illegal value of M");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_zgesvd", "illegal value of N");
        return -4;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_zgesvd", "illegal value of LDA");
        return -6;
    }
    if (LDU < 1) {
        plasma_error("PLASMA_zgesvd", "illegal value of LDU");
        return -9;
    }
    if (LDVT < 1) {
        plasma_error("PLASMA_zgesvd", "illegal value of LDVT");
        return -11;
    }
    /* descT must pass plasma_desc_check() and be exactly MT*IB by NT*NB */
    if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) || 
         ( descT->m != MT*IB ) || (descT->n != NT*NB) ) {
        plasma_error("PLASMA_zgesvd", "invalid T descriptor");
        return -12;
    }
    /* Quick return */
    if (min(M, N) == 0) {
        return PLASMA_SUCCESS;
    }
    /* Singular vectors are rejected here, so from this point on both jobu and
     * jobvt equal PlasmaNoVec and every `== PlasmaVec` branch below is
     * unreachable in this version. */
    if (jobu == PlasmaVec) {
        plasma_error("PLASMA_zgesvd", "computing the singular vectors is not supported in this version");
        return -1;
    }
    if (jobvt == PlasmaVec) {
        plasma_error("PLASMA_zgesvd", "computing the singular vectors is not supported in this version");
        return -2;
    }
    plasma_sequence_create(plasma, &sequence);
    /*
     * NOTE(review): plasma_zooplap2tile / plasma_ziplap2tile are macros defined
     * elsewhere in the project. They appear to set up the descriptor and convert
     * the LAPACK-layout array to tile layout; the trailing argument of the
     * out-of-place variant looks like cleanup code for a failed conversion --
     * confirm against the macro definitions.
     */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA,   A, NB, NB,  LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
        if (jobu == PlasmaVec){
            plasma_zooplap2tile( descU,   U, NB, NB,  LDU, M, 0, 0, M, M, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descU)));
        }
        if (jobvt == PlasmaVec){
            plasma_zooplap2tile( descVT, VT, NB, NB, LDVT, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descU)); plasma_desc_mat_free(&(descVT)));
        }
    } else {
        plasma_ziplap2tile( descA,   A, NB, NB,  LDA, N, 0, 0, M, N);
        if (jobu == PlasmaVec){
            plasma_ziplap2tile( descU,   U, NB, NB,  LDU, M, 0, 0, M, M);
        }
        if (jobvt == PlasmaVec){
            plasma_ziplap2tile( descVT, VT, NB, NB, LDVT, N, 0, 0, N, N);
        }
    }
    /* Call the tile interface.
     * NOTE(review): descU and descVT are only touched inside the PlasmaVec
     * branches above, so their addresses are passed here without this function
     * having initialized them -- confirm the callee does not read them when
     * jobu/jobvt are PlasmaNoVec. */
    PLASMA_zgesvd_Tile_Async(jobu, jobvt, &descA, S, &descU, &descVT, descT, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* Copy the results back, then free the tile copies once the sync is done */
        plasma_zooptile2lap( descA,   A, NB, NB,  LDA, N );
        if (jobu == PlasmaVec){
            plasma_zooptile2lap( descU,   U, NB, NB,  LDU, M );
        }
        if (jobvt == PlasmaVec){
            plasma_zooptile2lap( descVT, VT, NB, NB, LDVT, N );
        }
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        if (jobu == PlasmaVec){
            plasma_desc_mat_free(&descU);
        }
        if (jobvt == PlasmaVec){
            plasma_desc_mat_free(&descVT);
        }
    } else {
        plasma_ziptile2lap( descA,   A, NB, NB,  LDA, N );
        if (jobu == PlasmaVec){
            plasma_ziptile2lap( descU,   U, NB, NB,  LDU, M );
        }
        if (jobvt == PlasmaVec){
            plasma_ziptile2lap( descVT, VT, NB, NB, LDVT, N );
        }
        plasma_dynamic_sync();
    }
    /* Read the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgetrf	(	int	M,
		int	N,
		PLASMA_Complex64_t *	A,
		int	LDA,
		int *	IPIV
	)

PLASMA_zgetrf - Computes an LU factorization of a general M-by-N matrix A using the tile LU algorithm with partial tile pivoting with row interchanges.

Parameters:

[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in,out]	A	On entry, the M-by-N matrix to be factored. On exit, the tile factors L and U from the factorization.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[out]	IPIV	The pivot indices that define the permutations.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.

See also:: PLASMA_zgetrf_Tile; PLASMA_zgetrf_Tile_Async; PLASMA_cgetrf; PLASMA_dgetrf; PLASMA_sgetrf

Definition at line 62 of file zgetrf.c.

References A, plasma_desc_t::mat, max, plasma_desc_t::mb, min, plasma_desc_t::mt, plasma_context_self(), plasma_desc_init(), plasma_dynamic_call_4, plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGESV, PLASMA_NB, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, plasma_tune(), PlasmaComplexDouble, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the LU factorization.
     *
     * Validates M, N and LDA, tunes the blocking parameters, describes the
     * caller's array A with a descriptor (descA.mat points directly at A, no
     * copy is made here), runs plasma_pzgetrf_reclap on it and finally
     * post-processes IPIV.  Returns the status recorded in the sequence, or a
     * negative argument index / PLASMA error code on early exit.
     */
    int NB, NBNB, minMN;
    int status;
    PLASMA_desc descA ;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zgetrf", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments; the return value is minus the argument position */
    if (M < 0) {
        plasma_error("PLASMA_zgetrf", "illegal value of M");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_zgetrf", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_zgetrf", "illegal value of LDA");
        return -4;
    }
    /* Quick return: nothing to factor when either dimension is zero */
    if (min(M, N) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_ZGESV, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgetrf", "plasma_tune() failed");
        return status;
    }
    /* Set NB & NBNB (read after plasma_tune() so the tuned value is used) */
    NB   = PLASMA_NB;
    NBNB = NB*NB;
    plasma_sequence_create(plasma, &sequence);
    /* Describe A in place: the descriptor's storage pointer is the user array */
    descA = plasma_desc_init(
        PlasmaComplexDouble,
        NB, NB, NBNB,
        LDA, N, 0, 0, M, N);
    descA.mat = A;
    minMN = min(M, N);
    /* Clear the first min(M,N) pivot entries before the factorization runs */
    memset(IPIV, 0, minMN*sizeof(int));
    /* Call the tile interface */
    plasma_dynamic_call_4(plasma_pzgetrf_reclap,
        PLASMA_desc, descA,
        int*, IPIV,
        PLASMA_sequence*, sequence,
        PLASMA_request*, &request);
    plasma_dynamic_sync();
    /*
     * Generate the correct IPIV (Has to be moved in a task)
     *
     * For every block i >= 1 of descA.mb entries, the block offset
     * i*descA.mb is added to the stored indices; block 0 is left untouched.
     * NOTE(review): presumably this turns block-local pivot indices into
     * global row indices - confirm against plasma_pzgetrf_reclap.
     */
    { 
        int i, inc, tmp, j;
        for(i=1; i<descA.mt; i++) {
            inc = i*descA.mb;
            /* Number of pivots in this block, clipped to the min(M,N) total */
            tmp = min( minMN - inc, descA.mb);
            if ( tmp < 1 )
              break;
            
            for (j=0; j<tmp; j++)
                IPIV[inc+j] = IPIV[inc+j] + inc;
        }
    }
    /* Read the status before the sequence object is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgetrf_incpiv	(	int	M,
		int	N,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t *	L,
		int *	IPIV
	)

PLASMA_zgetrf_incpiv - Computes an LU factorization of a general M-by-N matrix A using the tile LU algorithm with partial tile pivoting with row interchanges.

Parameters:

[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in,out]	A	On entry, the M-by-N matrix to be factored. On exit, the tile factors L and U from the factorization.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[out]	L	On exit, auxiliary factorization data, related to the tile L factor, required by PLASMA_zgetrs_incpiv to solve the system of equations.
[out]	IPIV	The pivot indices that define the permutations (not equivalent to LAPACK).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.

See also:: PLASMA_zgetrf_incpiv_Tile; PLASMA_zgetrf_incpiv_Tile_Async; PLASMA_cgetrf_incpiv; PLASMA_dgetrf_incpiv; PLASMA_sgetrf_incpiv; PLASMA_zgetrs_incpiv

Definition at line 65 of file zgetrf_incpiv.c.

References L, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGESV, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zgetrf_incpiv_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaComplexDouble, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver around PLASMA_zgetrf_incpiv_Tile_Async.
     *
     * Validates M, N and LDA, tunes the blocking parameters, builds a
     * descriptor over the caller-supplied L buffer, sets up descA from A via
     * the out-of-place or in-place lap2tile macro (chosen by
     * PLASMA_TRANSLATION), calls the tile routine and then applies the
     * matching tile2lap macro to A.  Returns the status recorded in the
     * sequence, or a negative argument index / PLASMA error code on early exit.
     */
    int NB, IB, IBNB, MT, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descL;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zgetrf_incpiv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments; the return value is minus the argument position */
    if (M < 0) {
        plasma_error("PLASMA_zgetrf_incpiv", "illegal value of M");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_zgetrf_incpiv", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_zgetrf_incpiv", "illegal value of LDA");
        return -4;
    }
    /* Quick return: nothing to factor when either dimension is zero */
    if (min(M, N) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_ZGESV, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgetrf_incpiv", "plasma_tune() failed");
        return status;
    }
    /* Set NB, IB and the tile counts MT = ceil(M/NB), NT = ceil(N/NB) */
    NB   = PLASMA_NB;
    IB   = PLASMA_IB;
    IBNB = IB*NB;
    MT   = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT   = (N%NB==0) ? (N/NB) : (N/NB+1);
    plasma_sequence_create(plasma, &sequence);
    /* L is used in place: an (MT*IB)-by-(NT*NB) descriptor over the user buffer */
    descL = plasma_desc_init(
        PlasmaComplexDouble,
        IB, NB, IBNB,
        MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);
    descL.mat = L;
    /*
     * The last argument of plasma_zooplap2tile is the cleanup statement handed
     * to the macro.  NOTE(review): presumably executed by the macro on
     * allocation failure before it returns - confirm in the macro definition.
     */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N);
    }
    /* Call the tile interface */
    PLASMA_zgetrf_incpiv_Tile_Async(&descA, &descL, IPIV, sequence, &request);
    /* Hand the result back to A; the out-of-place path also frees descA */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    /* Read the status before the sequence object is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgetri	(	int	N,
		PLASMA_Complex64_t *	A,
		int	LDA,
		int *	IPIV
	)

PLASMA_zgetri - Computes the inverse of a matrix using the LU factorization computed by PLASMA_zgetrf. This method inverts U and then computes inv(A) by solving the system inv(A)*L = inv(U) for inv(A).

Parameters:

[in]	N	The order of the matrix A. N >= 0.
[in,out]	A	On entry, the triangular factor L or U from the factorization A = PLU as computed by PLASMA_zgetrf. On exit, if return value = 0, the inverse of the original matrix A.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in]	IPIV	The pivot indices that define the permutations as returned by PLASMA_zgetrf.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, the (i,i) element of the factor U is exactly zero; The matrix is singular and its inverse could not be computed.

See also:: PLASMA_zgetri_Tile; PLASMA_zgetri_Tile_Async; PLASMA_cgetri; PLASMA_dgetri; PLASMA_sgetri; PLASMA_zgetrf

Definition at line 63 of file zgetri.c.

References max, PLASMA_Alloc_Workspace_zgetri_Tile_Async(), plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGESV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zgetri_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver around PLASMA_zgetri_Tile_Async.
     *
     * Validates N and LDA, tunes NB, sets up descA from A via the out-of-place
     * or in-place lap2tile macro (chosen by PLASMA_TRANSLATION), obtains a
     * workspace descriptor descW, calls the tile routine, applies the matching
     * tile2lap macro to A and releases the workspace.  Returns the status
     * recorded in the sequence, or a negative argument index / PLASMA error
     * code on early exit.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    PLASMA_desc descW;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zgetri", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments; the return value is minus the argument position */
    if (N < 0) {
        plasma_error("PLASMA_zgetri", "illegal value of N");
        return -1;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zgetri", "illegal value of LDA");
        return -3;
    }
    /* Quick return (N < 0 was rejected above, so this is the N == 0 case) */
    if (max(N, 0) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZGESV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgetri", "plasma_tune() failed");
        return status;
    }
    /* Set NB (read after plasma_tune() so the tuned value is used) */
    NB   = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /*
     * The last argument of plasma_zooplap2tile is the cleanup statement handed
     * to the macro.  NOTE(review): presumably executed by the macro on
     * allocation failure before it returns - confirm in the macro definition.
     */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ziplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, N, N);
    }
    /* Allocate workspace (the return value of this call is not checked here) */
    PLASMA_Alloc_Workspace_zgetri_Tile_Async(&descA, &descW);
    /* Call the tile interface */
    PLASMA_zgetri_Tile_Async(&descA, IPIV, &descW, sequence, &request);
    /* Hand the result back to A; the out-of-place path also frees descA */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    /* The workspace is released in both translation modes */
    plasma_desc_mat_free(&(descW));
    /* Read the status before the sequence object is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zgetrs	(	PLASMA_enum	trans,
		int	N,
		int	NRHS,
		PLASMA_Complex64_t *	A,
		int	LDA,
		int *	IPIV,
		PLASMA_Complex64_t *	B,
		int	LDB
	)

PLASMA_zgetrs - Solves a system of linear equations A * X = B, with a general N-by-N matrix A using the tile LU factorization computed by PLASMA_zgetrf.

Parameters:

[in]	trans	Specifies the form of the system of equations: = PlasmaNoTrans: A * X = B (No transpose); = PlasmaTrans: A**T * X = B (Transpose); = PlasmaConjTrans: A**H * X = B (Conjugate transpose).
[in]	N	The order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]	A	The tile factors L and U from the factorization, computed by PLASMA_zgetrf.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in]	IPIV	The pivot indices from PLASMA_zgetrf.
[in,out]	B	On entry, the N-by-NRHS matrix of right hand side matrix B. On exit, the solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS successful exit

Returns:: <0 if -i, the i-th argument had an illegal value

See also:: PLASMA_zgetrs_Tile; PLASMA_zgetrs_Tile_Async; PLASMA_cgetrs; PLASMA_dgetrs; PLASMA_sgetrs; PLASMA_zgetrf

Definition at line 72 of file zgetrs.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGESV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zgetrs_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaConjTrans, PlasmaNoTrans, PlasmaTrans, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver around PLASMA_zgetrs_Tile_Async.
     *
     * Validates trans, N, NRHS, LDA and LDB, tunes NB, sets up descA and descB
     * from A and B via the out-of-place or in-place lap2tile macros (chosen by
     * PLASMA_TRANSLATION), calls the tile routine and applies the matching
     * tile2lap macro(s).  Returns the status recorded in the sequence, or a
     * negative argument index / PLASMA error code on early exit.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zgetrs", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments; the return value is minus the argument position */
    if ( (trans != PlasmaNoTrans) && 
         (trans != PlasmaTrans)   &&
         (trans != PlasmaConjTrans)) {
        plasma_error("PLASMA_zgetrs", "illegal value of trans");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_zgetrs", "illegal value of N");
        return -2;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_zgetrs", "illegal value of NRHS");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zgetrs", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_zgetrs", "illegal value of LDB");
        return -8;
    }
    /* Quick return: no system or no right-hand side to solve */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_ZGESV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgetrs", "plasma_tune() failed");
        return status;
    }
    /* Set NB (read after plasma_tune() so the tuned value is used) */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /*
     * The last argument of plasma_zooplap2tile is the cleanup statement list
     * handed to the macro; the one for descB also names descA, which has
     * already been set up at that point.  NOTE(review): presumably executed by
     * the macro on allocation failure before it returns - confirm in the
     * macro definition.
     */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
    }
    /* Call the tile interface */
    PLASMA_zgetrs_Tile_Async(trans, &descA, IPIV, &descB, sequence, &request);
    /*
     * Out-of-place: only B is handed back (A is not written back), then both
     * descriptors are freed.  In-place: both A and B go through tile2lap.
     */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    /* Read the status before the sequence object is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

int PLASMA_zgetrs_incpiv	(	PLASMA_enum	trans,
		int	N,
		int	NRHS,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t *	L,
		int *	IPIV,
		PLASMA_Complex64_t *	B,
		int	LDB
	)

PLASMA_zgetrs_incpiv - Solves a system of linear equations A * X = B, with a general N-by-N matrix A using the tile LU factorization computed by PLASMA_zgetrf_incpiv.

Parameters:

[in]	trans	Specifies the form of the system of equations: = PlasmaNoTrans: A * X = B (No transpose); = PlasmaTrans: A**T * X = B (Transpose); = PlasmaConjTrans: A**H * X = B (Conjugate transpose). Currently only PlasmaNoTrans is supported.
[in]	N	The order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]	A	The tile factors L and U from the factorization, computed by PLASMA_zgetrf_incpiv.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in]	L	Auxiliary factorization data, related to the tile L factor, computed by PLASMA_zgetrf_incpiv.
[in]	IPIV	The pivot indices from PLASMA_zgetrf_incpiv (not equivalent to LAPACK).
[in,out]	B	On entry, the N-by-NRHS matrix of right hand side matrix B. On exit, the solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS successful exit

Returns:: <0 if -i, the i-th argument had an illegal value

See also:: PLASMA_zgetrs_incpiv_Tile; PLASMA_zgetrs_incpiv_Tile_Async; PLASMA_cgetrs_incpiv; PLASMA_dgetrs_incpiv; PLASMA_sgetrs_incpiv; PLASMA_zgetrf_incpiv

Definition at line 75 of file zgetrs_incpiv.c.

References L, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_NOT_SUPPORTED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGESV, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zgetrs_incpiv_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaComplexDouble, PlasmaNoTrans, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver around PLASMA_zgetrs_incpiv_Tile_Async.
     *
     * Rejects any trans other than PlasmaNoTrans, validates N, NRHS, LDA and
     * LDB, tunes the blocking parameters, builds a descriptor over the
     * caller-supplied L buffer, sets up descA and descB from A and B via the
     * out-of-place or in-place lap2tile macros (chosen by PLASMA_TRANSLATION),
     * calls the tile routine and applies the matching tile2lap macro(s).
     * Returns the status recorded in the sequence, or a negative argument
     * index / PLASMA error code on early exit.
     */
    int NB, IB, IBNB, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB, descL;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zgetrs_incpiv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /*
     * Check input arguments.  An unsupported trans yields
     * PLASMA_ERR_NOT_SUPPORTED rather than -1; the other failures return
     * minus the argument position.
     */
    if (trans != PlasmaNoTrans) {
        plasma_error("PLASMA_zgetrs_incpiv", "only PlasmaNoTrans supported");
        return PLASMA_ERR_NOT_SUPPORTED;
    }
    if (N < 0) {
        plasma_error("PLASMA_zgetrs_incpiv", "illegal value of N");
        return -2;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_zgetrs_incpiv", "illegal value of NRHS");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zgetrs_incpiv", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_zgetrs_incpiv", "illegal value of LDB");
        return -9;
    }
    /* Quick return: no system or no right-hand side to solve */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_ZGESV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgetrs_incpiv", "plasma_tune() failed");
        return status;
    }
    /* Set NB, IB and the tile count NT = ceil(N/NB) */
    NB    = PLASMA_NB;
    IB    = PLASMA_IB;
    IBNB  = IB*NB;
    NT    = (N%NB==0) ? (N/NB) : (N/NB+1);
    plasma_sequence_create(plasma, &sequence);
    /* L is used in place: an (NT*IB)-by-(NT*NB) descriptor over the user buffer */
    descL = plasma_desc_init(
        PlasmaComplexDouble,
        IB, NB, IBNB,
        NT*IB, NT*NB, 0, 0, NT*IB, NT*NB);
    descL.mat = L;
    /*
     * The last argument of plasma_zooplap2tile is the cleanup statement list
     * handed to the macro; the one for descB also names descA, which has
     * already been set up at that point.  NOTE(review): presumably executed by
     * the macro on allocation failure before it returns - confirm in the
     * macro definition.
     */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
    }
    /* Call the tile interface */
    PLASMA_zgetrs_incpiv_Tile_Async(&descA, &descL, IPIV, &descB, sequence, &request);
    /*
     * Out-of-place: only B is handed back (A is not written back), then both
     * descriptors are freed.  In-place: both A and B go through tile2lap.
     */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    /* Read the status before the sequence object is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zheev	(	PLASMA_enum	jobz,
		PLASMA_enum	uplo,
		int	N,
		PLASMA_Complex64_t *	A,
		int	LDA,
		double *	W,
		PLASMA_desc *	descT,
		PLASMA_Complex64_t *	Q,
		int	LDQ
	)

PLASMA_zheev - Computes all eigenvalues and, optionally, eigenvectors of a complex Hermitian matrix A. The matrix A is first reduced to tridiagonal form using a two-stage approach: First stage: reduction to band tridiagonal form; Second stage: reduction from band to tridiagonal form. Note: Only PlasmaNoVec supported!

Parameters:

[in]	jobz	Intended usage: = PlasmaNoVec: computes eigenvalues only; = PlasmaVec: computes eigenvalues and eigenvectors. Note: Only PlasmaNoVec supported!
[in]	uplo	Specifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The order of the matrix A. N >= 0.
[in,out]	A	On entry, the symmetric (or Hermitian) matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, the lower triangle (if uplo = PlasmaLower) or the upper triangle (if uplo = PlasmaUpper) of A, including the diagonal, is destroyed.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[out]	W	On exit, if info = 0, the eigenvalues.
[in,out]	descT	On entry, the descriptor as returned by PLASMA_Alloc_Workspace_zheev. On exit, contains auxiliary factorization data.
[out]	Q	On exit, if jobz = PlasmaVec and info = 0, the eigenvectors.
[in]	LDQ	The leading dimension of the array Q. LDQ >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if INFO = i, the algorithm failed to converge; i off-diagonal elements of an intermediate tridiagonal form did not converge to zero.

See also:: PLASMA_zheev_Tile; PLASMA_zheev_Tile_Async; PLASMA_cheev; PLASMA_dsyev; PLASMA_ssyev

Definition at line 96 of file zheev.c.

References plasma_desc_t::m, plasma_desc_t::mat, max, plasma_desc_t::n, plasma_context_self(), plasma_desc_check(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), PLASMA_FUNC_ZHEEV, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_zdesc_alloc, PLASMA_zheev_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaComplexDouble, PlasmaLower, PlasmaNoVec, PlasmaUpper, PlasmaVec, Q, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver around PLASMA_zheev_Tile_Async.
     *
     * Tunes the blocking parameters, validates jobz, uplo, N, LDA, descT and
     * LDQ, rejects jobz == PlasmaVec as unsupported, sets up descA from A via
     * the out-of-place or in-place lap2tile macro (chosen by
     * PLASMA_TRANSLATION), calls the tile routine and applies the matching
     * tile2lap macro to A.  Returns the status recorded in the sequence, or a
     * negative argument index / PLASMA error code on early exit.
     */
    int NB, IB, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descQ;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_error("PLASMA_zheev", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /*
     * Tune NB & IB depending on N; Set NBNB
     * This runs before the argument checks because the descT size check below
     * needs NB, IB and NT; note that N has not been validated yet here.
     */
    status = plasma_tune(PLASMA_FUNC_ZHEEV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zheev", "plasma_tune() failed");
        return status;
    }
    /* Set NB, IB and the tile count NT = ceil(N/NB) */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);
    /* Check input arguments; the return value is minus the argument position */
    if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
        plasma_error("PLASMA_zheev", "illegal value of jobz");
        return -1;
    }
    if (uplo != PlasmaLower && uplo != PlasmaUpper) {
        plasma_error("PLASMA_zheev", "illegal value of uplo");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_zheev", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zheev", "illegal value of LDA");
        return -5;
    }
    /* descT must be a valid descriptor of size (NT*IB)-by-(NT*NB) */
    if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) || 
         ( descT->m != NT*IB ) || (descT->n != NT*NB) ) {
        plasma_error("PLASMA_zheev", "invalid T descriptor");
        return -7;
    }
    if (LDQ < max(1, N)) {
        plasma_error("PLASMA_zheev", "illegal value of LDQ");
        return -9;
    }
    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;
    /*
     * Eigenvectors are not implemented: PlasmaVec is reported as an illegal
     * jobz.  Because of this return, every "jobz == PlasmaVec" branch further
     * down is unreachable in this version.
     */
    if (jobz == PlasmaVec) {
        plasma_error("PLASMA_zheev", "computing the eigenvectors is not supported in this version");
        return -1;
    }
    plasma_sequence_create(plasma, &sequence);
    /*
     * The last argument of plasma_zooplap2tile / plasma_zdesc_alloc is the
     * cleanup statement list handed to the macro.  NOTE(review): presumably
     * executed by the macro on allocation failure before it returns - confirm
     * in the macro definitions.
     */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
        if (jobz == PlasmaVec) {
            /* No need for conversion, it's just output */
            plasma_zdesc_alloc( descQ, NB, NB, LDQ, N, 0, 0, N, N, 
                                plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descQ)) );
        }
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
        if (jobz == PlasmaVec) {
            /* No need for conversion, it's just output */
            descQ = plasma_desc_init(
                PlasmaComplexDouble, NB, NB, NB*NB,
                LDQ, N, 0, 0, N, N);
            descQ.mat = Q;
        }
    }
    /*
     * Call the tile interface
     * NOTE(review): descQ is only initialized in the PlasmaVec branches above,
     * which cannot be reached, so &descQ refers to an uninitialized descriptor
     * here - confirm the tile routine does not read it for PlasmaNoVec.
     */
    PLASMA_zheev_Tile_Async(jobz, uplo, &descA, W, descT, &descQ, sequence, &request);
    /* Hand the result back to A; the out-of-place path also frees descA */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
        if (jobz == PlasmaVec) {
           plasma_zooptile2lap( descQ, Q, NB, NB, LDQ, N );
        }
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        if (jobz == PlasmaVec)
           plasma_desc_mat_free(&descQ);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        if (jobz == PlasmaVec)
           plasma_ziptile2lap( descQ, Q, NB, NB, LDQ, N );
        plasma_dynamic_sync();
    }
    /* Read the status before the sequence object is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zheevd	(	PLASMA_enum	jobz,
		PLASMA_enum	uplo,
		int	N,
		PLASMA_Complex64_t *	A,
		int	LDA,
		double *	W,
		PLASMA_desc *	T,
		PLASMA_Complex64_t *	Q,
		int	LDQ
	)

PLASMA_zheevd - Computes all eigenvalues and, optionally, eigenvectors of a complex Hermitian matrix A. The matrix A is first reduced to tridiagonal form using a two-stage approach: First stage: reduction to band tridiagonal form; Second stage: reduction from band to tridiagonal form. Note: Only PlasmaNoVec supported!

Parameters:

[in]	jobz	Intended usage: = PlasmaNoVec: computes eigenvalues only; = PlasmaVec: computes eigenvalues and eigenvectors. Note: Only PlasmaNoVec supported!
[in]	uplo	Specifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The order of the matrix A. N >= 0.
[in,out]	A	On entry, the symmetric (or Hermitian) matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, the lower triangle (if uplo = PlasmaLower) or the upper triangle (if uplo = PlasmaUpper) of A, including the diagonal, is destroyed.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[out]	W	On exit, if info = 0, the eigenvalues.
[in,out]	T	On entry, the descriptor as returned by PLASMA_Alloc_Workspace_zheev. On exit, contains auxiliary factorization data.
[out]	Q	On exit, if jobz = PlasmaVec and info = 0, the eigenvectors.
[in]	LDQ	The leading dimension of the array Q. LDQ >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if INFO = i, the algorithm failed to converge; i off-diagonal elements of an intermediate tridiagonal form did not converge to zero.

See also:: PLASMA_zheevd_Tile; PLASMA_zheevd_Tile_Async; PLASMA_cheevd; PLASMA_dsyevd; PLASMA_ssyevd

Definition at line 97 of file zheevd.c.

References plasma_desc_t::mat, max, plasma_context_self(), plasma_desc_check(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), PLASMA_FUNC_ZHEEV, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_zdesc_alloc, PLASMA_zheevd_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaComplexDouble, PlasmaLower, PlasmaNoVec, PlasmaUpper, PlasmaVec, Q, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver around PLASMA_zheevd_Tile_Async.
     *
     * Validates jobz, uplo, N, LDA, the T descriptor and LDQ, rejects
     * jobz == PlasmaVec as unsupported, tunes the blocking parameters, sets up
     * descA from A via the out-of-place or in-place lap2tile macro (chosen by
     * PLASMA_TRANSLATION), calls the tile routine and applies the matching
     * tile2lap macro to A.  Returns the status recorded in the sequence, or a
     * negative argument index / PLASMA error code on early exit.
     */
    int NB, IB, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descQ;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_error("PLASMA_zheevd", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /*
     * Set NB, IB and the tile count NT = ceil(N/NB)
     * NOTE(review): unlike PLASMA_zheev, these are read BEFORE plasma_tune()
     * is called further down, so the descriptor size check uses whatever
     * values the context currently holds - confirm this ordering is intended.
     */
    NB    = PLASMA_NB;
    IB    = PLASMA_IB;
    NT    = (N%NB==0) ? (N/NB) : (N/NB+1);
    /* Check input arguments; the return value is minus the argument position */
    if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
        plasma_error("PLASMA_zheevd", "illegal value of jobz");
        return -1;
    }
    if (uplo != PlasmaLower && uplo != PlasmaUpper) {
        plasma_error("PLASMA_zheevd", "illegal value of uplo");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_zheevd", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zheevd", "illegal value of LDA");
        return -5;
    }
    /* The T descriptor must be valid and of size (NT*IB)-by-(NT*NB) */
    if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) ||
         ( descT->m != NT*IB ) || (descT->n != NT*NB) ) {
        /* Report under this routine's own name (was mislabelled "PLASMA_zhegv") */
        plasma_error("PLASMA_zheevd", "invalid T descriptor");
        return -7;
    }
    if (LDQ < max(1, N)) {
        plasma_error("PLASMA_zheevd", "illegal value of LDQ");
        return -9;
    }
    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;
    /*
     * Eigenvectors are not implemented: PlasmaVec is reported as an illegal
     * jobz.  Because of this return, every "jobz == PlasmaVec" branch further
     * down is unreachable in this version.
     */
    if (jobz == PlasmaVec) {
        plasma_error("PLASMA_zheevd", "computing the eigenvectors is not supported in this version");
        return -1;
    }
    /* Tune NB & IB depending on N; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZHEEV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zheevd", "plasma_tune() failed");
        return status;
    }
    plasma_sequence_create(plasma, &sequence);
    /*
     * The last argument of plasma_zooplap2tile / plasma_zdesc_alloc is the
     * cleanup statement list handed to the macro.  NOTE(review): presumably
     * executed by the macro on allocation failure before it returns - confirm
     * in the macro definitions.
     */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
        if (jobz == PlasmaVec) {
            /* No need for conversion, it's just output */
            plasma_zdesc_alloc( descQ, NB, NB, LDQ, N, 0, 0, N, N,
                                plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descQ)) );
        }
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
        if (jobz == PlasmaVec) {
            /* No need for conversion, it's just output */
            descQ = plasma_desc_init(
                PlasmaComplexDouble, NB, NB, NB*NB,
                LDQ, N, 0, 0, N, N);
            descQ.mat = Q;
        }
    }
    /*
     * Call the tile interface
     * NOTE(review): descT is dereferenced as a pointer in the check above, yet
     * its address (&descT) is passed here, and the W/T argument order differs
     * from the PLASMA_zheev call - confirm against the prototype of
     * PLASMA_zheevd_Tile_Async.  descQ is also still uninitialized at this
     * point because the PlasmaVec branches above cannot be reached.
     */
    PLASMA_zheevd_Tile_Async(jobz, uplo, &descA, &descT, W, &descQ, sequence, &request);
    /* Hand the result back to A; the out-of-place path also frees descA */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N    );
        if (jobz == PlasmaVec) {
           plasma_zooptile2lap( descQ, Q, NB, NB, LDQ, N    );
        }
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        if (jobz == PlasmaVec)
           plasma_desc_mat_free(&descQ);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N    );
        if (jobz == PlasmaVec)
           plasma_ziptile2lap( descQ, Q, NB, NB, LDQ, N    );
        plasma_dynamic_sync();
    }
    /* Read the status before the sequence object is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zhegst	(	PLASMA_enum	itype,
		PLASMA_enum	uplo,
		int	N,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t *	B,
		int	LDB
	)

PLASMA_zhegst - reduces a complex Hermitian-definite generalized eigenproblem to standard form. If itype == 1, the problem is A*x = lambda*B*x, and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H). If itype == 2 or 3, the problem is A*B*x = lambda*x or B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L. B must have been previously factorized as U**H*U or L*L**H by PLASMA_ZPOTRF.

Parameters:

[in]	itype	Intended usage: = 1: A*x = lambda*B*x; = 2: A*B*x = lambda*x; = 3: B*A*x = lambda*x.
[in]	uplo	Specifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The order of the matrices A and B. N >= 0.
[in,out]	A	On entry, the symmetric (or Hermitian) matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if return value == 0, the transformed matrix, stored in the same format as A.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in,out]	B	On entry, the triangular factor from the Cholesky factorization of B, as returned by PLASMA_ZPOTRF.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_zhegst_Tile; PLASMA_zhegst_Tile_Async; PLASMA_chegst; PLASMA_dsygst; PLASMA_ssygst

Definition at line 85 of file zhegst.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZHEGST, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zhegst_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaLower, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-interface driver for the reduction of a Hermitian-definite
     * generalized eigenproblem to standard form.
     *
     * Steps: validate the arguments, tune the tile size, wrap A and B in tile
     * descriptors, call PLASMA_zhegst_Tile_Async(), convert A and B back, and
     * report the status stored in the sequence.
     *
     * Returns:
     *   PLASMA_ERR_NOT_INITIALIZED  no PLASMA context for the calling thread
     *   -1, -2, -3, -5, -7          illegal itype, uplo, N, LDA, LDB
     *   status of plasma_tune()     when tuning fails
     *   PLASMA_SUCCESS              when N == 0 (nothing to do)
     *   sequence->status            otherwise
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zhegst", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments; the negative return value is the argument index. */
    if (itype != 1 && itype != 2 && itype != 3) {
        plasma_error("PLASMA_zhegst", "Illegal value of itype");
        return -1;
    }
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_zhegst", "Illegal value of uplo");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_zhegst", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zhegst", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_zhegst", "illegal value of LDB");
        return -7;
    }

    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;

    /* Tune the tile size for an N-by-N problem */
    status = plasma_tune(PLASMA_FUNC_ZHEGST, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zhegst", "plasma_tune() failed");
        return status;
    }

    /* Set NB */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /*
         * The last macro argument is the cleanup code; presumably it runs when
         * the conversion cannot allocate its tile storage (confirm against the
         * macro definition). The cleanup for B must also release A, as the
         * other drivers in this file do, otherwise descA is leaked.
         */
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N,
                             plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, N, N,
                             plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)) );
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N);
        plasma_ziplap2tile( descB, B, NB, NB, LDB, N, 0, 0, N, N);
    }

    /* Call the tile interface */
    PLASMA_zhegst_Tile_Async(itype, uplo, &descA, &descB, sequence, &request);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_zooptile2lap( descB, B, NB, NB, LDB, N );
        /* Synchronize before the tile storage is released. */
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, N );
        plasma_dynamic_sync();
    }

    /* Read the status before the sequence is destroyed. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zhegv	(	PLASMA_enum	itype,
		PLASMA_enum	jobz,
		PLASMA_enum	uplo,
		int	N,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t *	B,
		int	LDB,
		double *	W,
		PLASMA_desc *	descT,
		PLASMA_Complex64_t *	Q,
		int	LDQ
	)

PLASMA_zhegv - Computes all eigenvalues and, optionally, eigenvectors of a complex generalized Hermitian-definite eigenproblem of the form: A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and B are assumed to be Hermitian and B is also positive definite. Note: Only PlasmaNoVec supported!

Parameters:

[in]	itype	Intended usage: = 1: A*x = lambda*B*x; = 2: A*B*x = lambda*x; = 3: B*A*x = lambda*x.
[in]	jobz	Intended usage: = PlasmaNoVec: computes eigenvalues only; = PlasmaVec: computes eigenvalues and eigenvectors. Note: Only PlasmaNoVec supported!
[in]	uplo	Specifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A and B are stored; = PlasmaLower: Lower triangle of A and B are stored.
[in]	N	The order of the matrix A. N >= 0.
[in,out]	A	On entry, the symmetric (or Hermitian) matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if jobz = PlasmaVec, then if return value = 0, A contains the matrix Z of eigenvectors. The eigenvectors are normalized as follows: if itype = 1 or 2, Z**H*B*Z = I; if itype = 3, Z**H*inv(B)*Z = I. If jobz = PlasmaNoVec, then on exit the lower triangle (if uplo = PlasmaLower) or the upper triangle (if uplo = PlasmaUpper) of A, including the diagonal, is destroyed.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in,out]	B	On entry, the symmetric (or Hermitian) positive definite matrix B. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of B contains the upper triangular part of the matrix B, and the strictly lower triangular part of B is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of B contains the lower triangular part of the matrix B, and the strictly upper triangular part of B is not referenced. On exit, if return value <= N, the part of B containing the matrix is overwritten by the triangular factor U or L from the Cholesky factorization B = U**H*U or B = L*L**H.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).
[out]	W	On exit, if info = 0, the eigenvalues.
[in,out]	descT	On entry, descriptor as returned by PLASMA_Alloc_Workspace_zhegv. On exit, contains auxiliary factorization data.
[out]	Q	On exit, if jobz = PlasmaVec and info = 0, the eigenvectors.
[in]	LDQ	The leading dimension of Q.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
<=N	if INFO = i, plasma_zhegv failed to converge; i off-diagonal elements of an intermediate tridiagonal form did not converge to zero.
>N	if INFO = N + i, for 1 <= i <= N, then the leading minor of order i of B is not positive definite. The factorization of B could not be completed and no eigenvalues or eigenvectors were computed.

See also:: PLASMA_zhegv_Tile; PLASMA_zhegv_Tile_Async; PLASMA_chegv; PLASMA_dsygv; PLASMA_ssygv

Definition at line 128 of file zhegv.c.

References plasma_desc_t::m, plasma_desc_t::mat, max, plasma_desc_t::n, plasma_context_self(), plasma_desc_check(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), PLASMA_FUNC_ZHEGV, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_zdesc_alloc, PLASMA_zhegv_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaComplexDouble, PlasmaLower, PlasmaNoVec, PlasmaUpper, PlasmaVec, Q, and plasma_sequence_t::status.

{
    /*
     * LAPACK-interface driver for the generalized Hermitian-definite
     * eigenvalue problem. Only jobz == PlasmaNoVec is accepted by this
     * version; PlasmaVec is rejected after argument validation.
     *
     * Steps: tune NB/IB, validate the arguments (including the shape of the
     * caller-supplied descT), wrap A and B in tile descriptors, call
     * PLASMA_zhegv_Tile_Async(), convert A and B back, and report the status
     * stored in the sequence.
     *
     * Returns:
     *   PLASMA_ERR_NOT_INITIALIZED   no PLASMA context for the calling thread
     *   status of plasma_tune()      when tuning fails
     *   -1, -2, -3, -4, -6, -8,
     *   -10, -12                     illegal itype, jobz, uplo, N, LDA, LDB,
     *                                descT, LDQ
     *   -2                           jobz == PlasmaVec (not supported)
     *   PLASMA_SUCCESS               when N == 0
     *   sequence->status             otherwise
     */
    int NB, IB, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB, descQ;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_error("PLASMA_zhegv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /*
     * Tune NB & IB depending on N. This runs before the argument checks
     * because the descT check below needs NB, IB and NT.
     */
    status = plasma_tune(PLASMA_FUNC_ZHEGV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zhegv", "plasma_tune() failed");
        return status;
    }

    /* Set NT: number of NB-wide tiles needed to cover N (rounded up) */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);

    /* Check input arguments; the negative return value is the argument index. */
    if (itype != 1 && itype != 2 && itype != 3) {
        plasma_error("PLASMA_zhegv", "Illegal value of itype");
        return -1;
    }
    if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
        plasma_error("PLASMA_zhegv", "illegal value of jobz");
        return -2;
    }
    if (uplo != PlasmaLower && uplo != PlasmaUpper) {
        /* Both triangles are accepted, so report a plain illegal value. */
        plasma_error("PLASMA_zhegv", "illegal value of uplo");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_zhegv", "illegal value of N");
        return -4;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zhegv", "illegal value of LDA");
        return -6;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_zhegv", "illegal value of LDB");
        return -8;
    }
    if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) ||
         ( descT->m != NT*IB ) || (descT->n != NT*NB) ) {
        plasma_error("PLASMA_zhegv", "invalid T descriptor");
        return -10;
    }
    if (LDQ < max(1, N)) {
        plasma_error("PLASMA_zhegv", "illegal value of LDQ");
        return -12;
    }

    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;

    if (jobz == PlasmaVec) {
        plasma_error("PLASMA_zhegv", "computing the eigenvectors is not supported in this version");
        /* jobz is the second argument, so the error code is -2. */
        return -2;
    }

    plasma_sequence_create(plasma, &sequence);

    /*
     * From here on jobz is always PlasmaNoVec, so the PlasmaVec branches
     * below are currently unreachable.
     */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* The last macro argument releases every descriptor set up so far. */
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N,
                             plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, N, N,
                             plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)) );
        if (jobz == PlasmaVec) {
            /* No need for conversion, it's just output */
            plasma_zdesc_alloc( descQ, NB, NB, LDQ, N, 0, 0, N, N,
                                plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descQ)) );
        }
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
        plasma_ziplap2tile( descB, B, NB, NB, LDB, N, 0, 0, N, N );
        if (jobz == PlasmaVec) {
            /* No need for conversion, it's just output */
            descQ = plasma_desc_init(
                PlasmaComplexDouble, NB, NB, NB*NB,
                LDQ, N, 0, 0, N, N);
            descQ.mat = Q;
        }
    }

    /*
     * Call the tile interface.
     * NOTE(review): descQ is not initialized on the PlasmaNoVec path; confirm
     * that the tile routine does not read it in that mode.
     */
    PLASMA_zhegv_Tile_Async(itype, PlasmaNoVec, uplo,
                            &descA, &descB, W,
                            descT, &descQ,
                            sequence, &request);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_zooptile2lap( descB, B, NB, NB, LDB, N );
        if (jobz == PlasmaVec) {
            plasma_zooptile2lap( descQ, Q, NB, NB, LDQ, N );
        }
        /* Synchronize before the tile storage is released. */
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
        if (jobz == PlasmaVec) {
            plasma_desc_mat_free(&descQ);
        }
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, N );
        if (jobz == PlasmaVec) {
            plasma_ziptile2lap( descQ, Q, NB, NB, LDQ, N );
        }
        plasma_dynamic_sync();
    }

    /* Read the status before the sequence is destroyed. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zhemm	(	PLASMA_enum	side,
		PLASMA_enum	uplo,
		int	M,
		int	N,
		PLASMA_Complex64_t	alpha,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t *	B,
		int	LDB,
		PLASMA_Complex64_t	beta,
		PLASMA_Complex64_t *	C,
		int	LDC
	)

PLASMA_zhemm - Performs one of the matrix-matrix operations

$C = \alpha \times A \times B + \beta \times C$

or

$C = \alpha \times B \times A + \beta \times C$

where alpha and beta are scalars, A is an hermitian matrix and B and C are m by n matrices.

Parameters:

[in]	side	Specifies whether the hermitian matrix A appears on the left or right in the operation as follows: = PlasmaLeft: $C = \alpha \times A \times B + \beta \times C$ = PlasmaRight: $C = \alpha \times B \times A + \beta \times C$
[in]	uplo	Specifies whether the upper or lower triangular part of the hermitian matrix A is to be referenced as follows: = PlasmaLower: Only the lower triangular part of the hermitian matrix A is to be referenced. = PlasmaUpper: Only the upper triangular part of the hermitian matrix A is to be referenced.
[in]	M	Specifies the number of rows of the matrix C. M >= 0.
[in]	N	Specifies the number of columns of the matrix C. N >= 0.
[in]	alpha	Specifies the scalar alpha.
[in]	A	A is a LDA-by-ka matrix, where ka is M when side = PlasmaLeft, and is N otherwise. Only the uplo triangular part is referenced.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,ka).
[in]	B	B is a LDB-by-N matrix, where the leading M-by-N part of the array B must contain the matrix B.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,M).
[in]	beta	Specifies the scalar beta.
[in,out]	C	C is a LDC-by-N matrix. On exit, the array is overwritten by the M by N updated matrix.
[in]	LDC	The leading dimension of the array C. LDC >= max(1,M).

Returns:

Return values:

PLASMA_SUCCESS successful exit

See also:: PLASMA_zhemm_Tile; PLASMA_chemm; PLASMA_dhemm; PLASMA_shemm

Definition at line 94 of file zhemm.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZHEMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zhemm_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaLeft, PlasmaLower, PlasmaRight, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-interface driver for the Hermitian matrix-matrix product.
     * Validates the arguments, tunes the tile size, wraps A, B and C in tile
     * descriptors, calls PLASMA_zhemm_Tile_Async() and converts back.
     *
     * Returns PLASMA_ERR_NOT_INITIALIZED without a PLASMA context, -i when the
     * i-th argument is illegal, the plasma_tune() status when tuning fails,
     * PLASMA_SUCCESS on a quick return, otherwise sequence->status.
     */
    PLASMA_desc descA, descB, descC;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_sequence *sequence = NULL;
    plasma_context_t *plasma;
    int status;
    int NB;
    int orderA;   /* order of the square matrix A: M on the left, N on the right */

    plasma = plasma_context_self();
    if (!plasma) {
        plasma_fatal_error("PLASMA_zhemm", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Argument validation; the negative return value is the argument index. */
    if (!(side == PlasmaLeft || side == PlasmaRight)) {
        plasma_error("PLASMA_zhemm", "illegal value of side");
        return -1;
    }
    if (!(uplo == PlasmaLower || uplo == PlasmaUpper)) {
        plasma_error("PLASMA_zhemm", "illegal value of uplo");
        return -2;
    }

    if (side == PlasmaLeft) {
        orderA = M;
    } else {
        orderA = N;
    }

    if (M < 0) {
        plasma_error("PLASMA_zhemm", "illegal value of M");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_zhemm", "illegal value of N");
        return -4;
    }
    if (max(1, orderA) > LDA) {
        plasma_error("PLASMA_zhemm", "illegal value of LDA");
        return -7;
    }
    if (max(1, M) > LDB) {
        plasma_error("PLASMA_zhemm", "illegal value of LDB");
        return -9;
    }
    if (max(1, M) > LDC) {
        plasma_error("PLASMA_zhemm", "illegal value of LDC");
        return -12;
    }

    /* Nothing to do: C is empty ... */
    if (M == 0 || N == 0) {
        return PLASMA_SUCCESS;
    }
    /* ... or the update leaves C unchanged (alpha == 0 and beta == 1). */
    if (alpha == (PLASMA_Complex64_t)0.0 && beta == (PLASMA_Complex64_t)1.0) {
        return PLASMA_SUCCESS;
    }

    /* Pick the tile size for an M-by-N problem. */
    status = plasma_tune(PLASMA_FUNC_ZHEMM, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zhemm", "plasma_tune() failed");
        return status;
    }
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    /* Wrap the LAPACK arrays in tile descriptors. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* Each cleanup argument releases every descriptor set up so far. */
        plasma_zooplap2tile( descA, A, NB, NB, LDA, orderA, 0, 0, orderA, orderA,
                             plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, N,  0, 0, M,  N,
                             plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
        plasma_zooplap2tile( descC, C, NB, NB, LDC, N,  0, 0, M,  N,
                             plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descC)));
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, orderA, 0, 0, orderA, orderA );
        plasma_ziplap2tile( descB, B, NB, NB, LDB, N,  0, 0, M,  N  );
        plasma_ziplap2tile( descC, C, NB, NB, LDC, N,  0, 0, M,  N  );
    }

    /* Run the tile algorithm. */
    PLASMA_zhemm_Tile_Async(
        side, uplo, alpha, &descA, &descB, beta, &descC, sequence, &request);

    /* Convert back and release what was allocated. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* Only C is an output, so only C is converted back on this path. */
        plasma_zooptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
        plasma_desc_mat_free(&descC);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, orderA );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, N );
        plasma_ziptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
    }

    /* The status must be read before the sequence is destroyed. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zher2k	(	PLASMA_enum	uplo,
		PLASMA_enum	trans,
		int	N,
		int	K,
		PLASMA_Complex64_t	alpha,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t *	B,
		int	LDB,
		double	beta,
		PLASMA_Complex64_t *	C,
		int	LDC
	)

PLASMA_zher2k - Performs one of the hermitian rank 2k operations

$C = \alpha [ op( A ) \times conjg( op( B )' )] + conjg( \alpha ) [ op( B ) \times conjg( op( A )' )] + \beta C$

, or

$C = \alpha [ conjg( op( A )' ) \times op( B ) ] + conjg( \alpha ) [ conjg( op( B )' ) \times op( A ) ] + \beta C$

,

where op( X ) is one of

op( X ) = X or op( X ) = conjg( X' )

where alpha is a complex scalar, beta is a real scalar, C is an n-by-n Hermitian matrix, and A and B are n-by-k matrices in the first case and k-by-n matrices in the second case.

Parameters:

[in]	uplo	= PlasmaUpper: Upper triangle of C is stored; = PlasmaLower: Lower triangle of C is stored.
[in]	trans	Specifies whether the matrix A is transposed or conjugate transposed: = PlasmaNoTrans: $C = \alpha [ op( A ) \times conjg( op( B )' )] + conjg( \alpha ) [ op( B ) \times conjg( op( A )' )] + \beta C$ = PlasmaConjTrans: $C = \alpha [ conjg( op( A )' ) \times op( B ) ] + conjg( \alpha ) [ conjg( op( B )' ) \times op( A ) ] + \beta C$
[in]	N	N specifies the order of the matrix C. N must be at least zero.
[in]	K	K specifies the number of columns of the A and B matrices with trans = PlasmaNoTrans. K specifies the number of rows of the A and B matrices with trans = PlasmaConjTrans.
[in]	alpha	alpha specifies the scalar alpha.
[in]	A	A is a LDA-by-ka matrix, where ka is K when trans = PlasmaNoTrans, and is N otherwise.
[in]	LDA	The leading dimension of the array A. If trans = PlasmaNoTrans, LDA must be at least max( 1, N ); otherwise LDA must be at least max( 1, K ).
[in]	B	B is a LDB-by-kb matrix, where kb is K when trans = PlasmaNoTrans, and is N otherwise.
[in]	LDB	The leading dimension of the array B. If trans = PlasmaNoTrans, LDB must be at least max( 1, N ); otherwise LDB must be at least max( 1, K ).
[in]	beta	beta specifies the scalar beta.
[in,out]	C	C is a LDC-by-N matrix. On exit, the array uplo part of the matrix is overwritten by the uplo part of the updated matrix.
[in]	LDC	The leading dimension of the array C. LDC >= max( 1, N ).

Returns:

Return values:

PLASMA_SUCCESS successful exit

See also:: PLASMA_zher2k_Tile; PLASMA_cher2k; PLASMA_dher2k; PLASMA_sher2k

Definition at line 96 of file zher2k.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZHERK, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zher2k_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaConjTrans, PlasmaLower, PlasmaNoTrans, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-interface driver for the Hermitian rank-2k update.
     * Validates the arguments, tunes the tile size, wraps A, B and C in tile
     * descriptors, calls PLASMA_zher2k_Tile_Async() and converts back.
     *
     * Returns PLASMA_ERR_NOT_INITIALIZED without a PLASMA context, -i when the
     * i-th argument is illegal, the plasma_tune() status when tuning fails,
     * PLASMA_SUCCESS on a quick return, otherwise sequence->status.
     */
    int NB;
    int Am, An;   /* stored dimensions of A and B (rows, columns) */
    int status;
    PLASMA_desc descA, descB, descC;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zher2k", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments; the negative return value is the argument index. */
    if ((uplo != PlasmaUpper) && (uplo != PlasmaLower)) {
        plasma_error("PLASMA_zher2k", "illegal value of uplo");
        return -1;
    }
    if ((trans != PlasmaNoTrans) && (trans != PlasmaConjTrans)) {
        plasma_error("PLASMA_zher2k", "illegal value of trans");
        return -2;
    }

    /* A and B are N-by-K when not transposed, K-by-N otherwise. */
    if ( trans == PlasmaNoTrans ) {
        Am = N; An = K;
    } else {
        Am = K; An = N;
    }

    if (N < 0) {
        plasma_error("PLASMA_zher2k", "illegal value of N");
        return -3;
    }
    if (K < 0) {
        plasma_error("PLASMA_zher2k", "illegal value of K");
        return -4;
    }
    if (LDA < max(1, Am)) {
        plasma_error("PLASMA_zher2k", "illegal value of LDA");
        return -7;
    }
    if (LDB < max(1, Am)) {
        plasma_error("PLASMA_zher2k", "illegal value of LDB");
        return -9;
    }
    if (LDC < max(1, N)) {
        plasma_error("PLASMA_zher2k", "illegal value of LDC");
        return -12;
    }

    /*
     * Quick return: C is empty, or the update leaves C unchanged
     * (beta == 1 and either alpha == 0 or K == 0). K is an int, so it is
     * compared with the integer constant 0.
     */
    if (N == 0 ||
        ((alpha == (PLASMA_Complex64_t)0.0 || K == 0) && beta == (double)1.0))
        return PLASMA_SUCCESS;

    /* Tune NB depending on N & K (zher2k uses the ZHERK tuning entry) */
    status = plasma_tune(PLASMA_FUNC_ZHERK, N, K, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zher2k", "plasma_tune() failed");
        return status;
    }

    /* Set NB */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* Each cleanup argument releases every descriptor set up so far. */
        plasma_zooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
        plasma_zooplap2tile( descC, C, NB, NB, LDC, N,  0, 0, N,  N,  plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descC)));
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An );
        plasma_ziplap2tile( descB, B, NB, NB, LDB, An, 0, 0, Am, An );
        plasma_ziplap2tile( descC, C, NB, NB, LDC, N,  0, 0, N,  N );
    }

    /* Call the tile interface */
    PLASMA_zher2k_Tile_Async(uplo, trans, alpha, &descA, &descB, beta, &descC, sequence, &request);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* Only C is an output, so only C is converted back on this path. */
        plasma_zooptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
        plasma_desc_mat_free(&descC);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, An );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, An );
        plasma_ziptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
    }

    /* Read the status before the sequence is destroyed. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zherk	(	PLASMA_enum	uplo,
		PLASMA_enum	trans,
		int	N,
		int	K,
		double	alpha,
		PLASMA_Complex64_t *	A,
		int	LDA,
		double	beta,
		PLASMA_Complex64_t *	C,
		int	LDC
	)

PLASMA_zherk - Performs one of the hermitian rank k operations

$C = \alpha [ op( A ) \times conjg( op( A )' )] + \beta C$

,

where op( X ) is one of

op( X ) = X or op( X ) = conjg( X' )

where alpha and beta are real scalars, C is an n-by-n hermitian matrix and A is an n-by-k matrix in the first case and a k-by-n matrix in the second case.

Parameters:

[in]	uplo	= PlasmaUpper: Upper triangle of C is stored; = PlasmaLower: Lower triangle of C is stored.
[in]	trans	Specifies whether the matrix A is transposed or conjugate transposed: = PlasmaNoTrans: A is not transposed; = PlasmaConjTrans: A is conjugate transposed.
[in]	N	N specifies the order of the matrix C. N must be at least zero.
[in]	K	K specifies the number of columns of the matrix op( A ).
[in]	alpha	alpha specifies the scalar alpha.
[in]	A	A is a LDA-by-ka matrix, where ka is K when trans = PlasmaNoTrans, and is N otherwise.
[in]	LDA	The leading dimension of the array A. If trans = PlasmaNoTrans, LDA must be at least max( 1, N ); otherwise LDA must be at least max( 1, K ).
[in]	beta	beta specifies the scalar beta
[in,out]	C	C is a LDC-by-N matrix. On exit, the array uplo part of the matrix is overwritten by the uplo part of the updated matrix.
[in]	LDC	The leading dimension of the array C. LDC >= max( 1, N ).

Returns:

Return values:

PLASMA_SUCCESS successful exit

See also:: PLASMA_zherk_Tile; PLASMA_cherk; PLASMA_dherk; PLASMA_sherk

Definition at line 85 of file zherk.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZHERK, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zherk_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaConjTrans, PlasmaLower, PlasmaNoTrans, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-interface driver for the Hermitian rank-k update.
     * Validates the arguments, tunes the tile size, wraps A and C in tile
     * descriptors, calls PLASMA_zherk_Tile_Async() and converts back.
     *
     * Returns PLASMA_ERR_NOT_INITIALIZED without a PLASMA context, -i when the
     * i-th argument is illegal, the plasma_tune() status when tuning fails,
     * PLASMA_SUCCESS on a quick return, otherwise sequence->status.
     */
    int NB;
    int Am, An;   /* stored dimensions of A (rows, columns) */
    int status;
    PLASMA_desc descA, descC;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zherk", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments; the negative return value is the argument index. */
    if ((uplo != PlasmaUpper) && (uplo != PlasmaLower)) {
        plasma_error("PLASMA_zherk", "illegal value of uplo");
        return -1;
    }
    if ((trans != PlasmaNoTrans) && (trans != PlasmaConjTrans)) {
        plasma_error("PLASMA_zherk", "illegal value of trans");
        return -2;
    }

    /* A is N-by-K when not transposed, K-by-N otherwise. */
    if ( trans == PlasmaNoTrans ) {
        Am = N; An = K;
    } else {
        Am = K; An = N;
    }

    if (N < 0) {
        plasma_error("PLASMA_zherk", "illegal value of N");
        return -3;
    }
    if (K < 0) {
        plasma_error("PLASMA_zherk", "illegal value of K");
        return -4;
    }
    if (LDA < max(1, Am)) {
        plasma_error("PLASMA_zherk", "illegal value of LDA");
        return -7;
    }
    if (LDC < max(1, N)) {
        plasma_error("PLASMA_zherk", "illegal value of LDC");
        return -10;
    }

    /*
     * Quick return: C is empty, or the update leaves C unchanged
     * (beta == 1 and either alpha == 0 or K == 0). K is an int, so it is
     * compared with the integer constant 0.
     */
    if (N == 0 ||
        ((alpha == (double)0.0 || K == 0) && beta == (double)1.0))
        return PLASMA_SUCCESS;

    /* Tune NB depending on N & K */
    status = plasma_tune(PLASMA_FUNC_ZHERK, N, K, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zherk", "plasma_tune() failed");
        return status;
    }

    /* Set NB */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* Each cleanup argument releases every descriptor set up so far. */
        plasma_zooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descC, C, NB, NB, LDC, N,  0, 0, N,  N,  plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descC)));
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An );
        plasma_ziplap2tile( descC, C, NB, NB, LDC, N,  0, 0, N,  N );
    }

    /* Call the tile interface */
    PLASMA_zherk_Tile_Async(uplo, trans, alpha, &descA, beta, &descC, sequence, &request);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* Only C is an output, so only C is converted back on this path. */
        plasma_zooptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descC);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, An );
        plasma_ziptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
    }

    /* Read the status before the sequence is destroyed. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zhetrd	(	PLASMA_enum	jobz,
		PLASMA_enum	uplo,
		int	N,
		PLASMA_Complex64_t *	A,
		int	LDA,
		double *	D,
		double *	E,
		PLASMA_desc *	descT,
		PLASMA_Complex64_t *	Q,
		int	LDQ
	)

PLASMA_zhetrd - reduces a complex Hermitian matrix A to real symmetric tridiagonal form S using a two-stage approach. First stage: reduction to band tridiagonal form (unitary Q1); second stage: reduction from band to tridiagonal form (unitary Q2). Let Q = Q1 * Q2 be the global unitary transformation; then Q**H * A * Q = S. Not LAPACK compliant, as A does not contain the T elements. Note: Only PlasmaNoVec is supported!

Parameters:

[in]	jobz	Intended usage: = PlasmaNoVec: computes eigenvalues only; = PlasmaVec: computes eigenvalues and eigenvectors. Note: Only PlasmaNoVec supported!
[in]	uplo	Specifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The order of the matrix A. N >= 0.
[in,out]	A	On entry, the symmetric (or Hermitian) matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, the lower triangle (if uplo = PlasmaLower) or the upper triangle (if uplo = PlasmaUpper) of A, including the diagonal, is destroyed.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[out]	D	On exit, the diagonal elements of the tridiagonal matrix: D(i) = A(i,i).
[out]	E	On exit, the off-diagonal elements of the tridiagonal matrix: E(i) = A(i,i+1) if uplo = PlasmaUpper, E(i) = A(i+1,i) if uplo = PlasmaLower.
[in,out]	descT	On entry, descriptor as returned by PLASMA_Alloc_Workspace_zheev. On exit, contains auxiliary factorization data.
[out]	Q	On exit, if jobz = PlasmaVec and info = 0, the eigenvectors.
[in]	LDQ	The leading dimension of the array Q. LDQ >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if INFO = i, the algorithm failed to converge; i off-diagonal elements of an intermediate tridiagonal form did not converge to zero.

See also:: PLASMA_zhetrd_Tile; PLASMA_zhetrd_Tile_Async; PLASMA_chetrd; PLASMA_dsytrd; PLASMA_ssytrd

Definition at line 100 of file zhetrd.c.

References plasma_desc_t::m, max, plasma_desc_t::n, plasma_context_self(), plasma_desc_check(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), PLASMA_FUNC_ZHETRD, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PLASMA_zhetrd_Tile_Async(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PlasmaLower, PlasmaNoVec, PlasmaUpper, PlasmaVec, and plasma_sequence_t::status.

{
    /*
     * LAPACK-interface driver: validates the arguments, hands A (and Q when
     * jobz == PlasmaVec) to the layout-translation macros, runs
     * PLASMA_zhetrd_Tile_Async() and returns the sequence status.
     *
     * Return value:
     *   PLASMA_ERR_NOT_INITIALIZED  plasma_context_self() returned NULL
     *   status of plasma_tune()     when tuning fails
     *   -1, -2, -3, -5, -8, -10     illegal jobz, uplo, N, LDA, descT, LDQ
     *   -1                          jobz == PlasmaVec (not supported)
     *   PLASMA_SUCCESS              N == 0 (quick return)
     *   sequence->status            otherwise
     */
    int NB, IB, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    /* NOTE(review): descQ is only filled in when jobz == PlasmaVec, yet its
     * address is always passed to the tile interface below. */
    PLASMA_desc descA, descQ;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_error("PLASMA_zhetrd", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Tune NB & IB depending on N; Set NBNB.
     * Tuning runs before the argument checks because the descT check
     * needs NT, which is derived from NB. */
    status = plasma_tune(PLASMA_FUNC_ZHETRD, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zhetrd", "plasma_tune() failed");
        return status;
    }

    /* Set NT: number of NB-wide tiles needed to cover N (rounded up) */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);

    /* Check input arguments */
    if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
        plasma_error("PLASMA_zhetrd", "illegal value of jobz");
        return -1;
    }
    if (uplo != PlasmaLower && uplo != PlasmaUpper) {
        plasma_error("PLASMA_zhetrd", "illegal value of uplo");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_zhetrd", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zhetrd", "illegal value of LDA");
        return -5;
    }
    if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) ||
         ( descT->m != NT*IB ) || (descT->n != NT*NB) ) {
        plasma_error("PLASMA_zhetrd", "invalid T descriptor");
        return -8;
    }
    if (LDQ < max(1, N)) {
        plasma_error("PLASMA_zhetrd", "illegal value of LDQ");
        return -10;
    }

    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;

    /* Eigenvector computation is rejected here, so every PlasmaVec branch
     * below is currently unreachable; they are kept for when it is enabled. */
    if (jobz == PlasmaVec) {
        plasma_error("PLASMA_zhetrd", "computing the eigenvectors is not supported in this version");
        return -1;
    }

    plasma_sequence_create(plasma, &sequence);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        if (jobz == PlasmaVec) {
            /* On failure release descA as well, as the two-matrix drivers
             * in this file do; freeing only descQ would leak descA. */
            plasma_zooplap2tile( descQ, Q, NB, NB, LDQ, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descQ)) );
        }
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        /* Braced: the translation helpers are macros and may expand to more
         * than one statement, which an unbraced if would not fully guard. */
        if (jobz == PlasmaVec) {
            plasma_ziplap2tile( descQ, Q, NB, NB, LDQ, N,    0, 0, N, N   );
        }
    }

    /* Call the tile interface */
    PLASMA_zhetrd_Tile_Async(jobz, uplo, &descA, D, E, descT, &descQ, sequence, &request);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N    );
        if (jobz == PlasmaVec) {
            plasma_zooptile2lap( descQ, Q, NB, NB, LDQ, N    );
        }
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        if (jobz == PlasmaVec) {
            plasma_desc_mat_free(&descQ);
        }
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N    );
        if (jobz == PlasmaVec) {
            plasma_ziptile2lap( descQ, Q, NB, NB, LDQ, N    );
        }
        plasma_dynamic_sync();
    }

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zlacpy	(	PLASMA_enum	uplo,
		int	M,
		int	N,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t *	B,
		int	LDB
	)

PLASMA_zlacpy copies all or part of a two-dimensional matrix A to another matrix B

Parameters:

[in]	uplo	Specifies the part of the matrix A to be copied to B. = PlasmaUpperLower: All the matrix A = PlasmaUpper: Upper triangular part = PlasmaLower: Lower triangular part
[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in]	A	The M-by-N matrix A. If uplo = PlasmaUpper, only the upper trapezium is accessed; if UPLO = PlasmaLower, only the lower trapezium is accessed.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[out]	B	The M-by-N matrix B. On exit, B = A in the locations specified by UPLO.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,M).

See also:: PLASMA_zlacpy_Tile; PLASMA_zlacpy_Tile_Async; PLASMA_clacpy; PLASMA_dlacpy; PLASMA_slacpy

Definition at line 62 of file zlacpy.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGEMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, PLASMA_zlacpy_Tile_Async(), plasma_zooplap2tile, plasma_zooptile2lap, PlasmaLower, PlasmaUpper, and PlasmaUpperLower.

{
    /*
     * LAPACK-interface driver for the matrix copy: validates the arguments,
     * hands A and B to the layout-translation macros, runs
     * PLASMA_zlacpy_Tile_Async() and passes the matrices back.
     *
     * Return value:
     *   PLASMA_ERR_NOT_INITIALIZED  plasma_context_self() returned NULL
     *   -1, -2, -3, -5, -7          illegal uplo, M, N, LDA, LDB
     *   status of plasma_tune()     when tuning fails
     *   PLASMA_SUCCESS              otherwise (sequence->status is not
     *                               consulted by this driver)
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zlacpy", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments */
    if ( (uplo != PlasmaUpperLower) &&
         (uplo != PlasmaUpper) &&
         (uplo != PlasmaLower) ) {
        plasma_error("PLASMA_zlacpy", "illegal value of uplo");
        return -1;
    }
    if (M < 0) {
        plasma_error("PLASMA_zlacpy", "illegal value of M");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_zlacpy", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_zlacpy", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, M)) {
        plasma_error("PLASMA_zlacpy", "illegal value of LDB");
        return -7;
    }

    /* Quick return: this function returns an int status, not a norm */
    if (min(N, M) == 0)
        return PLASMA_SUCCESS;

    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZGEMM, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zlacpy", "plasma_tune() failed");
        return status;
    }

    /* Set NB */
    NB   = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)) );
    } else {
        plasma_ziplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, M, N);
        /* B has its own leading dimension; LDB (not LDA) must be used here */
        plasma_ziplap2tile(  descB, B, NB, NB, LDB, N, 0, 0, M, N);
    }

    /* Call the tile interface */
    PLASMA_zlacpy_Tile_Async(uplo, &descA, &descB, sequence, &request);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* Only B carries a result; A's tile copy is simply released */
        plasma_zooptile2lap( descB, B, NB, NB, LDB, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        /* A was handed to plasma_ziplap2tile above, so it is passed back
         * through plasma_ziptile2lap too, as the other in-place drivers in
         * this file do for their input matrices. */
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, N );
        plasma_dynamic_sync();
    }

    plasma_sequence_destroy(plasma, sequence);
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

double PLASMA_zlange	(	PLASMA_enum	norm,
		int	M,
		int	N,
		PLASMA_Complex64_t *	A,
		int	LDA,
		double *	work
	)

PLASMA_zlange returns the value

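\[ \mathrm{zlange} = \begin{cases} \max_{i,j} |A(i,j)|, & \text{NORM = PlasmaMaxNorm} \\ \mathrm{norm1}(A), & \text{NORM = PlasmaOneNorm} \\ \mathrm{normI}(A), & \text{NORM = PlasmaInfNorm} \\ \mathrm{normF}(A), & \text{NORM = PlasmaFrobeniusNorm} \end{cases} \]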

where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of squares). Note that max(abs(A(i,j))) is not a consistent matrix norm.

Parameters:

[in]	norm	= PlasmaMaxNorm: Max norm = PlasmaOneNorm: One norm = PlasmaInfNorm: Infinity norm = PlasmaFrobeniusNorm: Frobenius norm
[in]	M	The number of rows of the matrix A. M >= 0. When M = 0, the returned value is set to zero.
[in]	N	The number of columns of the matrix A. N >= 0. When N = 0, the returned value is set to zero.
[in]	A	The M-by-N matrix A.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[in]	work	double precision array of dimension (MAX(1,LWORK)), where LWORK >= M when NORM = PlasmaInfNorm; otherwise, WORK is not referenced.

Returns:

Return values:

the	norm described above.

See also:: PLASMA_zlange_Tile; PLASMA_zlange_Tile_Async; PLASMA_clange; PLASMA_dlange; PLASMA_slange

Definition at line 78 of file zlange.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGEMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, PLASMA_zlange_Tile_Async(), plasma_zooplap2tile, PlasmaFrobeniusNorm, PlasmaInfNorm, PlasmaMaxNorm, and PlasmaOneNorm.

{
    /*
     * LAPACK-interface driver for the general matrix norm: validates the
     * arguments, wraps A in a tile descriptor and returns the value written
     * by PLASMA_zlange_Tile_Async().
     *
     * Failure paths return PLASMA_ERR_NOT_INITIALIZED, the negative index of
     * the offending argument (-1, -2, -3, -5) or the plasma_tune() status,
     * all converted to the double return type. An empty matrix yields 0.0.
     */
    int NB;
    int status;
    double value;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zlange", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if ( (norm != PlasmaMaxNorm) && (norm != PlasmaOneNorm)
        && (norm != PlasmaInfNorm) && (norm != PlasmaFrobeniusNorm) ) {
        plasma_error("PLASMA_zlange", "illegal value of norm");
        return -1;
    }
    if (M < 0) {
        plasma_error("PLASMA_zlange", "illegal value of M");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_zlange", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_zlange", "illegal value of LDA");
        return -5;
    }
    /* Quick return: the norm of an empty matrix is zero */
    if (min(N, M) == 0)
      return (double)0.0;
    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZGEMM, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zlange", "plasma_tune() failed");
        return status;
    }
    /* Set NB */
    NB   = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ziplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, M, N);
    }
    /* Call the tile interface; the norm is delivered through &value */
    PLASMA_zlange_Tile_Async(norm, &descA, work, &value, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* A is input only: nothing is copied back, the tile copy is freed */
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        /* In-place mode: A is passed back through plasma_ziptile2lap
         * (presumably restoring its LAPACK layout - confirm against the macro) */
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    /* sequence->status is not inspected; value is returned as computed */
    plasma_sequence_destroy(plasma, sequence);
    return value;
}

Here is the call graph for this function:

double PLASMA_zlanhe	(	PLASMA_enum	norm,
		PLASMA_enum	uplo,
		int	N,
		PLASMA_Complex64_t *	A,
		int	LDA,
		double *	work
	)

PLASMA_zlanhe returns the value

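\[ \mathrm{zlanhe} = \begin{cases} \max_{i,j} |A(i,j)|, & \text{NORM = PlasmaMaxNorm} \\ \mathrm{norm1}(A), & \text{NORM = PlasmaOneNorm} \\ \mathrm{normI}(A), & \text{NORM = PlasmaInfNorm} \\ \mathrm{normF}(A), & \text{NORM = PlasmaFrobeniusNorm} \end{cases} \]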

where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of squares). Note that max(abs(A(i,j))) is not a consistent matrix norm.

Parameters:

[in]	norm	= PlasmaMaxNorm: Max norm = PlasmaOneNorm: One norm = PlasmaInfNorm: Infinity norm = PlasmaFrobeniusNorm: Frobenius norm
[in]	uplo	= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The number of columns/rows of the matrix A. N >= 0. When N = 0, the returned value is set to zero.
[in]	A	The N-by-N matrix A.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in]	work	double precision array of dimension PLASMA_SIZE if PLASMA_STATIC_SCHEDULING is used, and NULL otherwise.

Returns:

Return values:

the	norm described above.

See also:: PLASMA_zlanhe_Tile; PLASMA_zlanhe_Tile_Async; PLASMA_clanhe; PLASMA_dlanhe; PLASMA_slanhe

Definition at line 77 of file zlanhe.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGEMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, PLASMA_zlanhe_Tile_Async(), plasma_zooplap2tile, PlasmaFrobeniusNorm, PlasmaInfNorm, PlasmaLower, PlasmaMaxNorm, PlasmaOneNorm, and PlasmaUpper.

{
    /*
     * LAPACK-interface driver for the Hermitian matrix norm: validates the
     * arguments, wraps the N-by-N matrix A in a tile descriptor and returns
     * the value written by PLASMA_zlanhe_Tile_Async().
     *
     * Failure paths return PLASMA_ERR_NOT_INITIALIZED, the negative index of
     * the offending argument (-1, -2, -3, -5) or the plasma_tune() status,
     * all converted to the double return type. N == 0 yields 0.0.
     */
    int NB;
    int status;
    double value;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zlanhe", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if ( (norm != PlasmaMaxNorm) && (norm != PlasmaOneNorm)
        && (norm != PlasmaInfNorm) && (norm != PlasmaFrobeniusNorm) ) {
        plasma_error("PLASMA_zlanhe", "illegal value of norm");
        return -1;
    }
    if ( (uplo != PlasmaUpper) && (uplo != PlasmaLower) ) {
        plasma_error("PLASMA_zlanhe", "illegal value of uplo");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_zlanhe", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zlanhe", "illegal value of LDA");
        return -5;
    }
    /* Quick return: the norm of an empty matrix is zero */
    if ( N == 0)
      return (double)0.0;
    /* Tune NB depending on N; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZGEMM, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zlanhe", "plasma_tune() failed");
        return status;
    }
    /* Set NB */
    NB   = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ziplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, N, N);
    }
    /* Call the tile interface; the norm is delivered through &value */
    PLASMA_zlanhe_Tile_Async(norm, uplo, &descA, work, &value, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* A is input only: nothing is copied back, the tile copy is freed */
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        /* In-place mode: A is passed back through plasma_ziptile2lap
         * (presumably restoring its LAPACK layout - confirm against the macro) */
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    /* sequence->status is not inspected; value is returned as computed */
    plasma_sequence_destroy(plasma, sequence);
    return value;
}

Here is the call graph for this function:

double PLASMA_zlansy	(	PLASMA_enum	norm,
		PLASMA_enum	uplo,
		int	N,
		PLASMA_Complex64_t *	A,
		int	LDA,
		double *	work
	)

PLASMA_zlansy returns the value

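\[ \mathrm{zlansy} = \begin{cases} \max_{i,j} |A(i,j)|, & \text{NORM = PlasmaMaxNorm} \\ \mathrm{norm1}(A), & \text{NORM = PlasmaOneNorm} \\ \mathrm{normI}(A), & \text{NORM = PlasmaInfNorm} \\ \mathrm{normF}(A), & \text{NORM = PlasmaFrobeniusNorm} \end{cases} \]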

where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of squares). Note that max(abs(A(i,j))) is not a consistent matrix norm.

Parameters:

[in]	norm	= PlasmaMaxNorm: Max norm = PlasmaOneNorm: One norm = PlasmaInfNorm: Infinity norm = PlasmaFrobeniusNorm: Frobenius norm
[in]	uplo	= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The number of columns/rows of the matrix A. N >= 0. When N = 0, the returned value is set to zero.
[in]	A	The N-by-N matrix A.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in]	work	double precision array of dimension PLASMA_SIZE if PLASMA_STATIC_SCHEDULING is used, and NULL otherwise.

Returns:

Return values:

the	norm described above.

See also:: PLASMA_zlansy_Tile; PLASMA_zlansy_Tile_Async; PLASMA_clansy; PLASMA_dlansy; PLASMA_slansy

Definition at line 77 of file zlansy.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGEMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, PLASMA_zlansy_Tile_Async(), plasma_zooplap2tile, PlasmaFrobeniusNorm, PlasmaInfNorm, PlasmaLower, PlasmaMaxNorm, PlasmaOneNorm, and PlasmaUpper.

{
    /*
     * LAPACK-interface driver for the symmetric matrix norm: validates the
     * arguments, wraps the N-by-N matrix A in a tile descriptor and returns
     * the value written by PLASMA_zlansy_Tile_Async().
     *
     * Failure paths return PLASMA_ERR_NOT_INITIALIZED, the negative index of
     * the offending argument (-1, -2, -3, -5) or the plasma_tune() status,
     * all converted to the double return type. N == 0 yields 0.0.
     */
    int NB;
    int status;
    double value;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zlansy", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if ( (norm != PlasmaMaxNorm) && (norm != PlasmaOneNorm)
        && (norm != PlasmaInfNorm) && (norm != PlasmaFrobeniusNorm) ) {
        plasma_error("PLASMA_zlansy", "illegal value of norm");
        return -1;
    }
    if ( (uplo != PlasmaUpper) && (uplo != PlasmaLower) ) {
        plasma_error("PLASMA_zlansy", "illegal value of uplo");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_zlansy", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zlansy", "illegal value of LDA");
        return -5;
    }
    /* Quick return: the norm of an empty matrix is zero */
    if ( N == 0)
      return (double)0.0;
    /* Tune NB depending on N; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZGEMM, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zlansy", "plasma_tune() failed");
        return status;
    }
    /* Set NB */
    NB   = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ziplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, N, N);
    }
    /* Call the tile interface; the norm is delivered through &value */
    PLASMA_zlansy_Tile_Async(norm, uplo, &descA, work, &value, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* A is input only: nothing is copied back, the tile copy is freed */
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        /* In-place mode: A is passed back through plasma_ziptile2lap
         * (presumably restoring its LAPACK layout - confirm against the macro) */
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    /* sequence->status is not inspected; value is returned as computed */
    plasma_sequence_destroy(plasma, sequence);
    return value;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zLapack_to_Tile	(	PLASMA_Complex64_t *	Af77,
		int	LDA,
		PLASMA_desc *	A
	)

PLASMA_zLapack_to_Tile - Conversion from LAPACK layout to tile layout.

Parameters:

[in]	Af77	LAPACK matrix.
[in]	LDA	The leading dimension of the matrix Af77.
[in,out]	A	Descriptor of the PLASMA matrix in tile layout. If PLASMA_TRANSLATION_MODE is set to PLASMA_INPLACE, A->mat is not used and set to Af77 when returns, else if PLASMA_TRANSLATION_MODE is set to PLASMA_OUTOFPLACE, A->mat has to be allocated before.

Returns:

Return values:

PLASMA_SUCCESS successful exit

See also:: PLASMA_zLapack_to_Tile_Async; PLASMA_zTile_to_Lapack; PLASMA_cLapack_to_Tile; PLASMA_dLapack_to_Tile; PLASMA_sLapack_to_Tile

Definition at line 55 of file ztile.c.

References A, plasma_context_self(), plasma_desc_check(), plasma_dynamic_sync, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_5, plasma_pzlapack_to_tile(), plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, and plasma_sequence_t::status.

{
    /*
     * Validates the descriptor, then dispatches plasma_pzlapack_to_tile()
     * through plasma_parallel_call_5 with the LAPACK matrix, its leading
     * dimension, a by-value copy of the descriptor, and a fresh sequence and
     * request.
     *
     * Return value:
     *   PLASMA_ERR_NOT_INITIALIZED  plasma_context_self() returned NULL
     *   PLASMA_ERR_ILLEGAL_VALUE    plasma_desc_check() rejected the descriptor
     *   sequence->status            otherwise
     */
    PLASMA_desc descA = *A;     /* local copy; the parallel call takes it by value */
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    /* Initialized like in the other drivers of this file, so the request
     * never reaches the parallel routine with indeterminate contents. */
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    int status;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zLapack_to_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check descriptor for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zLapack_to_Tile", "invalid descriptor");
        return PLASMA_ERR_ILLEGAL_VALUE;
    }

    plasma_sequence_create(plasma, &sequence);

    plasma_parallel_call_5(
        plasma_pzlapack_to_tile,
        PLASMA_Complex64_t*, Af77,
        int, LDA,
        PLASMA_desc, descA,
        PLASMA_sequence*, sequence,
        PLASMA_request*, &request);

    plasma_dynamic_sync();

    /* Read the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zlaset	(	PLASMA_enum	uplo,
		int	M,
		int	N,
		PLASMA_Complex64_t	alpha,
		PLASMA_Complex64_t	beta,
		PLASMA_Complex64_t *	A,
		int	LDA
	)

PLASMA_zlaset sets the off-diagonal elements of all or part of a two-dimensional matrix A to alpha and its diagonal elements to beta.

Parameters:

[in]	uplo	Specifies the part of the matrix A to be set. = PlasmaUpperLower: All the matrix A = PlasmaUpper: Upper triangular part is set. The lower triangle is unchanged. = PlasmaLower: Lower triangular part is set. The upper triangle is unchanged.
[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in]	alpha	All the offdiagonal array elements are set to alpha.
[in]	beta	All the diagonal array elements are set to beta.
[in,out]	A	On entry, the m by n matrix A. On exit, A(i,j) = ALPHA, 1 <= i <= m, 1 <= j <= n, i.ne.j; A(i,i) = BETA , 1 <= i <= min(m,n)
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).

See also:: PLASMA_zlaset_Tile; PLASMA_zlaset_Tile_Async; PLASMA_claset; PLASMA_dlaset; PLASMA_slaset

Definition at line 63 of file zlaset.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGEMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, PLASMA_zlaset_Tile_Async(), plasma_zooplap2tile, PlasmaLower, PlasmaUpper, and PlasmaUpperLower.

{
    /*
     * LAPACK-interface driver for matrix initialization: validates the
     * arguments, hands A to the layout-translation macros, runs
     * PLASMA_zlaset_Tile_Async() and passes the result back into A.
     *
     * Return value:
     *   PLASMA_ERR_NOT_INITIALIZED  plasma_context_self() returned NULL
     *   -1, -2, -3, -5              illegal uplo, M, N, LDA
     *   status of plasma_tune()     when tuning fails
     *   PLASMA_SUCCESS              otherwise (sequence->status is not
     *                               consulted by this driver)
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zlaset", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments */
    if ( (uplo != PlasmaUpperLower) &&
         (uplo != PlasmaUpper) &&
         (uplo != PlasmaLower) ) {
        plasma_error("PLASMA_zlaset", "illegal value of uplo");
        return -1;
    }
    if (M < 0) {
        plasma_error("PLASMA_zlaset", "illegal value of M");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_zlaset", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_zlaset", "illegal value of LDA");
        return -5;
    }

    /* Quick return: this function returns an int status, not a norm */
    if (min(N, M) == 0)
        return PLASMA_SUCCESS;

    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZGEMM, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zlaset", "plasma_tune() failed");
        return status;
    }

    /* Set NB */
    NB   = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ziplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, M, N);
    }

    /* Call the tile interface */
    PLASMA_zlaset_Tile_Async(uplo, alpha, beta, &descA, sequence, &request);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* A is an output: copy the tile data back before the tile copy is
         * freed, otherwise the caller's A never receives the result. */
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }

    plasma_sequence_destroy(plasma, sequence);
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

int PLASMA_zlaswp	(	int	N,
		PLASMA_Complex64_t *	A,
		int	LDA,
		int	K1,
		int	K2,
		int *	IPIV,
		int	INCX
	)

PLASMA_zlaswp - performs a series of row interchanges on the matrix A. One row interchange is initiated for each of rows K1 through K2 of A.

Parameters:

[in]	N	The order of the matrix A. N >= 0.
[in]	A	The tile factors L and U from the factorization, computed by PLASMA_zgetrf.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in]	K1	The first element of IPIV for which a row interchange will be done.
[in]	K2	The last element of IPIV for which a row interchange will be done.
[in]	IPIV	The pivot indices from PLASMA_zgetrf.
[in]	INCX	The increment between successive values of IPIV. If INCX is negative, the pivots are applied in reverse order.

Returns:

Return values:

PLASMA_SUCCESS successful exit

Returns:: <0 if -i, the i-th argument had an illegal value

See also:: PLASMA_zlaswp_Tile; PLASMA_zlaswp_Tile_Async; PLASMA_claswp; PLASMA_dlaswp; PLASMA_slaswp; PLASMA_zgetrf

Definition at line 66 of file zlaswp.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGESV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, PLASMA_zlaswp_Tile_Async(), plasma_zooplap2tile, plasma_zooptile2lap, and plasma_sequence_t::status.

{
    /*
     * LAPACK-interface driver for row interchanges: validates N and LDA,
     * hands A to the layout-translation macros, runs
     * PLASMA_zlaswp_Tile_Async() and passes A back.
     *
     * Return value:
     *   PLASMA_ERR_NOT_INITIALIZED  plasma_context_self() returned NULL
     *   -1, -3                      illegal N, LDA
     *   PLASMA_SUCCESS              N == 0 (quick return)
     *   status of plasma_tune()     when tuning fails
     *   sequence->status            otherwise
     *
     * K1, K2, IPIV and INCX are not validated here; they are forwarded to
     * the tile routine as given.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zlaswp", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (N < 0) {
        plasma_error("PLASMA_zlaswp", "illegal value of N");
        return -1;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zlaswp", "illegal value of LDA");
        return -3;
    }
    /* Quick return */
    if ( N == 0 )
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_ZGESV, LDA, N, N);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zlaswp", "plasma_tune() failed");
        return status;
    }
    /* Set NB */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /* NOTE(review): K2 is passed in the position where the sibling drivers
     * pass the row count, so the descriptor appears to cover only the first
     * K2 rows of A - confirm against the macro definition. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K2, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K2, N);
    }
    /* Call the tile interface */
    PLASMA_zlaswp_Tile_Async(&descA, K1, K2, IPIV, INCX, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* A is modified by the interchanges, so it is copied back before
         * the tile copy is freed */
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    /* Read the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zlaswpc	(	int	N,
		PLASMA_Complex64_t *	A,
		int	LDA,
		int	K1,
		int	K2,
		int *	IPIV,
		int	INCX
	)

PLASMA_zlaswpc - performs a series of row interchanges on the matrix A. One row interchange is initiated for each of rows K1 through K2 of A.

Parameters:

[in]	N	The order of the matrix A. N >= 0.
[in,out]	A	On entry, the matrix to which the interchanges are applied, e.g. the tile factors L and U from the factorization, computed by PLASMA_zgetrf. On exit, the permuted matrix.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in]	K1	The first element of IPIV for which a row interchange will be done.
[in]	K2	The last element of IPIV for which a row interchange will be done.
[in]	IPIV	The pivot indices from PLASMA_zgetrf.
[in]	INCX	The increment between successive values of IPIV. If INCX is negative, the pivots are applied in reverse order.

Returns:

Return values:

PLASMA_SUCCESS successful exit

<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_zlaswpc_Tile; PLASMA_zlaswpc_Tile_Async; PLASMA_claswpc; PLASMA_dlaswpc; PLASMA_slaswpc; PLASMA_zgetrf

Definition at line 66 of file zlaswpc.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGESV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, PLASMA_zlaswpc_Tile_Async(), plasma_zooplap2tile, plasma_zooptile2lap, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver around PLASMA_zlaswpc_Tile_Async(): checks N and
     * LDA, wraps A in a tile descriptor, submits the tile routine, converts
     * the result back into A and returns the status stored in the sequence.
     *
     * Return values produced by the code below:
     *   PLASMA_ERR_NOT_INITIALIZED  plasma_context_self() returned NULL
     *   -1                          N < 0
     *   -3                          LDA < max(1,N)
     *   PLASMA_SUCCESS              N == 0 (nothing to do)
     *   status of plasma_tune()     when tuning fails
     *   sequence->status            otherwise
     * K1, K2, IPIV and INCX are not validated here; they are forwarded as is.
     *
     * NOTE(review): PLASMA_NB, PLASMA_TRANSLATION, plasma_dynamic_sync and the
     * plasma_z*lap2tile / plasma_z*tile2lap helpers look like macros; they may
     * use the locals below by name and may contain extra return paths -
     * confirm before renaming or reordering anything.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zlaswpc", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (N < 0) {
        plasma_error("PLASMA_zlaswpc", "illegal value of N");
        return -1;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zlaswpc", "illegal value of LDA");
        return -3;
    }
    /* Quick return */
    if ( N == 0 )
        return PLASMA_SUCCESS;
    /* Tune using the ZGESV profile with (LDA, N, N) as problem sizes */
    status = plasma_tune(PLASMA_FUNC_ZGESV, LDA, N, N);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zlaswpc", "plasma_tune() failed");
        return status;
    }
    /* Read the tile size; presumably selected by plasma_tune() - confirm */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /* Build the tile descriptor for A, out of place or in place.
     * NOTE(review): K2 is passed in the slot where sibling routines pass the
     * row count - confirm against the helper's definition. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K2, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K2, N);
    }
    /* Call the tile interface */
    PLASMA_zlaswpc_Tile_Async(&descA, K1, K2, IPIV, INCX, sequence, &request);
    /* Convert the tiles back into A; the out-of-place path also releases the
     * descriptor's matrix storage once plasma_dynamic_sync() has run. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    /* Save the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

int PLASMA_zlauum	(	PLASMA_enum	uplo,
		int	N,
		PLASMA_Complex64_t *	A,
		int	LDA
	)

PLASMA_zlauum - Computes the product U * U' or L' * L, where the triangular factor U or L is stored in the upper or lower triangular part of the array A.

If UPLO = 'U' or 'u' then the upper triangle of the result is stored, overwriting the factor U in A. If UPLO = 'L' or 'l' then the lower triangle of the result is stored, overwriting the factor L in A.

Parameters:

[in]	uplo	= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The order of the triangular factor U or L. N >= 0.
[in,out]	A	On entry, the triangular factor U or L. On exit, if UPLO = 'U', the upper triangle of A is overwritten with the upper triangle of the product U * U'; if UPLO = 'L', the lower triangle of A is overwritten with the lower triangle of the product L' * L.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_zlauum_Tile; PLASMA_zlauum_Tile_Async; PLASMA_clauum; PLASMA_dlauum; PLASMA_slauum; PLASMA_zpotri

Definition at line 65 of file zlauum.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, PLASMA_zlauum_Tile_Async(), plasma_zooplap2tile, plasma_zooptile2lap, PlasmaLower, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver around PLASMA_zlauum_Tile_Async(): validates uplo,
     * N and LDA, wraps the N-by-N matrix A in a tile descriptor, submits the
     * tile routine, converts the result back into A and returns the status
     * stored in the sequence.
     *
     * Return values produced by the code below:
     *   PLASMA_ERR_NOT_INITIALIZED  plasma_context_self() returned NULL
     *   -1                          uplo is neither PlasmaUpper nor PlasmaLower
     *   -2                          N < 0
     *   -4                          LDA < max(1,N)
     *   PLASMA_SUCCESS              N == 0 (nothing to do)
     *   status of plasma_tune()     when tuning fails
     *   sequence->status            otherwise
     *
     * NOTE(review): PLASMA_NB, PLASMA_TRANSLATION, plasma_dynamic_sync and the
     * plasma_z*lap2tile / plasma_z*tile2lap helpers look like macros; they may
     * use the locals below by name and may contain extra return paths -
     * confirm before renaming or reordering anything.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zlauum", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_zlauum", "illegal value of uplo");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_zlauum", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zlauum", "illegal value of LDA");
        return -4;
    }
    /* Quick return (N < 0 was rejected above, so this only fires for N == 0) */
    if (max(N, 0) == 0)
        return PLASMA_SUCCESS;
    /* Tune using the ZPOSV profile with (N, N, 0) as problem sizes */
    status = plasma_tune(PLASMA_FUNC_ZPOSV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zlauum", "plasma_tune() failed");
        return status;
    }
    /* Read the tile size; presumably selected by plasma_tune() - confirm */
    NB   = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /* Build the tile descriptor for A, out of place or in place */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ziplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, N, N);
    }
    /* Call the tile interface */
    PLASMA_zlauum_Tile_Async(uplo, &descA, sequence, &request);
    /* Convert the tiles back into A; the out-of-place path also releases the
     * descriptor's matrix storage once plasma_dynamic_sync() has run. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    /* Save the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

int PLASMA_zplghe	(	double	bump,
		int	N,
		PLASMA_Complex64_t *	A,
		int	LDA,
		unsigned long long int	seed
	)

PLASMA_zplghe - Generate a random hermitian matrix by tiles.

Parameters:

[in]	bump	The value to add to the diagonal to be sure to have a positive definite matrix.
[in]	N	The order of the matrix A. N >= 0.
[out]	A	On exit, The random hermitian matrix A generated.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in]	seed	The seed used in the random generation.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_zplghe_Tile; PLASMA_zplghe_Tile_Async; PLASMA_cplghe; PLASMA_dplghe; PLASMA_splghe; PLASMA_zplrnt; PLASMA_zplgsy

Definition at line 58 of file zplghe.c.

References A, plasma_desc_t::mat, max, plasma_context_self(), plasma_desc_init(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGEMM, PLASMA_NB, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, plasma_tune(), plasma_ziptile2lap, PLASMA_zplghe_Tile_Async(), PlasmaComplexDouble, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver around PLASMA_zplghe_Tile_Async(): validates N and
     * LDA, builds a descriptor that points directly at the caller's buffer A
     * (no temporary tile storage is allocated here), submits the generator,
     * runs plasma_ziptile2lap() on A and returns the sequence status.
     *
     * Return values produced by the code below:
     *   PLASMA_ERR_NOT_INITIALIZED  plasma_context_self() returned NULL
     *   -2                          N < 0
     *   -4                          LDA < max(1,N)
     *   PLASMA_SUCCESS              N == 0 (nothing to do)
     *   status of plasma_tune()     when tuning fails
     *   sequence->status            otherwise
     *
     * NOTE(review): PLASMA_NB, plasma_dynamic_sync and plasma_ziptile2lap look
     * like macros that may use the locals below by name - confirm before
     * renaming or reordering anything.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zplghe", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (N < 0) {
        plasma_error("PLASMA_zplghe", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zplghe", "illegal value of LDA");
        return -4;
    }
    /* Quick return (N < 0 was rejected above, so this only fires for N == 0) */
    if (max(0, N) == 0)
        return PLASMA_SUCCESS;
    /* Tune using the ZGEMM profile with (N, N, 0) as problem sizes */
    status = plasma_tune(PLASMA_FUNC_ZGEMM, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zplghe", "plasma_tune() failed");
        return status;
    }
    
    /* Read the tile size; presumably selected by plasma_tune() - confirm */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    
    /* Describe A as NB-by-NB tiles and let the descriptor alias the caller's
     * buffer, so the generator writes straight into A. */
    descA = plasma_desc_init(
        PlasmaComplexDouble, NB, NB, NB*NB,
        LDA, N, 0, 0, N, N);
    descA.mat = A;
    /* Call the tile interface */
    PLASMA_zplghe_Tile_Async( bump, &descA, seed, sequence, &request );
    /* Presumably converts the tile data in A back to LAPACK layout in place,
     * then waits for outstanding work - confirm against the helpers. */
    plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
    plasma_dynamic_sync();
    /* Save the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zplgsy	(	PLASMA_Complex64_t	bump,
		int	N,
		PLASMA_Complex64_t *	A,
		int	LDA,
		unsigned long long int	seed
	)

PLASMA_zplgsy - Generate a random symmetric matrix by tiles.

Parameters:

[in]	bump	The value to add to the diagonal to be sure to have a positive definite matrix.
[in]	N	The order of the matrix A. N >= 0.
[out]	A	On exit, the random symmetric matrix A generated.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in]	seed	The seed used in the random generation.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_zplgsy_Tile; PLASMA_zplgsy_Tile_Async; PLASMA_cplgsy; PLASMA_dplgsy; PLASMA_splgsy; PLASMA_zplrnt; PLASMA_zplghe

Definition at line 58 of file zplgsy.c.

References A, plasma_desc_t::mat, max, plasma_context_self(), plasma_desc_init(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGEMM, PLASMA_NB, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, plasma_tune(), plasma_ziptile2lap, PLASMA_zplgsy_Tile_Async(), PlasmaComplexDouble, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver around PLASMA_zplgsy_Tile_Async(): validates N and
     * LDA, builds a descriptor that points directly at the caller's buffer A
     * (no temporary tile storage is allocated here), submits the generator,
     * runs plasma_ziptile2lap() on A and returns the sequence status.
     *
     * Return values produced by the code below:
     *   PLASMA_ERR_NOT_INITIALIZED  plasma_context_self() returned NULL
     *   -2                          N < 0
     *   -4                          LDA < max(1,N)
     *   PLASMA_SUCCESS              N == 0 (nothing to do)
     *   status of plasma_tune()     when tuning fails
     *   sequence->status            otherwise
     *
     * NOTE(review): PLASMA_NB, plasma_dynamic_sync and plasma_ziptile2lap look
     * like macros that may use the locals below by name - confirm before
     * renaming or reordering anything.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zplgsy", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (N < 0) {
        plasma_error("PLASMA_zplgsy", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zplgsy", "illegal value of LDA");
        return -4;
    }
    /* Quick return (N < 0 was rejected above, so this only fires for N == 0) */
    if (max(0, N) == 0)
        return PLASMA_SUCCESS;
    /* Tune using the ZGEMM profile with (N, N, 0) as problem sizes */
    status = plasma_tune(PLASMA_FUNC_ZGEMM, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zplgsy", "plasma_tune() failed");
        return status;
    }
    
    /* Read the tile size; presumably selected by plasma_tune() - confirm */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    
    /* Describe A as NB-by-NB tiles and let the descriptor alias the caller's
     * buffer, so the generator writes straight into A. */
    descA = plasma_desc_init(
        PlasmaComplexDouble, NB, NB, NB*NB,
        LDA, N, 0, 0, N, N);
    descA.mat = A;
    /* Call the tile interface */
    PLASMA_zplgsy_Tile_Async( bump, &descA, seed, sequence, &request );
    /* Presumably converts the tile data in A back to LAPACK layout in place,
     * then waits for outstanding work - confirm against the helpers. */
    plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
    plasma_dynamic_sync();
    /* Save the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zplrnt	(	int	M,
		int	N,
		PLASMA_Complex64_t *	A,
		int	LDA,
		unsigned long long int	seed
	)

PLASMA_zplrnt - Generate a random matrix by tiles.

Parameters:

[in]	M	The number of rows of A. M >= 0.
[in]	N	The number of columns of A. N >= 0.
[out]	A	On exit, The random matrix A generated.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[in]	seed	The seed used in the random generation.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_zplrnt_Tile; PLASMA_zplrnt_Tile_Async; PLASMA_cplrnt; PLASMA_dplrnt; PLASMA_splrnt; PLASMA_zplghe; PLASMA_zplgsy

Definition at line 57 of file zplrnt.c.

References A, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGEMM, PLASMA_NB, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, plasma_tune(), plasma_ziptile2lap, PLASMA_zplrnt_Tile_Async(), PlasmaComplexDouble, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver around PLASMA_zplrnt_Tile_Async(): validates M, N
     * and LDA, builds a descriptor for the M-by-N matrix that points directly
     * at the caller's buffer A (no temporary tile storage is allocated here),
     * submits the generator, runs plasma_ziptile2lap() on A and returns the
     * sequence status.
     *
     * Return values produced by the code below:
     *   PLASMA_ERR_NOT_INITIALIZED  plasma_context_self() returned NULL
     *   -1                          M < 0
     *   -2                          N < 0
     *   -4                          LDA < max(1,M)
     *   PLASMA_SUCCESS              M == 0 or N == 0 (nothing to do)
     *   status of plasma_tune()     when tuning fails
     *   sequence->status            otherwise
     *
     * NOTE(review): PLASMA_NB, plasma_dynamic_sync and plasma_ziptile2lap look
     * like macros that may use the locals below by name - confirm before
     * renaming or reordering anything.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zplrnt", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (M < 0) {
        plasma_error("PLASMA_zplrnt", "illegal value of M");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_zplrnt", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_zplrnt", "illegal value of LDA");
        return -4;
    }
    /* Quick return: an empty matrix needs no generation */
    if (min(M, N) == 0)
        return PLASMA_SUCCESS;
    /* Tune using the ZGEMM profile with (M, N, 0) as problem sizes */
    status = plasma_tune(PLASMA_FUNC_ZGEMM, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zplrnt", "plasma_tune() failed");
        return status;
    }
    
    /* Read the tile size; presumably selected by plasma_tune() - confirm */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /* Describe A as NB-by-NB tiles and let the descriptor alias the caller's
     * buffer, so the generator writes straight into A. */
    descA = plasma_desc_init(
        PlasmaComplexDouble, NB, NB, NB*NB,
        LDA, N, 0, 0, M, N);
    descA.mat = A;
    /* Call the tile interface */
    PLASMA_zplrnt_Tile_Async( &descA, seed, sequence, &request );
    /* Presumably converts the tile data in A back to LAPACK layout in place,
     * then waits for outstanding work - confirm against the helpers. */
    plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
    plasma_dynamic_sync();
    /* Save the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zposv	(	PLASMA_enum	uplo,
		int	N,
		int	NRHS,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t *	B,
		int	LDB
	)

PLASMA_zposv - Computes the solution to a system of linear equations A * X = B, where A is an N-by-N symmetric positive definite (or Hermitian positive definite in the complex case) matrix and X and B are N-by-NRHS matrices. The Cholesky decomposition is used to factor A as

$A = \begin{cases} U^H \times U, & \text{if uplo = PlasmaUpper} \\ L \times L^H, & \text{if uplo = PlasmaLower} \end{cases}$

where U is an upper triangular matrix and L is a lower triangular matrix. The factored form of A is then used to solve the system of equations A * X = B.

Parameters:

[in]	uplo	Specifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The number of linear equations, i.e., the order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in,out]	A	On entry, the symmetric positive definite (or Hermitian) matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if return value = 0, the factor U or L from the Cholesky factorization A = U**H*U or A = L*L**H.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in,out]	B	On entry, the N-by-NRHS right hand side matrix B. On exit, if return value = 0, the N-by-NRHS solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, the leading minor of order i of A is not positive definite, so the factorization could not be completed, and the solution has not been computed.

See also:: PLASMA_zposv_Tile; PLASMA_zposv_Tile_Async; PLASMA_cposv; PLASMA_dposv; PLASMA_sposv

Definition at line 82 of file zposv.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_zposv_Tile_Async(), PlasmaLower, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver around PLASMA_zposv_Tile_Async(): validates the
     * arguments, wraps A (N-by-N) and B (N-by-NRHS) in tile descriptors,
     * submits the tile routine, converts both matrices back into the caller's
     * buffers and returns the status stored in the sequence.
     *
     * Return values produced by the code below:
     *   PLASMA_ERR_NOT_INITIALIZED  plasma_context_self() returned NULL
     *   -1                          uplo is neither PlasmaUpper nor PlasmaLower
     *   -2                          N < 0
     *   -3                          NRHS < 0
     *   -5                          LDA < max(1,N)
     *   -7                          LDB < max(1,N)
     *   PLASMA_SUCCESS              N == 0 or NRHS == 0 (nothing to do)
     *   status of plasma_tune()     when tuning fails
     *   sequence->status            otherwise
     *
     * NOTE(review): PLASMA_NB, PLASMA_TRANSLATION, plasma_dynamic_sync and the
     * plasma_z*lap2tile / plasma_z*tile2lap helpers look like macros; they may
     * use the locals below by name and may contain extra return paths -
     * confirm before renaming or reordering anything.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zposv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_zposv", "illegal value of uplo");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_zposv", "illegal value of N");
        return -2;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_zposv", "illegal value of NRHS");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zposv", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_zposv", "illegal value of LDB");
        return -7;
    }
    /* Quick return - currently NOT equivalent to LAPACK's
     * LAPACK does not have such check for DPOSV */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune using the ZPOSV profile with (N, N, NRHS) as problem sizes */
    status = plasma_tune(PLASMA_FUNC_ZPOSV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zposv", "plasma_tune() failed");
        return status;
    }
    /* Read the tile size; presumably selected by plasma_tune() - confirm */
    NB    = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /* Build the tile descriptors for A and B, out of place or in place.
     * The last out-of-place argument is a statement list, presumably the
     * cleanup to run if that conversion fails - confirm. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
    }
    /* Call the tile interface */
    PLASMA_zposv_Tile_Async(uplo, &descA, &descB, sequence, &request);
    /* Convert A and B back into the caller's buffers; the out-of-place path
     * also releases both descriptors' storage after plasma_dynamic_sync(). */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_zooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    
    /* Save the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zpotrf	(	PLASMA_enum	uplo,
		int	N,
		PLASMA_Complex64_t *	A,
		int	LDA
	)

PLASMA_zpotrf - Computes the Cholesky factorization of a symmetric positive definite (or Hermitian positive definite in the complex case) matrix A. The factorization has the form

$A = \begin{cases} U^H \times U, & \text{if uplo = PlasmaUpper} \\ L \times L^H, & \text{if uplo = PlasmaLower} \end{cases}$

where U is an upper triangular matrix and L is a lower triangular matrix.

Parameters:

[in]	uplo	= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The order of the matrix A. N >= 0.
[in,out]	A	On entry, the symmetric positive definite (or Hermitian) matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if return value = 0, the factor U or L from the Cholesky factorization A = U**H*U or A = L*L**H.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, the leading minor of order i of A is not positive definite, so the factorization could not be completed, and the solution has not been computed.

See also:: PLASMA_zpotrf_Tile; PLASMA_zpotrf_Tile_Async; PLASMA_cpotrf; PLASMA_dpotrf; PLASMA_spotrf; PLASMA_zpotrs

Definition at line 70 of file zpotrf.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_zpotrf_Tile_Async(), PlasmaLower, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver around PLASMA_zpotrf_Tile_Async(): validates uplo,
     * N and LDA, wraps the N-by-N matrix A in a tile descriptor, submits the
     * tile routine, converts the result back into A and returns the status
     * stored in the sequence.
     *
     * Return values produced by the code below:
     *   PLASMA_ERR_NOT_INITIALIZED  plasma_context_self() returned NULL
     *   -1                          uplo is neither PlasmaUpper nor PlasmaLower
     *   -2                          N < 0
     *   -4                          LDA < max(1,N)
     *   PLASMA_SUCCESS              N == 0 (nothing to do)
     *   status of plasma_tune()     when tuning fails
     *   sequence->status            otherwise
     *
     * NOTE(review): PLASMA_NB, PLASMA_TRANSLATION, plasma_dynamic_sync and the
     * plasma_z*lap2tile / plasma_z*tile2lap helpers look like macros; they may
     * use the locals below by name and may contain extra return paths -
     * confirm before renaming or reordering anything.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zpotrf", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_zpotrf", "illegal value of uplo");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_zpotrf", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zpotrf", "illegal value of LDA");
        return -4;
    }
    /* Quick return (N < 0 was rejected above, so this only fires for N == 0) */
    if (max(N, 0) == 0)
        return PLASMA_SUCCESS;
    /* Tune using the ZPOSV profile with (N, N, 0) as problem sizes */
    status = plasma_tune(PLASMA_FUNC_ZPOSV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zpotrf", "plasma_tune() failed");
        return status;
    }
    /* Read the tile size; presumably selected by plasma_tune() - confirm */
    NB   = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /* Build the tile descriptor for A, out of place or in place */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ziplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, N, N);
    }
    /* Call the tile interface */
    PLASMA_zpotrf_Tile_Async(uplo, &descA, sequence, &request);
    /* Convert the tiles back into A; the out-of-place path also releases the
     * descriptor's matrix storage once plasma_dynamic_sync() has run. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    /* Save the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zpotri	(	PLASMA_enum	uplo,
		int	N,
		PLASMA_Complex64_t *	A,
		int	LDA
	)

PLASMA_zpotri - Computes the inverse of a complex Hermitian positive definite matrix A using the Cholesky factorization A = U**H*U or A = L*L**H computed by PLASMA_zpotrf.

Parameters:

[in]	uplo	= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The order of the matrix A. N >= 0.
[in,out]	A	On entry, the triangular factor U or L from the Cholesky factorization A = U**H*U or A = L*L**H, as computed by PLASMA_zpotrf. On exit, the upper or lower triangle of the (Hermitian) inverse of A, overwriting the input factor U or L.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, the (i,i) element of the factor U or L is zero, and the inverse could not be computed.

See also:: PLASMA_zpotri_Tile; PLASMA_zpotri_Tile_Async; PLASMA_cpotri; PLASMA_dpotri; PLASMA_spotri; PLASMA_zpotrf

Definition at line 62 of file zpotri.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_zpotri_Tile_Async(), PlasmaLower, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver around PLASMA_zpotri_Tile_Async(): validates uplo,
     * N and LDA, wraps the N-by-N matrix A in a tile descriptor, submits the
     * tile routine, converts the result back into A and returns the status
     * stored in the sequence.
     *
     * Return values produced by the code below:
     *   PLASMA_ERR_NOT_INITIALIZED  plasma_context_self() returned NULL
     *   -1                          uplo is neither PlasmaUpper nor PlasmaLower
     *   -2                          N < 0
     *   -4                          LDA < max(1,N)
     *   PLASMA_SUCCESS              N == 0 (nothing to do)
     *   status of plasma_tune()     when tuning fails
     *   sequence->status            otherwise
     *
     * NOTE(review): PLASMA_NB, PLASMA_TRANSLATION, plasma_dynamic_sync and the
     * plasma_z*lap2tile / plasma_z*tile2lap helpers look like macros; they may
     * use the locals below by name and may contain extra return paths -
     * confirm before renaming or reordering anything.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zpotri", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_zpotri", "illegal value of uplo");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_zpotri", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zpotri", "illegal value of LDA");
        return -4;
    }
    /* Quick return (N < 0 was rejected above, so this only fires for N == 0) */
    if (max(N, 0) == 0)
        return PLASMA_SUCCESS;
    /* Tune using the ZPOSV profile with (N, N, 0) as problem sizes */
    status = plasma_tune(PLASMA_FUNC_ZPOSV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zpotri", "plasma_tune() failed");
        return status;
    }
    /* Read the tile size; presumably selected by plasma_tune() - confirm */
    NB   = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /* Build the tile descriptor for A, out of place or in place */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ziplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, N, N);
    }
    /* Call the tile interface */
    PLASMA_zpotri_Tile_Async(uplo, &descA, sequence, &request);
    /* Convert the tiles back into A; the out-of-place path also releases the
     * descriptor's matrix storage once plasma_dynamic_sync() has run. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    /* Save the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zpotrs	(	PLASMA_enum	uplo,
		int	N,
		int	NRHS,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t *	B,
		int	LDB
	)

PLASMA_zpotrs - Solves a system of linear equations A * X = B with a symmetric positive definite (or Hermitian positive definite in the complex case) matrix A using the Cholesky factorization A = U**H*U or A = L*L**H computed by PLASMA_zpotrf.

Parameters:

[in]	uplo	= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]	A	The triangular factor U or L from the Cholesky factorization A = U**H*U or A = L*L**H, computed by PLASMA_zpotrf.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in,out]	B	On entry, the N-by-NRHS right hand side matrix B. On exit, if return value = 0, the N-by-NRHS solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_zpotrs_Tile; PLASMA_zpotrs_Tile_Async; PLASMA_cpotrs; PLASMA_dpotrs; PLASMA_spotrs; PLASMA_zpotrf

Definition at line 67 of file zpotrs.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_zpotrs_Tile_Async(), PlasmaLower, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver around PLASMA_zpotrs_Tile_Async(): validates the
     * arguments, wraps A (N-by-N) and B (N-by-NRHS) in tile descriptors,
     * submits the tile routine, converts the result back into the caller's
     * buffers and returns the status stored in the sequence.
     *
     * Return values produced by the code below:
     *   PLASMA_ERR_NOT_INITIALIZED  plasma_context_self() returned NULL
     *   -1                          uplo is neither PlasmaUpper nor PlasmaLower
     *   -2                          N < 0
     *   -3                          NRHS < 0
     *   -5                          LDA < max(1,N)
     *   -7                          LDB < max(1,N)
     *   PLASMA_SUCCESS              N == 0 or NRHS == 0 (nothing to do)
     *   status of plasma_tune()     when tuning fails
     *   sequence->status            otherwise
     *
     * NOTE(review): PLASMA_NB, PLASMA_TRANSLATION, plasma_dynamic_sync and the
     * plasma_z*lap2tile / plasma_z*tile2lap helpers look like macros; they may
     * use the locals below by name and may contain extra return paths -
     * confirm before renaming or reordering anything.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zpotrs", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_zpotrs", "illegal value of uplo");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_zpotrs", "illegal value of N");
        return -2;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_zpotrs", "illegal value of NRHS");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zpotrs", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_zpotrs", "illegal value of LDB");
        return -7;
    }
    /* Quick return */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune using the ZPOSV profile with (N, N, NRHS) as problem sizes */
    status = plasma_tune(PLASMA_FUNC_ZPOSV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zpotrs", "plasma_tune() failed");
        return status;
    }
    /* Read the tile size; presumably selected by plasma_tune() - confirm */
    NB    = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /* Build the tile descriptors for A and B, out of place or in place.
     * The last out-of-place argument is a statement list, presumably the
     * cleanup to run if that conversion fails - confirm. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
    }
    /* Call the tile interface */
    PLASMA_zpotrs_Tile_Async(uplo, &descA, &descB, sequence, &request);
    /* Out of place, only B is copied back; A's tile copy is just released.
     * In place, both A and B are converted back. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    
    /* Save the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zsymm	(	PLASMA_enum	side,
		PLASMA_enum	uplo,
		int	M,
		int	N,
		PLASMA_Complex64_t	alpha,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t *	B,
		int	LDB,
		PLASMA_Complex64_t	beta,
		PLASMA_Complex64_t *	C,
		int	LDC
	)

PLASMA_zsymm - Performs one of the matrix-matrix operations

$C = \alpha \times A \times B + \beta \times C$

or

$C = \alpha \times B \times A + \beta \times C$

where alpha and beta are scalars, A is a symmetric matrix and B and C are m by n matrices.

Parameters:

[in]	side	Specifies whether the symmetric matrix A appears on the left or right in the operation as follows: = PlasmaLeft: $C = \alpha \times A \times B + \beta \times C$ = PlasmaRight: $C = \alpha \times B \times A + \beta \times C$
[in]	uplo	Specifies whether the upper or lower triangular part of the symmetric matrix A is to be referenced as follows: = PlasmaLower: Only the lower triangular part of the symmetric matrix A is to be referenced. = PlasmaUpper: Only the upper triangular part of the symmetric matrix A is to be referenced.
[in]	M	Specifies the number of rows of the matrix C. M >= 0.
[in]	N	Specifies the number of columns of the matrix C. N >= 0.
[in]	alpha	Specifies the scalar alpha.
[in]	A	A is a LDA-by-ka matrix, where ka is M when side = PlasmaLeft, and is N otherwise. Only the uplo triangular part is referenced.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,ka).
[in]	B	B is a LDB-by-N matrix, where the leading M-by-N part of the array B must contain the matrix B.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,M).
[in]	beta	Specifies the scalar beta.
[in,out]	C	C is a LDC-by-N matrix. On exit, the array is overwritten by the M by N updated matrix.
[in]	LDC	The leading dimension of the array C. LDC >= max(1,M).

Returns:

Return values:

PLASMA_SUCCESS successful exit

See also:: PLASMA_zsymm_Tile; PLASMA_csymm; PLASMA_dsymm; PLASMA_ssymm

Definition at line 94 of file zsymm.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZSYMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_zsymm_Tile_Async(), PlasmaLeft, PlasmaLower, PlasmaRight, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the symmetric matrix-matrix product.
     *
     * Validates the arguments, converts A, B and C into tile descriptors,
     * calls PLASMA_zsymm_Tile_Async() on them, converts the result back and
     * returns the status recorded in the sequence.
     *
     * Returns PLASMA_ERR_NOT_INITIALIZED when no PLASMA context is available,
     * -i when the i-th argument is illegal, the plasma_tune() status when
     * tuning fails, PLASMA_SUCCESS on a quick return, and sequence->status
     * otherwise.
     *
     * NOTE(review): plasma_zooplap2tile / plasma_ziplap2tile /
     * plasma_zooptile2lap / plasma_ziptile2lap / plasma_dynamic_sync are
     * macros defined elsewhere; they presumably rely on the locals `plasma`,
     * `sequence` and `request` by name -- confirm before renaming anything.
     */
    int NB;
    int Am;
    int status;
    PLASMA_desc descA, descB, descC;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zsymm", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments; each error code is the negated position of the
     * offending argument in the parameter list. */
    if ( (side != PlasmaLeft) && (side != PlasmaRight) ){
        plasma_error("PLASMA_zsymm", "illegal value of side");
        return -1;
    }
    if ((uplo != PlasmaLower) && (uplo != PlasmaUpper)) {
        plasma_error("PLASMA_zsymm", "illegal value of uplo");
        return -2;
    }
    /* Order of the square matrix A: M when A is applied from the left,
     * N when it is applied from the right. */
    Am = ( side == PlasmaLeft ) ? M : N;
    if (M < 0) {
        plasma_error("PLASMA_zsymm", "illegal value of M");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_zsymm", "illegal value of N");
        return -4;
    }
    if (LDA < max(1, Am)) {
        plasma_error("PLASMA_zsymm", "illegal value of LDA");
        return -7;
    }
    if (LDB < max(1, M)) {
        plasma_error("PLASMA_zsymm", "illegal value of LDB");
        return -9;
    }
    if (LDC < max(1, M)) {
        plasma_error("PLASMA_zsymm", "illegal value of LDC");
        return -12;
    }
    /* Quick return: C is empty, or alpha == 0 and beta == 1 so that C would
     * be left unchanged. */
    if (M == 0 || N == 0 ||
        ((alpha == (PLASMA_Complex64_t)0.0) && beta == (PLASMA_Complex64_t)1.0))
        return PLASMA_SUCCESS;
    /* Tune the blocking parameters for an M-by-N problem */
    status = plasma_tune(PLASMA_FUNC_ZSYMM, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zsymm", "plasma_tune() failed");
        return status;
    }
    /* Use the current PLASMA_NB for both blocking arguments of the
     * conversion macros */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /* Build tile descriptors for A (Am-by-Am), B and C (both M-by-N).
     * In the out-of-place variant the trailing macro argument is cleanup code
     * releasing the descriptors handled so far; presumably it runs when the
     * conversion fails -- confirm against the macro definition. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, Am, 0, 0, Am, Am, 
                             plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, N,  0, 0, M,  N,
                             plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
        plasma_zooplap2tile( descC, C, NB, NB, LDC, N,  0, 0, M,  N,
                             plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descC)));
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, Am, 0, 0, Am, Am );
        plasma_ziplap2tile( descB, B, NB, NB, LDB, N,  0, 0, M,  N  );
        plasma_ziplap2tile( descC, C, NB, NB, LDC, N,  0, 0, M,  N  );
    }
    /* Call the tile interface */
    PLASMA_zsymm_Tile_Async(
        side, uplo, alpha, &descA, &descB, beta, &descC, sequence, &request);
    /* Out-of-place: only C (the output) is converted back, then every
     * descriptor is freed after the sync. In-place: all three matrices are
     * converted back before the sync. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
        plasma_desc_mat_free(&descC);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, Am );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, N );
        plasma_ziptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
    }
    /* Read the status before the sequence object is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zsyr2k	(	PLASMA_enum	uplo,
		PLASMA_enum	trans,
		int	N,
		int	K,
		PLASMA_Complex64_t	alpha,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t *	B,
		int	LDB,
		PLASMA_Complex64_t	beta,
		PLASMA_Complex64_t *	C,
		int	LDC
	)

PLASMA_zsyr2k - Performs one of the symmetric rank 2k operations

$C = \alpha [ op( A ) \times conjg( op( B )' )] + \alpha [ op( B ) \times conjg( op( A )' )] + \beta C$

, or

$C = \alpha [ conjg( op( A )' ) \times op( B ) ] + \alpha [ conjg( op( B )' ) \times op( A ) ] + \beta C$

,

where op( X ) is one of

op( X ) = X or op( X ) = conjg( X' )

where alpha and beta are scalars, C is an n-by-n symmetric matrix and A and B are n-by-k matrices in the first case and k-by-n matrices in the second case.

Parameters:

[in]	uplo	= PlasmaUpper: Upper triangle of C is stored; = PlasmaLower: Lower triangle of C is stored.
[in]	trans	Specifies whether the matrix A is transposed or conjugate transposed: = PlasmaNoTrans: $C = \alpha [ op( A ) \times conjg( op( B )' )] + \alpha [ op( B ) \times conjg( op( A )' )] + \beta C$ = PlasmaTrans: $C = \alpha [ conjg( op( A )' ) \times op( B ) ] + \alpha [ conjg( op( B )' ) \times op( A ) ] + \beta C$
[in]	N	N specifies the order of the matrix C. N must be at least zero.
[in]	K	K specifies the number of columns of the A and B matrices with trans = PlasmaNoTrans. K specifies the number of rows of the A and B matrices with trans = PlasmaTrans.
[in]	alpha	alpha specifies the scalar alpha.
[in]	A	A is a LDA-by-ka matrix, where ka is K when trans = PlasmaNoTrans, and is N otherwise.
[in]	LDA	The leading dimension of the array A. If trans = PlasmaNoTrans, LDA must be at least max( 1, N ), otherwise LDA must be at least max( 1, K ).
[in]	B	B is a LDB-by-kb matrix, where kb is K when trans = PlasmaNoTrans, and is N otherwise.
[in]	LDB	The leading dimension of the array B. If trans = PlasmaNoTrans, LDB must be at least max( 1, N ), otherwise LDB must be at least max( 1, K ).
[in]	beta	beta specifies the scalar beta.
[in,out]	C	C is a LDC-by-N matrix. On exit, the uplo part of the array is overwritten by the uplo part of the updated matrix.
[in]	LDC	The leading dimension of the array C. LDC >= max( 1, N ).

Returns:

Return values:

PLASMA_SUCCESS successful exit

See also:: PLASMA_zsyr2k_Tile; PLASMA_csyr2k; PLASMA_dsyr2k; PLASMA_ssyr2k

Definition at line 96 of file zsyr2k.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZSYRK, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_zsyr2k_Tile_Async(), PlasmaLower, PlasmaNoTrans, PlasmaTrans, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the symmetric rank-2k update.
     *
     * Validates the arguments, converts A, B and C into tile descriptors,
     * calls PLASMA_zsyr2k_Tile_Async() on them, converts the result back and
     * returns the status recorded in the sequence.
     *
     * Returns PLASMA_ERR_NOT_INITIALIZED when no PLASMA context is available,
     * -i when the i-th argument is illegal, the plasma_tune() status when
     * tuning fails, PLASMA_SUCCESS on a quick return, and sequence->status
     * otherwise.
     *
     * NOTE(review): the lap2tile/tile2lap helpers and plasma_dynamic_sync are
     * macros defined elsewhere; they presumably rely on the locals `plasma`,
     * `sequence` and `request` by name -- confirm before renaming anything.
     */
    int NB;
    int Am, An;
    int status;
    PLASMA_desc descA, descB, descC;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zsyr2k", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments; each error code is the negated position of the
     * offending argument in the parameter list. */
    if ((uplo != PlasmaUpper) && (uplo != PlasmaLower)) {
        plasma_error("PLASMA_zsyr2k", "illegal value of uplo");
        return -1;
    }
    if ((trans != PlasmaNoTrans) && (trans != PlasmaTrans)) {
        plasma_error("PLASMA_zsyr2k", "illegal value of trans");
        return -2;
    }
    /* Stored shape of A and B: N-by-K when not transposed, K-by-N otherwise */
    if ( trans == PlasmaNoTrans ) { 
        Am = N; An = K;
    } else {
        Am = K; An = N;
    }
    if (N < 0) {
        plasma_error("PLASMA_zsyr2k", "illegal value of N");
        return -3;
    }
    if (K < 0) {
        plasma_error("PLASMA_zsyr2k", "illegal value of K");
        return -4;
    }
    if (LDA < max(1, Am)) {
        plasma_error("PLASMA_zsyr2k", "illegal value of LDA");
        return -7;
    }
    if (LDB < max(1, Am)) {
        plasma_error("PLASMA_zsyr2k", "illegal value of LDB");
        return -9;
    }
    if (LDC < max(1, N)) {
        plasma_error("PLASMA_zsyr2k", "illegal value of LDC");
        return -12;
    }
    /* Quick return: C is empty, or the update term vanishes (alpha == 0 or
     * K == 0) while beta == 1, so C would be left unchanged.
     * K is an int, so it is compared with the integer constant 0. */
    if (N == 0 ||
        ((alpha == (PLASMA_Complex64_t)0.0 || K == 0) && beta == (PLASMA_Complex64_t)1.0))
        return PLASMA_SUCCESS;
    /* Tune the blocking parameters for an N-by-K problem */
    status = plasma_tune(PLASMA_FUNC_ZSYRK, N, K, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zsyr2k", "plasma_tune() failed");
        return status;
    }
    /* Use the current PLASMA_NB for both blocking arguments of the
     * conversion macros */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /* Build tile descriptors for A and B (Am-by-An) and C (N-by-N).
     * In the out-of-place variant the trailing macro argument is cleanup code
     * releasing the descriptors handled so far; presumably it runs when the
     * conversion fails -- confirm against the macro definition. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
        plasma_zooplap2tile( descC, C, NB, NB, LDC, N,  0, 0, N,  N,  plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descC)));
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An );
        plasma_ziplap2tile( descB, B, NB, NB, LDB, An, 0, 0, Am, An );
        plasma_ziplap2tile( descC, C, NB, NB, LDC, N,  0, 0, N,  N );
    }
    /* Call the tile interface */
    PLASMA_zsyr2k_Tile_Async(uplo, trans, alpha, &descA, &descB, beta, &descC, sequence, &request);
    /* Out-of-place: only C (the output) is converted back, then every
     * descriptor is freed after the sync. In-place: all three matrices are
     * converted back before the sync. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
        plasma_desc_mat_free(&descC);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, An );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, An );
        plasma_ziptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
    }
    /* Read the status before the sequence object is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zsyrk	(	PLASMA_enum	uplo,
		PLASMA_enum	trans,
		int	N,
		int	K,
		PLASMA_Complex64_t	alpha,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t	beta,
		PLASMA_Complex64_t *	C,
		int	LDC
	)

PLASMA_zsyrk - Performs one of the symmetric rank k operations

$C = \alpha [ op( A ) \times conjg( op( A )' )] + \beta C$

,

where op( X ) is one of

op( X ) = X or op( X ) = conjg( X' )

where alpha and beta are scalars, C is an n-by-n symmetric matrix and A is an n-by-k matrix in the first case and a k-by-n matrix in the second case.

Parameters:

[in]	uplo	= PlasmaUpper: Upper triangle of C is stored; = PlasmaLower: Lower triangle of C is stored.
[in]	trans	Specifies whether the matrix A is transposed or conjugate transposed: = PlasmaNoTrans: A is not transposed; = PlasmaTrans : A is transposed.
[in]	N	N specifies the order of the matrix C. N must be at least zero.
[in]	K	K specifies the number of columns of the matrix op( A ).
[in]	alpha	alpha specifies the scalar alpha.
[in]	A	A is a LDA-by-ka matrix, where ka is K when trans = PlasmaNoTrans, and is N otherwise.
[in]	LDA	The leading dimension of the array A. If trans = PlasmaNoTrans, LDA must be at least max( 1, N ), otherwise LDA must be at least max( 1, K ).
[in]	beta	beta specifies the scalar beta
[in,out]	C	C is a LDC-by-N matrix. On exit, the uplo part of the array is overwritten by the uplo part of the updated matrix.
[in]	LDC	The leading dimension of the array C. LDC >= max( 1, N ).

Returns:

Return values:

PLASMA_SUCCESS successful exit

See also:: PLASMA_zsyrk_Tile; PLASMA_csyrk; PLASMA_dsyrk; PLASMA_ssyrk

Definition at line 85 of file zsyrk.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZSYRK, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_zsyrk_Tile_Async(), PlasmaLower, PlasmaNoTrans, PlasmaTrans, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the symmetric rank-k update.
     *
     * Validates the arguments, converts A and C into tile descriptors, calls
     * PLASMA_zsyrk_Tile_Async() on them, converts the result back and returns
     * the status recorded in the sequence.
     *
     * Returns PLASMA_ERR_NOT_INITIALIZED when no PLASMA context is available,
     * -i when the i-th argument is illegal, the plasma_tune() status when
     * tuning fails, PLASMA_SUCCESS on a quick return, and sequence->status
     * otherwise.
     *
     * NOTE(review): the lap2tile/tile2lap helpers and plasma_dynamic_sync are
     * macros defined elsewhere; they presumably rely on the locals `plasma`,
     * `sequence` and `request` by name -- confirm before renaming anything.
     */
    int NB;
    int Am, An;
    int status;
    PLASMA_desc descA, descC;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zsyrk", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments; each error code is the negated position of the
     * offending argument in the parameter list. */
    if ((uplo != PlasmaUpper) && (uplo != PlasmaLower)) {
        plasma_error("PLASMA_zsyrk", "illegal value of uplo");
        return -1;
    }
    if ((trans != PlasmaNoTrans) && (trans != PlasmaTrans)) {
        plasma_error("PLASMA_zsyrk", "illegal value of trans");
        return -2;
    }
    /* Stored shape of A: N-by-K when not transposed, K-by-N otherwise */
    if ( trans == PlasmaNoTrans ) { 
        Am = N; An = K;
    } else {
        Am = K; An = N;
    }
    if (N < 0) {
        plasma_error("PLASMA_zsyrk", "illegal value of N");
        return -3;
    }
    if (K < 0) {
        plasma_error("PLASMA_zsyrk", "illegal value of K");
        return -4;
    }
    if (LDA < max(1, Am)) {
        plasma_error("PLASMA_zsyrk", "illegal value of LDA");
        return -7;
    }
    if (LDC < max(1, N)) {
        plasma_error("PLASMA_zsyrk", "illegal value of LDC");
        return -10;
    }
    /* Quick return: C is empty, or the update term vanishes (alpha == 0 or
     * K == 0) while beta == 1, so C would be left unchanged.
     * K is an int, so it is compared with the integer constant 0. */
    if (N == 0 ||
        ((alpha == (PLASMA_Complex64_t)0.0 || K == 0) && beta == (PLASMA_Complex64_t)1.0))
        return PLASMA_SUCCESS;
    /* Tune the blocking parameters for an N-by-K problem */
    status = plasma_tune(PLASMA_FUNC_ZSYRK, N, K, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zsyrk", "plasma_tune() failed");
        return status;
    }
    /* Use the current PLASMA_NB for both blocking arguments of the
     * conversion macros */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /* Build tile descriptors for A (Am-by-An) and C (N-by-N).
     * In the out-of-place variant the trailing macro argument is cleanup code
     * releasing the descriptors handled so far; presumably it runs when the
     * conversion fails -- confirm against the macro definition. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descC, C, NB, NB, LDC, N,  0, 0, N,  N,  plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descC)));
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An );
        plasma_ziplap2tile( descC, C, NB, NB, LDC, N,  0, 0, N,  N );
    }
    /* Call the tile interface */
    PLASMA_zsyrk_Tile_Async(uplo, trans, alpha, &descA, beta, &descC, sequence, &request);
    /* Out-of-place: only C (the output) is converted back, then both
     * descriptors are freed after the sync. In-place: both matrices are
     * converted back before the sync. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descC);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, An );
        plasma_ziptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
    }
    /* Read the status before the sequence object is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zTile_to_Lapack	(	PLASMA_desc *	A,
		PLASMA_Complex64_t *	Af77,
		int	LDA
	)

PLASMA_Tile_to_Lapack - Conversion from tile layout to LAPACK layout.

Parameters:

[in]	A	Descriptor of the PLASMA matrix in tile layout.
[in,out]	Af77	LAPACK matrix. If PLASMA_TRANSLATION_MODE is set to PLASMA_INPLACE, Af77 has to be A->mat, else if PLASMA_TRANSLATION_MODE is set to PLASMA_OUTOFPLACE, Af77 has to be allocated before.
[in]	LDA	The leading dimension of the matrix Af77.

Returns:

Return values:

PLASMA_SUCCESS successful exit

See also:: PLASMA_zTile_to_Lapack_Async; PLASMA_zLapack_to_Tile; PLASMA_cTile_to_Lapack; PLASMA_dTile_to_Lapack; PLASMA_sTile_to_Lapack

Definition at line 191 of file ztile.c.

References A, plasma_context_self(), plasma_desc_check(), plasma_dynamic_sync, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), plasma_pztile_to_lapack(), plasma_sequence_create(), plasma_sequence_destroy(), plasma_static_call_5, PLASMA_SUCCESS, and plasma_sequence_t::status.

{
    /*
     * Converts the tile-layout matrix described by A into the LAPACK-layout
     * array Af77 (leading dimension LDA) by running plasma_pztile_to_lapack
     * through plasma_static_call_5, then waits for completion.
     *
     * Returns PLASMA_ERR_NOT_INITIALIZED when no PLASMA context is available,
     * PLASMA_ERR_ILLEGAL_VALUE when A is NULL or fails plasma_desc_check(),
     * and sequence->status otherwise.
     *
     * NOTE(review): plasma_static_call_5 and plasma_dynamic_sync are macros
     * defined elsewhere; they presumably rely on the local `plasma` by name
     * -- confirm before renaming anything.
     */
    PLASMA_desc descA;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    /* NOTE(review): unlike the other drivers, `request` is not initialised
     * here before its address is handed to the call -- confirm whether the
     * callee reads it. */
    PLASMA_request request;
    int status;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zTile_to_Lapack", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Reject a NULL descriptor before it is copied: the by-value copy below
     * would otherwise dereference a null pointer. */
    if (A == NULL) {
        plasma_error("PLASMA_zTile_to_Lapack", "NULL descriptor");
        return PLASMA_ERR_ILLEGAL_VALUE;
    }
    /* Work on a local copy; the descriptor is passed by value to the call */
    descA = *A;
    /* Check descriptor for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zTile_to_Lapack", "invalid descriptor");
        return PLASMA_ERR_ILLEGAL_VALUE;
    }
    plasma_sequence_create(plasma, &sequence);
    plasma_static_call_5(
        plasma_pztile_to_lapack,
        PLASMA_desc, descA,
        PLASMA_Complex64_t*, Af77,
        int, LDA,
        PLASMA_sequence*, sequence,
        PLASMA_request*, &request);
    plasma_dynamic_sync();
    /* Read the status before the sequence object is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_ztrmm	(	PLASMA_enum	side,
		PLASMA_enum	uplo,
		PLASMA_enum	transA,
		PLASMA_enum	diag,
		int	N,
		int	NRHS,
		PLASMA_Complex64_t	alpha,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t *	B,
		int	LDB
	)

PLASMA_ztrmm - Computes B = alpha*op( A )*B or B = alpha*B*op( A ).

Parameters:

[in]	side	Specifies whether op( A ) appears on the left or on the right of B: = PlasmaLeft: B = alpha*op( A )*B; = PlasmaRight: B = alpha*B*op( A ).
[in]	uplo	Specifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	transA	Specifies whether the matrix A is transposed, not transposed or conjugate transposed: = PlasmaNoTrans: A is not transposed; = PlasmaTrans: A is transposed; = PlasmaConjTrans: A is conjugate transposed.
[in]	diag	Specifies whether or not A is unit triangular: = PlasmaNonUnit: A is non unit; = PlasmaUnit: A is unit.
[in]	N	The order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]	alpha	alpha specifies the scalar alpha.
[in]	A	The triangular matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of the array A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of the array A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If diag = PlasmaUnit, the diagonal elements of A are also not referenced and are assumed to be 1.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in,out]	B	On entry, the N-by-NRHS matrix B. On exit, if return value = 0, B is overwritten by the product alpha*op( A )*B or alpha*B*op( A ).
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_ztrmm_Tile; PLASMA_ztrmm_Tile_Async; PLASMA_ctrmm; PLASMA_dtrmm; PLASMA_strmm

Definition at line 88 of file ztrmm.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_ztrmm_Tile_Async(), PlasmaConjTrans, PlasmaLeft, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaRight, PlasmaTrans, PlasmaUnit, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the triangular matrix-matrix product.
     *
     * Validates the arguments, converts A and B into tile descriptors, calls
     * PLASMA_ztrmm_Tile_Async() on them, converts the result back and returns
     * the status recorded in the sequence.
     *
     * A is handled as an NA-by-NA matrix, where NA is N when side is
     * PlasmaLeft and NRHS otherwise; LDA must therefore be at least
     * max(1, NA). B is N-by-NRHS with LDB >= max(1, N).
     *
     * Returns PLASMA_ERR_NOT_INITIALIZED when no PLASMA context is available,
     * -i when the i-th argument is illegal, the plasma_tune() status when
     * tuning fails, PLASMA_SUCCESS on a quick return, and sequence->status
     * otherwise.
     *
     * NOTE(review): the lap2tile/tile2lap helpers and plasma_dynamic_sync are
     * macros defined elsewhere; they presumably rely on the locals `plasma`,
     * `sequence` and `request` by name -- confirm before renaming anything.
     */
    int NB, NA;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_ztrmm", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments; each error code is the negated position of the
     * offending argument in the parameter list. */
    if (side != PlasmaLeft && side != PlasmaRight) {
        plasma_error("PLASMA_ztrmm", "illegal value of side");
        return -1;
    }
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_ztrmm", "illegal value of uplo");
        return -2;
    }
    if (transA != PlasmaConjTrans && transA != PlasmaNoTrans && transA != PlasmaTrans ) {
        plasma_error("PLASMA_ztrmm", "illegal value of transA");
        return -3;
    }
    if (diag != PlasmaUnit && diag != PlasmaNonUnit) {
        plasma_error("PLASMA_ztrmm", "illegal value of diag");
        return -4;
    }
    if (N < 0) {
        plasma_error("PLASMA_ztrmm", "illegal value of N");
        return -5;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_ztrmm", "illegal value of NRHS");
        return -6;
    }
    /* Order of A follows the side it is applied from. It is needed before the
     * LDA check: validating LDA against N alone would accept a too-small
     * leading dimension when side == PlasmaRight and NRHS > N. */
    if (side == PlasmaLeft) {
      NA = N;
    } else {
      NA = NRHS;
    }
    if (LDA < max(1, NA)) {
        plasma_error("PLASMA_ztrmm", "illegal value of LDA");
        return -8;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_ztrmm", "illegal value of LDB");
        return -10;
    }
    /* Quick return: B is empty */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune the blocking parameters for an N-by-N problem with NRHS columns */
    status = plasma_tune(PLASMA_FUNC_ZPOSV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_ztrmm", "plasma_tune() failed");
        return status;
    }
    /* Use the current PLASMA_NB for both blocking arguments of the
     * conversion macros */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /* Build tile descriptors for A (NA-by-NA) and B (N-by-NRHS).
     * In the out-of-place variant the trailing macro argument is cleanup code
     * releasing the descriptors handled so far; presumably it runs when the
     * conversion fails -- confirm against the macro definition. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, NA,   0, 0, NA, NA,   plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N,  NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, NA,   0, 0, NA, NA  );
        plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N,  NRHS);
    }
    /* Call the tile interface */
    PLASMA_ztrmm_Tile_Async(
        side, uplo, transA, diag, alpha, &descA, &descB, sequence, &request);
    /* Out-of-place: only B (the output) is converted back, then both
     * descriptors are freed after the sync. In-place: both matrices are
     * converted back before the sync. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, NA   );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    /* Read the status before the sequence object is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_ztrsm	(	PLASMA_enum	side,
		PLASMA_enum	uplo,
		PLASMA_enum	transA,
		PLASMA_enum	diag,
		int	N,
		int	NRHS,
		PLASMA_Complex64_t	alpha,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t *	B,
		int	LDB
	)

PLASMA_ztrsm - Computes triangular solve A*X = B or X*A = B.

Parameters:

[in]	side	Specifies whether A appears on the left or on the right of X: = PlasmaLeft: AX = B = PlasmaRight: XA = B
[in]	uplo	Specifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	transA	Specifies whether the matrix A is transposed, not transposed or conjugate transposed: = PlasmaNoTrans: A is not transposed; = PlasmaTrans: A is transposed; = PlasmaConjTrans: A is conjugate transposed.
[in]	diag	Specifies whether or not A is unit triangular: = PlasmaNonUnit: A is non unit; = PlasmaUnit: A is unit.
[in]	N	The order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]	alpha	alpha specifies the scalar alpha.
[in]	A	The triangular matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of the array A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of the array A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If diag = PlasmaUnit, the diagonal elements of A are also not referenced and are assumed to be 1.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in,out]	B	On entry, the N-by-NRHS right hand side matrix B. On exit, if return value = 0, the N-by-NRHS solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_ztrsm_Tile; PLASMA_ztrsm_Tile_Async; PLASMA_ctrsm; PLASMA_dtrsm; PLASMA_strsm

Definition at line 88 of file ztrsm.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_ztrsm_Tile_Async(), PlasmaConjTrans, PlasmaLeft, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaRight, PlasmaTrans, PlasmaUnit, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the triangular solve with multiple right-hand
     * sides.
     *
     * Validates the arguments, converts A and B into tile descriptors, calls
     * PLASMA_ztrsm_Tile_Async() on them, converts the result back and returns
     * the status recorded in the sequence.
     *
     * A is handled as an NA-by-NA matrix, where NA is N when side is
     * PlasmaLeft and NRHS otherwise; LDA must therefore be at least
     * max(1, NA). B is N-by-NRHS with LDB >= max(1, N).
     *
     * Returns PLASMA_ERR_NOT_INITIALIZED when no PLASMA context is available,
     * -i when the i-th argument is illegal, the plasma_tune() status when
     * tuning fails, PLASMA_SUCCESS on a quick return, and sequence->status
     * otherwise.
     *
     * NOTE(review): the lap2tile/tile2lap helpers and plasma_dynamic_sync are
     * macros defined elsewhere; they presumably rely on the locals `plasma`,
     * `sequence` and `request` by name -- confirm before renaming anything.
     */
    int NB, NA;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_ztrsm", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments; each error code is the negated position of the
     * offending argument in the parameter list. */
    if (side != PlasmaLeft && side != PlasmaRight) {
        plasma_error("PLASMA_ztrsm", "illegal value of side");
        return -1;
    }
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_ztrsm", "illegal value of uplo");
        return -2;
    }
    if (transA != PlasmaConjTrans && transA != PlasmaNoTrans && transA != PlasmaTrans ) {
        plasma_error("PLASMA_ztrsm", "illegal value of transA");
        return -3;
    }
    if (diag != PlasmaUnit && diag != PlasmaNonUnit) {
        plasma_error("PLASMA_ztrsm", "illegal value of diag");
        return -4;
    }
    if (N < 0) {
        plasma_error("PLASMA_ztrsm", "illegal value of N");
        return -5;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_ztrsm", "illegal value of NRHS");
        return -6;
    }
    /* Order of A follows the side it is applied from. It is needed before the
     * LDA check: validating LDA against N alone would accept a too-small
     * leading dimension when side == PlasmaRight and NRHS > N. */
    if (side == PlasmaLeft) {
      NA = N;
    } else {
      NA = NRHS;
    }
    if (LDA < max(1, NA)) {
        plasma_error("PLASMA_ztrsm", "illegal value of LDA");
        return -8;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_ztrsm", "illegal value of LDB");
        return -10;
    }
    /* Quick return: B is empty */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune the blocking parameters for an N-by-N problem with NRHS columns */
    status = plasma_tune(PLASMA_FUNC_ZPOSV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_ztrsm", "plasma_tune() failed");
        return status;
    }
    /* Use the current PLASMA_NB for both blocking arguments of the
     * conversion macros */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /* Build tile descriptors for A (NA-by-NA) and B (N-by-NRHS).
     * In the out-of-place variant the trailing macro argument is cleanup code
     * releasing the descriptors handled so far; presumably it runs when the
     * conversion fails -- confirm against the macro definition. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, NA,   0, 0, NA, NA,   plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N,  NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, NA,   0, 0, NA, NA  );
        plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N,  NRHS);
    }
    /* Call the tile interface */
    PLASMA_ztrsm_Tile_Async(
        side, uplo, transA, diag, alpha, &descA, &descB, sequence, &request);
    /* Out-of-place: only B (the output) is converted back, then both
     * descriptors are freed after the sync. In-place: both matrices are
     * converted back before the sync. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, NA   );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    /* Read the status before the sequence object is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_ztrsmpl	(	int	N,
		int	NRHS,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t *	L,
		int *	IPIV,
		PLASMA_Complex64_t *	B,
		int	LDB
	)

PLASMA_ztrsmpl - Performs the forward substitution step of solving a system of linear equations after the tile LU factorization of the matrix.

Parameters:

[in]	N	The order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]	A	The tile factor L from the factorization, computed by PLASMA_zgetrf_incpiv.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in]	L	Auxiliary factorization data, related to the tile L factor, computed by PLASMA_zgetrf_incpiv.
[in]	IPIV	The pivot indices from PLASMA_zgetrf_incpiv (not equivalent to LAPACK).
[in,out]	B	On entry, the N-by-NRHS right hand side matrix B. On exit, if return value = 0, the N-by-NRHS solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_ztrsmpl_Tile; PLASMA_ztrsmpl_Tile_Async; PLASMA_ctrsmpl; PLASMA_dtrsmpl; PLASMA_strsmpl; PLASMA_zgetrf_incpiv

Definition at line 67 of file ztrsmpl.c.

References L, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZGESV, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_ztrsmpl_Tile_Async(), PlasmaComplexDouble, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the forward substitution step: validates the
     * arguments, wraps L in a tile descriptor, converts A and B to tile
     * layout, runs PLASMA_ztrsmpl_Tile_Async() and converts the result back.
     * Returns PLASMA_ERR_NOT_INITIALIZED, -i for an illegal i-th argument,
     * the plasma_tune() status on tuning failure, or the sequence status.
     */
    int NB, IB, IBNB, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB, descL;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_ztrsmpl", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments; the return value is minus the position of the
     * offending argument in the public signature (N=1, NRHS=2, LDA=4, LDB=8) */
    if (N < 0) {
        plasma_error("PLASMA_ztrsmpl", "illegal value of N");
        return -1;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_ztrsmpl", "illegal value of NRHS");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_ztrsmpl", "illegal value of LDA");
        return -4;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_ztrsmpl", "illegal value of LDB");
        return -8;
    }
    /* Quick return: nothing to solve when either dimension is zero */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on N & NRHS (uses the ZGESV tuning entry) */
    status = plasma_tune(PLASMA_FUNC_ZGESV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_ztrsmpl", "plasma_tune() failed");
        return status;
    }
    /* Read the tuned tile sizes and set NT = ceil(N / NB) */
    NB    = PLASMA_NB;
    IB    = PLASMA_IB;
    IBNB  = IB*NB;
    NT    = (N%NB==0) ? (N/NB) : (N/NB+1);
    plasma_sequence_create(plasma, &sequence);
    /* Describe L as (NT*IB)-by-(NT*NB) with IB-by-NB tiles; the descriptor
     * points directly at the caller's L buffer, no copy is made here */
    descL = plasma_desc_init(
        PlasmaComplexDouble,
        IB, NB, IBNB,
        NT*IB, NT*NB, 0, 0, NT*IB, NT*NB);
    descL.mat = L;
    /* Convert A and B to tile layout. In the out-of-place variant the last
     * argument is a cleanup statement list handed to the conversion macro
     * (presumably run if the conversion fails - confirm in the macro). */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
    }
    /* Call the tile interface */
    PLASMA_ztrsmpl_Tile_Async(&descA, &descL, IPIV, &descB, sequence, &request);
    /* Convert back and synchronize. Out of place: only B is copied back and
     * the temporary tile storage is freed after the sync. In place: both A
     * and B are converted back. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    /* Save the status before the sequence object is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_ztrsmrv	(	PLASMA_enum	side,
		PLASMA_enum	uplo,
		PLASMA_enum	transA,
		PLASMA_enum	diag,
		int	N,
		int	NRHS,
		PLASMA_Complex64_t	alpha,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t *	B,
		int	LDB
	)

PLASMA_ztrsmrv - Computes triangular solve A*X = B or X*A = B.

Parameters:

[in]	side	Specifies whether A appears on the left or on the right of X: = PlasmaLeft: AX = B = PlasmaRight: XA = B
[in]	uplo	Specifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	transA	Specifies whether the matrix A is transposed, not transposed or conjugate transposed: = PlasmaNoTrans: A is not transposed; = PlasmaTrans: A is transposed; = PlasmaConjTrans: A is conjugate transposed.
[in]	diag	Specifies whether or not A is unit triangular: = PlasmaNonUnit: A is non unit; = PlasmaUnit: A is unit.
[in]	N	The order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]	alpha	alpha specifies the scalar alpha.
[in]	A	The triangular matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of the array A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of the array A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If diag = PlasmaUnit, the diagonal elements of A are also not referenced and are assumed to be 1.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in,out]	B	On entry, the N-by-NRHS right hand side matrix B. On exit, if return value = 0, the N-by-NRHS solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_ztrsmrv_Tile; PLASMA_ztrsmrv_Tile_Async; PLASMA_ctrsmrv; PLASMA_dtrsmrv; PLASMA_strsmrv

Definition at line 88 of file ztrsmrv.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_ztrsmrv_Tile_Async(), PlasmaConjTrans, PlasmaLeft, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaRight, PlasmaTrans, PlasmaUnit, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the triangular solve: validates the arguments,
     * converts A and B to tile layout, runs PLASMA_ztrsmrv_Tile_Async() and
     * converts the result back.
     * Returns PLASMA_ERR_NOT_INITIALIZED, -i for an illegal i-th argument,
     * the plasma_tune() status on tuning failure, or the sequence status.
     */
    int NB, NA;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_ztrsmrv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments; the return value is minus the position of the
     * offending argument in the public signature */
    if (side != PlasmaLeft && side != PlasmaRight) {
        plasma_error("PLASMA_ztrsmrv", "illegal value of side");
        return -1;
    }
    /* Order of the triangular matrix A: N when A is applied from the left,
     * NRHS when applied from the right. It is needed before the LDA check
     * because A is converted below as an NA-by-NA matrix. */
    if (side == PlasmaLeft) {
      NA = N;
    } else {
      NA = NRHS;
    }
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_ztrsmrv", "illegal value of uplo");
        return -2;
    }
    if (transA != PlasmaConjTrans && transA != PlasmaNoTrans && transA != PlasmaTrans ) {
        plasma_error("PLASMA_ztrsmrv", "illegal value of transA");
        return -3;
    }
    if (diag != PlasmaUnit && diag != PlasmaNonUnit) {
        plasma_error("PLASMA_ztrsmrv", "illegal value of diag");
        return -4;
    }
    if (N < 0) {
        plasma_error("PLASMA_ztrsmrv", "illegal value of N");
        return -5;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_ztrsmrv", "illegal value of NRHS");
        return -6;
    }
    /* LDA must cover the NA rows of A that are actually read; checking it
     * against N would let a too-small LDA through when side == PlasmaRight
     * and NRHS > N */
    if (LDA < max(1, NA)) {
        plasma_error("PLASMA_ztrsmrv", "illegal value of LDA");
        return -8;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_ztrsmrv", "illegal value of LDB");
        return -10;
    }
    /* Quick return: nothing to solve when either dimension is zero */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB depending on N & NRHS (uses the ZPOSV tuning entry) */
    status = plasma_tune(PLASMA_FUNC_ZPOSV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_ztrsmrv", "plasma_tune() failed");
        return status;
    }
    /* Read the tuned tile size */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /* Convert A (NA-by-NA) and B (N-by-NRHS) to tile layout. In the
     * out-of-place variant the last argument is a cleanup statement list
     * handed to the conversion macro. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, NA,   0, 0, NA, NA,   plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N,  NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, NA,   0, 0, NA, NA  );
        plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N,  NRHS);
    }
    /* Call the tile interface */
    PLASMA_ztrsmrv_Tile_Async(
        side, uplo, transA, diag, alpha, &descA, &descB, sequence, &request);
    /* Convert back and synchronize. Out of place: only B is copied back and
     * the temporary tile storage is freed after the sync. In place: both A
     * and B are converted back. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, NA   );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    /* Save the status before the sequence object is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

int PLASMA_ztrtri	(	PLASMA_enum	uplo,
		PLASMA_enum	diag,
		int	N,
		PLASMA_Complex64_t *	A,
		int	LDA
	)

PLASMA_ztrtri - Computes the inverse of a complex upper or lower triangular matrix A.

Parameters:

[in]	uplo	= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	diag	= PlasmaNonUnit: A is non-unit triangular; = PlasmaUnit: A is unit triangular.
[in]	N	The order of the matrix A. N >= 0.
[in,out]	A	On entry, the triangular matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of the array A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of the array A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If DIAG = 'U', the diagonal elements of A are also not referenced and are assumed to be 1. On exit, the (triangular) inverse of the original matrix, in the same storage format.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, A(i,i) is exactly zero. The triangular matrix is singular and its inverse can not be computed.

See also:: PLASMA_ztrtri_Tile; PLASMA_ztrtri_Tile_Async; PLASMA_ctrtri; PLASMA_dtrtri; PLASMA_strtri; PLASMA_zpotri

Definition at line 70 of file ztrtri.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_ZPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_ztrtri_Tile_Async(), PlasmaLower, PlasmaNonUnit, PlasmaUnit, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the triangular inverse: validates the
     * arguments, converts A to tile layout, runs PLASMA_ztrtri_Tile_Async()
     * and converts the result back into A.
     * Returns PLASMA_ERR_NOT_INITIALIZED, -i for an illegal i-th argument,
     * the plasma_tune() status on tuning failure, or the sequence status.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_ztrtri", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments; the return value is minus the position of the
     * offending argument in the public signature (uplo=1, diag=2, N=3, LDA=5) */
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_ztrtri", "illegal value of uplo");
        return -1;
    }
    if (diag != PlasmaUnit && diag != PlasmaNonUnit) {
        plasma_error("PLASMA_ztrtri", "illegal value of diag");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_ztrtri", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_ztrtri", "illegal value of LDA");
        return -5;
    }
    /* Quick return: N < 0 was rejected above, so this is true only for N == 0 */
    if (max(N, 0) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB depending on N (uses the ZPOSV tuning entry) */
    status = plasma_tune(PLASMA_FUNC_ZPOSV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_ztrtri", "plasma_tune() failed");
        return status;
    }
    /* Read the tuned tile size */
    NB   = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /* Convert A to tile layout. In the out-of-place variant the last argument
     * is a cleanup statement handed to the conversion macro. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ziplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, N, N);
    }
    /* Call the tile interface */
    PLASMA_ztrtri_Tile_Async(uplo, diag, &descA, sequence, &request);
    /* Convert the inverse back into A and synchronize; the out-of-place
     * variant frees its temporary tile storage after the sync */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    /* Save the status before the sequence object is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

int PLASMA_zunglq	(	int	M,
		int	N,
		int	K,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t *	T,
		PLASMA_Complex64_t *	Q,
		int	LDQ
	)

PLASMA_zunglq - Generates an M-by-N matrix Q with orthonormal rows, which is defined as the first M rows of a product of the elementary reflectors returned by PLASMA_zgelqf.

Parameters:

[in]	M	The number of rows of the matrix Q. M >= 0.
[in]	N	The number of columns of the matrix Q. N >= M.
[in]	K	The number of rows of elementary tile reflectors whose product defines the matrix Q. M >= K >= 0.
[in]	A	Details of the LQ factorization of the original matrix A as returned by PLASMA_zgelqf.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[in]	T	Auxiliary factorization data, computed by PLASMA_zgelqf.
[out]	Q	On exit, the M-by-N matrix Q.
[in]	LDQ	The leading dimension of the array Q. LDQ >= max(1,M).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_zunglq_Tile; PLASMA_zunglq_Tile_Async; PLASMA_cunglq; PLASMA_dorglq; PLASMA_sorglq; PLASMA_zgelqf

Definition at line 68 of file zunglq.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_ZGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_zunglq_Tile_Async(), PlasmaComplexDouble, plasma_sequence_t::status, and T.

{
    /*
     * LAPACK-layout driver that generates Q from an LQ factorization:
     * validates the arguments, wraps T in a tile descriptor, converts A and Q
     * to tile layout, runs PLASMA_zunglq_Tile_Async() and converts Q back.
     * Returns PLASMA_ERR_NOT_INITIALIZED, -i for an illegal i-th argument,
     * the plasma_tune() status on tuning failure, or the sequence status.
     */
    int NB, IB, IBNB, KT, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descQ, descT;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zunglq", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments (M=1, N=2, K=3, LDA=5, LDQ=8);
     * enforces N >= M >= K >= 0 */
    if (M < 0) {
        plasma_error("PLASMA_zunglq", "illegal value of M");
        return -1;
    }
    if (N < M) {
        plasma_error("PLASMA_zunglq", "illegal value of N");
        return -2;
    }
    if (K < 0 || K > M) {
        plasma_error("PLASMA_zunglq", "illegal value of K");
        return -3;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_zunglq", "illegal value of LDA");
        return -5;
    }
    if (LDQ < max(1, M)) {
        plasma_error("PLASMA_zunglq", "illegal value of LDQ");
        return -8;
    }
    /* Quick return - currently NOT equivalent to LAPACK's:
     * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDQ )
     * (Q is left untouched here when any of M, N, K is zero) */
    if (min(M, min(N, K)) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on M & N (uses the ZGELS tuning entry) */
    status = plasma_tune(PLASMA_FUNC_ZGELS, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zunglq", "plasma_tune() failed");
        return status;
    }
    /* Read the tuned tile sizes; NT = ceil(N / NB), KT = ceil(K / NB) */
    NB   = PLASMA_NB;
    IB   = PLASMA_IB;
    IBNB = IB*NB;
    NT   = (N%NB==0) ? (N/NB) : (N/NB+1);
    KT   = (K%NB==0) ? (K/NB) : (K/NB+1);
    plasma_sequence_create(plasma, &sequence);
    /* Describe T with IB-by-NB tiles; its width depends on the Householder
     * mode selected in the context */
    if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
        descT = plasma_desc_init(
            PlasmaComplexDouble,
            IB, NB, IBNB,
            KT*IB, NT*NB, 0, 0, KT*IB, NT*NB);
    }
    else {
        /* Double the size of T to accommodate the tree reduction phase */
        descT = plasma_desc_init(
            PlasmaComplexDouble,
            IB, NB, IBNB,
            KT*IB, 2*NT*NB, 0, 0, KT*IB, 2*NT*NB);
    }
    /* The descriptor points directly at the caller's T buffer */
    descT.mat = T;
    /* Convert A (K-by-N) and Q (M-by-N) to tile layout. In the out-of-place
     * variant the last argument is a cleanup statement list handed to the
     * conversion macro. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K, N, plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descQ)));
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K, N);
        plasma_ziplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, M, N);
    }
    /* Call the tile interface */
    PLASMA_zunglq_Tile_Async(&descA, &descT, &descQ, sequence, &request);
    /* Convert back and synchronize. Out of place: only Q is copied back and
     * the temporary tile storage is freed after the sync. In place: both A
     * and Q are converted back. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descQ, Q, NB, NB, LDQ, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descQ);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
        plasma_ziptile2lap( descQ, Q, NB, NB, LDQ, N );
        plasma_dynamic_sync();
    }
        
    /* Save the status before the sequence object is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zungqr	(	int	M,
		int	N,
		int	K,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t *	T,
		PLASMA_Complex64_t *	Q,
		int	LDQ
	)

PLASMA_zungqr - Generates an M-by-N matrix Q with orthonormal columns, which is defined as the first N columns of a product of the elementary reflectors returned by PLASMA_zgeqrf.

Parameters:

[in]	M	The number of rows of the matrix Q. M >= 0.
[in]	N	The number of columns of the matrix Q. M >= N >= 0.
[in]	K	The number of columns of elementary tile reflectors whose product defines the matrix Q. N >= K >= 0.
[in]	A	Details of the QR factorization of the original matrix A as returned by PLASMA_zgeqrf.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[in]	T	Auxiliary factorization data, computed by PLASMA_zgeqrf.
[out]	Q	On exit, the M-by-N matrix Q.
[in]	LDQ	The leading dimension of the array Q. LDQ >= max(1,M).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_zungqr_Tile; PLASMA_zungqr_Tile_Async; PLASMA_cungqr; PLASMA_dorgqr; PLASMA_sorgqr; PLASMA_zgeqrf

Definition at line 68 of file zungqr.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_ZGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_zungqr_Tile_Async(), PlasmaComplexDouble, plasma_sequence_t::status, and T.

{
    /*
     * LAPACK-layout driver that generates Q from a QR factorization:
     * validates the arguments, wraps T in a tile descriptor, converts A and Q
     * to tile layout, runs PLASMA_zungqr_Tile_Async() and converts Q back.
     * Returns PLASMA_ERR_NOT_INITIALIZED, -i for an illegal i-th argument,
     * the plasma_tune() status on tuning failure, or the sequence status.
     */
    int NB, IB, IBNB, MT, KT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descQ, descT;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zungqr", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments (M=1, N=2, K=3, LDA=5, LDQ=8);
     * enforces M >= N >= K >= 0 */
    if (M < 0) {
        plasma_error("PLASMA_zungqr", "illegal value of M");
        return -1;
    }
    if (N < 0 || N > M) {
        plasma_error("PLASMA_zungqr", "illegal value of N");
        return -2;
    }
    if (K < 0 || K > N) {
        plasma_error("PLASMA_zungqr", "illegal value of K");
        return -3;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_zungqr", "illegal value of LDA");
        return -5;
    }
    if (LDQ < max(1, M)) {
        plasma_error("PLASMA_zungqr", "illegal value of LDQ");
        return -8;
    }
    /* Quick return: Q is left untouched when any of M, N, K is zero */
    if (min(M, min(N, K)) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on M & N (uses the ZGELS tuning entry) */
    status = plasma_tune(PLASMA_FUNC_ZGELS, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zungqr", "plasma_tune() failed");
        return status;
    }
    /* Read the tuned tile sizes; MT = ceil(M / NB), KT = ceil(K / NB) */
    NB   = PLASMA_NB;
    IB   = PLASMA_IB;
    IBNB = IB*NB;
    MT   = (M%NB==0) ? (M/NB) : (M/NB+1);
    KT   = (K%NB==0) ? (K/NB) : (K/NB+1);
    plasma_sequence_create(plasma, &sequence);
    /* Describe T with IB-by-NB tiles; its width depends on the Householder
     * mode selected in the context */
    if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
        descT = plasma_desc_init(
            PlasmaComplexDouble,
            IB, NB, IBNB,
            MT*IB, KT*NB, 0, 0, MT*IB, KT*NB);
    }
    else {
        /* Double the size of T to accommodate the tree reduction phase */
        descT = plasma_desc_init(
            PlasmaComplexDouble,
            IB, NB, IBNB,
            MT*IB, 2*KT*NB, 0, 0, MT*IB, 2*KT*NB);
    }
    /* The descriptor points directly at the caller's T buffer */
    descT.mat = T;
    /* Convert A (M-by-K) and Q (M-by-N) to tile layout. In the out-of-place
     * variant the last argument is a cleanup statement list handed to the
     * conversion macro. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, K, plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descQ)));
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, K);
        plasma_ziplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, M, N);
    }
    /* Call the tile interface */
    PLASMA_zungqr_Tile_Async(&descA, &descT, &descQ, sequence, &request);
    /* Convert back and synchronize. Out of place: only Q is copied back and
     * the temporary tile storage is freed after the sync. In place: both A
     * and Q are converted back. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descQ, Q, NB, NB, LDQ, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descQ);
    } else {
        /* NOTE(review): A was converted to tile layout above with N as the
         * column-count argument but is converted back here with K - confirm
         * which value the in-place conversion macros expect. */
        plasma_ziptile2lap( descA, A, NB, NB, LDA, K );
        plasma_ziptile2lap( descQ, Q, NB, NB, LDQ, N );
        plasma_dynamic_sync();
    }
    
    /* Save the status before the sequence object is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zunmlq	(	PLASMA_enum	side,
		PLASMA_enum	trans,
		int	M,
		int	N,
		int	K,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t *	T,
		PLASMA_Complex64_t *	B,
		int	LDB
	)

PLASMA_zunmlq - overwrites the general M-by-N matrix C with Q*C, where Q is an orthogonal matrix (unitary in the complex case) defined as the product of elementary reflectors returned by PLASMA_zgelqf. Q is of order M.

Parameters:

[in]	side	Intended usage: = PlasmaLeft: apply Q or QH from the left; = PlasmaRight: apply Q or QH from the right. Currently only PlasmaLeft is supported.
[in]	trans	Intended usage: = PlasmaNoTrans: no transpose, apply Q; = PlasmaConjTrans: conjugate transpose, apply Q**H. Currently only PlasmaConjTrans is supported.
[in]	M	The number of rows of the matrix C. M >= 0.
[in]	N	The number of columns of the matrix C. N >= 0.
[in]	K	The number of rows of elementary tile reflectors whose product defines the matrix Q. If side == PlasmaLeft, M >= K >= 0. If side == PlasmaRight, N >= K >= 0.
[in]	A	Details of the LQ factorization of the original matrix A as returned by PLASMA_zgelqf.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,K).
[in]	T	Auxiliary factorization data, computed by PLASMA_zgelqf.
[in,out]	B	On entry, the M-by-N matrix B. On exit, B is overwritten by QB or QHB.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,M).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_zunmlq_Tile; PLASMA_zunmlq_Tile_Async; PLASMA_cunmlq; PLASMA_dormlq; PLASMA_sormlq; PLASMA_zgelqf

Definition at line 83 of file zunmlq.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_ZGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_zunmlq_Tile_Async(), PlasmaComplexDouble, PlasmaConjTrans, PlasmaLeft, PlasmaNoTrans, PlasmaRight, plasma_sequence_t::status, and T.

{
    /*
     * LAPACK-layout driver that applies Q (from an LQ factorization) to B:
     * validates the arguments, wraps T in a tile descriptor, converts A and B
     * to tile layout, runs PLASMA_zunmlq_Tile_Async() and converts B back.
     * Returns PLASMA_ERR_NOT_INITIALIZED, -i for an illegal i-th argument,
     * the plasma_tune() status on tuning failure, or the sequence status.
     */
    int NB, IB, IBNB, KT, NT, An;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB, descT;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zunmlq", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* An is the number of columns of A: M when Q is applied from the left,
     * N otherwise. It is set before side is validated, but an invalid side
     * returns -1 below before An is used. */
    if (side == PlasmaLeft)
        An = M;
    else 
        An = N;
    /* Check input arguments; the return value is minus the position of the
     * offending argument in the public signature */
    if ( (side != PlasmaLeft) && (side != PlasmaRight) ) {
        plasma_error("PLASMA_zunmlq", "illegal value of side");
        return -1;
    }
    if ( (trans != PlasmaConjTrans) && (trans != PlasmaNoTrans) ){
        plasma_error("PLASMA_zunmlq", "illegal value of trans");
        return -2;
    }
    if (M < 0) {
        plasma_error("PLASMA_zunmlq", "illegal value of M");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_zunmlq", "illegal value of N");
        return -4;
    }
    if ((K < 0) || (K > An)) {
        plasma_error("PLASMA_zunmlq", "illegal value of K");
        return -5;
    }
    if (LDA < max(1, K)) {
        plasma_error("PLASMA_zunmlq", "illegal value of LDA");
        return -7;
    }
    if (LDB < max(1, M)) {
        plasma_error("PLASMA_zunmlq", "illegal value of LDB");
        return -10;
    }
    /* Quick return - currently NOT equivalent to LAPACK's:
     * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
     * (B is left untouched here when any of M, N, K is zero) */
    if (min(M, min(N, K)) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on M, K & N (uses the ZGELS tuning entry) */
    status = plasma_tune(PLASMA_FUNC_ZGELS, M, K, N);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zunmlq", "plasma_tune() failed");
        return status;
    }
    /* Read the tuned tile sizes; KT = ceil(K / NB), NT = ceil(An / NB) */
    NB   = PLASMA_NB;
    IB   = PLASMA_IB;
    IBNB = IB*NB;
    KT   = ( K%NB==0) ? (K /NB) : (K /NB+1);
    NT   = (An%NB==0) ? (An/NB) : (An/NB+1);
    plasma_sequence_create(plasma, &sequence);
    /* Describe T with IB-by-NB tiles; its width depends on the Householder
     * mode selected in the context */
    if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
        descT = plasma_desc_init(
            PlasmaComplexDouble,
            IB, NB, IBNB,
            KT*IB, NT*NB, 0, 0, KT*IB, NT*NB);
    }
    else {
        /* Double the size of T to accommodate the tree reduction phase */
        descT = plasma_desc_init(
            PlasmaComplexDouble,
            IB, NB, IBNB,
            KT*IB, 2*NT*NB, 0, 0, KT*IB, 2*NT*NB);
    }
    /* The descriptor points directly at the caller's T buffer */
    descT.mat = T;
    /* Convert A (K-by-An) and B (M-by-N) to tile layout. In the out-of-place
     * variant the last argument is a cleanup statement list handed to the
     * conversion macro. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, K, An, plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, N,  0, 0, M, N,  plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, An, 0, 0, K, An);
        plasma_ziplap2tile( descB, B, NB, NB, LDB, N,  0, 0, M, N);
    }
    /* Call the tile interface */
    PLASMA_zunmlq_Tile_Async(
        side, trans, &descA, &descT, &descB, sequence, &request);
    /* Convert back and synchronize. Out of place: only B is copied back and
     * the temporary tile storage is freed after the sync. In place: both A
     * and B are converted back. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descB, B, NB, NB, LDB, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, An );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, N );
        plasma_dynamic_sync();
    }
    /* Save the status before the sequence object is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_zunmqr	(	PLASMA_enum	side,
		PLASMA_enum	trans,
		int	M,
		int	N,
		int	K,
		PLASMA_Complex64_t *	A,
		int	LDA,
		PLASMA_Complex64_t *	T,
		PLASMA_Complex64_t *	B,
		int	LDB
	)

PLASMA_zunmqr - overwrites the general M-by-N matrix C (passed in the argument B) with Q*C or Q**H*C, where Q is an orthogonal matrix (unitary in the complex case) defined as the product of elementary reflectors returned by PLASMA_zgeqrf. Q is of order M.

Parameters:

[in]	side	Intended usage: = PlasmaLeft: apply Q or QH from the left; = PlasmaRight: apply Q or QH from the right. Currently only PlasmaLeft is supported.
[in]	trans	Intended usage: = PlasmaNoTrans: no transpose, apply Q; = PlasmaConjTrans: conjugate transpose, apply Q**H. Currently only PlasmaConjTrans is supported.
[in]	M	The number of rows of the matrix C. M >= 0.
[in]	N	The number of columns of the matrix C. N >= 0.
[in]	K	The number of columns of elementary tile reflectors whose product defines the matrix Q. If side == PlasmaLeft, M >= K >= 0. If side == PlasmaRight, N >= K >= 0.
[in]	A	Details of the QR factorization of the original matrix A as returned by PLASMA_zgeqrf.
[in]	LDA	The leading dimension of the array A. If side == PlasmaLeft, LDA >= max(1,M). If side == PlasmaRight, LDA >= max(1,N).
[in]	T	Auxiliary factorization data, computed by PLASMA_zgeqrf.
[in,out]	B	On entry, the M-by-N matrix B. On exit, B is overwritten by Q*B or Q**H*B.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,M).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also: PLASMA_zunmqr_Tile; PLASMA_zunmqr_Tile_Async; PLASMA_cunmqr; PLASMA_dormqr; PLASMA_sormqr; PLASMA_zgeqrf

Definition at line 85 of file zunmqr.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_ZGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), plasma_ziplap2tile, plasma_ziptile2lap, plasma_zooplap2tile, plasma_zooptile2lap, PLASMA_zunmqr_Tile_Async(), PlasmaComplexDouble, PlasmaConjTrans, PlasmaLeft, PlasmaNoTrans, PlasmaRight, plasma_sequence_t::status, and T.

{
    /*
     * LAPACK-layout entry point: validates the arguments, wraps A, T and B
     * in tile descriptors, runs PLASMA_zunmqr_Tile_Async() and converts the
     * result back to the caller's layout.
     *
     * Returns PLASMA_SUCCESS, -i when the i-th argument is illegal, the
     * status of plasma_tune() when tuning fails, or the final status of the
     * sequence used for the asynchronous call.
     *
     * NOTE(review): the locals `plasma`, `sequence` and `request` keep their
     * original names on purpose; the PLASMA_* / plasma_* helper macros used
     * below presumably reference them implicitly - confirm before renaming.
     */
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB, descT;
    int status;
    int NB, IB, IBNB;
    int Am;        /* row count used for A: M for PlasmaLeft, N otherwise */
    int MT, KT;    /* number of NB-sized tiles covering Am and K          */
    int Tm, Tn;    /* dimensions given to the descriptor of T             */

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zunmqr", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    Am = (side == PlasmaLeft) ? M : N;

    /* Argument validation; the negative return value is the position of
     * the offending argument in the parameter list. */
    if (!((side == PlasmaLeft) || (side == PlasmaRight))) {
        plasma_error("PLASMA_zunmqr", "illegal value of side");
        return -1;
    }
    if (!((trans == PlasmaConjTrans) || (trans == PlasmaNoTrans))) {
        plasma_error("PLASMA_zunmqr", "illegal value of trans");
        return -2;
    }
    if (M < 0) {
        plasma_error("PLASMA_zunmqr", "illegal value of M");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_zunmqr", "illegal value of N");
        return -4;
    }
    if ((K > Am) || (K < 0)) {
        plasma_error("PLASMA_zunmqr", "illegal value of K");
        return -5;
    }
    if (LDA < max(1, Am)) {
        plasma_error("PLASMA_zunmqr", "illegal value of LDA");
        return -7;
    }
    if (LDB < max(1, M)) {
        plasma_error("PLASMA_zunmqr", "illegal value of LDB");
        return -10;
    }

    /* Quick return: M, N and K are all non-negative at this point, so the
     * smallest of them is zero exactly when one of them is zero. */
    if ((M == 0) || (N == 0) || (K == 0))
        return PLASMA_SUCCESS;

    /* Let the tuner select NB and IB from M, K and N */
    status = plasma_tune(PLASMA_FUNC_ZGELS, M, K, N);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zunmqr", "plasma_tune() failed");
        return status;
    }

    /* Tile sizes and tile counts (rounded up) */
    NB   = PLASMA_NB;
    IB   = PLASMA_IB;
    IBNB = IB*NB;
    MT   = Am/NB + ((Am%NB) != 0);
    KT   = K /NB + ((K %NB) != 0);

    plasma_sequence_create(plasma, &sequence);

    /* Descriptor for T: MT*IB rows by KT*NB columns; the column count is
     * doubled unless flat Householder is selected, to accommodate the tree
     * reduction phase. */
    Tm = MT*IB;
    Tn = KT*NB;
    if (plasma->householder != PLASMA_FLAT_HOUSEHOLDER) {
        Tn = 2*Tn;
    }
    descT = plasma_desc_init(
        PlasmaComplexDouble,
        IB, NB, IBNB,
        Tm, Tn, 0, 0, Tm, Tn);
    descT.mat = T;

    /* Convert A (Am-by-K) and B (M-by-N) from LAPACK layout to tile layout */
    if ( PLASMA_TRANSLATION != PLASMA_OUTOFPLACE ) {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, K, 0, 0, Am, K);
        plasma_ziplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M,  N);
    } else {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, K, 0, 0, Am, K, plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M,  N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    }

    /* Call the tile interface */
    PLASMA_zunmqr_Tile_Async(
        side, trans, &descA, &descT, &descB, sequence, &request);

    /* Convert back to LAPACK layout and synchronise */
    if ( PLASMA_TRANSLATION != PLASMA_OUTOFPLACE ) {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, K );
        plasma_ziptile2lap( descB, B, NB, NB, LDB, N );
        plasma_dynamic_sync();
    } else {
        /* Only B is copied back; both descriptors' storage is freed
         * after the synchronisation. */
        plasma_zooptile2lap( descB, B, NB, NB, LDB, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    }

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

Functions

Detailed Description

Function Documentation

trans = PlasmaNoTrans and M >= N: find the least squares solution of an overdetermined

trans = PlasmaNoTrans and M < N: find the minimum norm solution of an underdetermined

trans = PlasmaNoTrans and M >= N: find the least squares solution of an overdetermined

trans = PlasmaNoTrans and M < N: find the minimum norm solution of an underdetermined

trans = PlasmaNoTrans and M >= N: find the least squares solution of an overdetermined

trans = PlasmaNoTrans and M < N: find the minimum norm solution of an underdetermined