Functions
int	PLASMA_dgebrd (PLASMA_enum jobu, PLASMA_enum jobvt, int M, int N, double *A, int LDA, double *D, double *E, double *U, int LDU, double *VT, int LDVT, PLASMA_desc *descT)
int	PLASMA_dgelqf (int M, int N, double *A, int LDA, double *T)
int	PLASMA_dgelqs (int M, int N, int NRHS, double *A, int LDA, double *T, double *B, int LDB)
int	PLASMA_dgels (PLASMA_enum trans, int M, int N, int NRHS, double *A, int LDA, double *T, double *B, int LDB)
int	PLASMA_dgemm (PLASMA_enum transA, PLASMA_enum transB, int M, int N, int K, double alpha, double *A, int LDA, double *B, int LDB, double beta, double *C, int LDC)
int	PLASMA_dgeqrf (int M, int N, double A, int LDA, double T)
int	PLASMA_dgeqrs (int M, int N, int NRHS, double A, int LDA, double T, double *B, int LDB)
int	PLASMA_dgesv (int N, int NRHS, double A, int LDA, int IPIV, double *B, int LDB)
int	PLASMA_dgesv_incpiv (int N, int NRHS, double A, int LDA, double L, int IPIV, double B, int LDB)
int	PLASMA_dgesvd (PLASMA_enum jobu, PLASMA_enum jobvt, int M, int N, double A, int LDA, double S, double U, int LDU, double VT, int LDVT, PLASMA_desc *descT)
int	PLASMA_dgetrf (int M, int N, double A, int LDA, int IPIV)
int	PLASMA_dgetrf_incpiv (int M, int N, double A, int LDA, double L, int *IPIV)
int	PLASMA_dgetri (int N, double A, int LDA, int IPIV)
int	PLASMA_dgetrs (PLASMA_enum trans, int N, int NRHS, double A, int LDA, int IPIV, double *B, int LDB)
int	PLASMA_dgetrs_incpiv (PLASMA_enum trans, int N, int NRHS, double A, int LDA, double L, int IPIV, double B, int LDB)
int	PLASMA_dlacpy (PLASMA_enum uplo, int M, int N, double A, int LDA, double B, int LDB)
double	PLASMA_dlange (PLASMA_enum norm, int M, int N, double A, int LDA, double work)
double	PLASMA_dlansy (PLASMA_enum norm, PLASMA_enum uplo, int N, double A, int LDA, double work)
int	PLASMA_dlaset (PLASMA_enum uplo, int M, int N, double alpha, double beta, double *A, int LDA)
int	PLASMA_dlaswp (int N, double A, int LDA, int K1, int K2, int IPIV, int INCX)
int	PLASMA_dlaswpc (int N, double A, int LDA, int K1, int K2, int IPIV, int INCX)
int	PLASMA_dlauum (PLASMA_enum uplo, int N, double *A, int LDA)
int	PLASMA_dorglq (int M, int N, int K, double A, int LDA, double T, double *Q, int LDQ)
int	PLASMA_dorgqr (int M, int N, int K, double A, int LDA, double T, double *Q, int LDQ)
int	PLASMA_dormlq (PLASMA_enum side, PLASMA_enum trans, int M, int N, int K, double A, int LDA, double T, double *B, int LDB)
int	PLASMA_dormqr (PLASMA_enum side, PLASMA_enum trans, int M, int N, int K, double A, int LDA, double T, double *B, int LDB)
int	PLASMA_dplgsy (double bump, int N, double *A, int LDA, unsigned long long int seed)
int	PLASMA_dplrnt (int M, int N, double *A, int LDA, unsigned long long int seed)
int	PLASMA_dposv (PLASMA_enum uplo, int N, int NRHS, double A, int LDA, double B, int LDB)
int	PLASMA_dpotrf (PLASMA_enum uplo, int N, double *A, int LDA)
int	PLASMA_dpotri (PLASMA_enum uplo, int N, double *A, int LDA)
int	PLASMA_dpotrs (PLASMA_enum uplo, int N, int NRHS, double A, int LDA, double B, int LDB)
int	PLASMA_dsgesv (int N, int NRHS, double A, int LDA, int IPIV, double B, int LDB, double X, int LDX, int *ITER)
int	PLASMA_dsposv (PLASMA_enum uplo, int N, int NRHS, double A, int LDA, double B, int LDB, double X, int LDX, int ITER)
int	PLASMA_dsungesv (PLASMA_enum trans, int N, int NRHS, double A, int LDA, double B, int LDB, double X, int LDX, int ITER)
int	PLASMA_dsyev (PLASMA_enum jobz, PLASMA_enum uplo, int N, double A, int LDA, double W, PLASMA_desc descT, double Q, int LDQ)
int	PLASMA_dsygst (PLASMA_enum itype, PLASMA_enum uplo, int N, double A, int LDA, double B, int LDB)
int	PLASMA_dsygv (PLASMA_enum itype, PLASMA_enum jobz, PLASMA_enum uplo, int N, double A, int LDA, double B, int LDB, double W, PLASMA_desc descT, double *Q, int LDQ)
int	PLASMA_dsymm (PLASMA_enum side, PLASMA_enum uplo, int M, int N, double alpha, double A, int LDA, double B, int LDB, double beta, double *C, int LDC)
int	PLASMA_dsyr2k (PLASMA_enum uplo, PLASMA_enum trans, int N, int K, double alpha, double A, int LDA, double B, int LDB, double beta, double *C, int LDC)
int	PLASMA_dsyrk (PLASMA_enum uplo, PLASMA_enum trans, int N, int K, double alpha, double A, int LDA, double beta, double C, int LDC)
int	PLASMA_dsytrd (PLASMA_enum jobz, PLASMA_enum uplo, int N, double A, int LDA, double D, double E, PLASMA_desc descT, double *Q, int LDQ)
int	PLASMA_dtrmm (PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum transA, PLASMA_enum diag, int N, int NRHS, double alpha, double A, int LDA, double B, int LDB)
int	PLASMA_dtrsm (PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum transA, PLASMA_enum diag, int N, int NRHS, double alpha, double A, int LDA, double B, int LDB)
int	PLASMA_dtrsmpl (int N, int NRHS, double A, int LDA, double L, int IPIV, double B, int LDB)
int	PLASMA_dtrsmrv (PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum transA, PLASMA_enum diag, int N, int NRHS, double alpha, double A, int LDA, double B, int LDB)
int	PLASMA_dtrtri (PLASMA_enum uplo, PLASMA_enum diag, int N, double *A, int LDA)
int	PLASMA_dLapack_to_Tile (double Af77, int LDA, PLASMA_desc A)
int	PLASMA_dTile_to_Lapack (PLASMA_desc A, double Af77, int LDA)

Detailed Description

This is the group of double real functions using the simple user interface.

Function Documentation

int PLASMA_dgebrd	(	PLASMA_enum	jobu,
		PLASMA_enum	jobvt,
		int	M,
		int	N,
		double *	A,
		int	LDA,
		double *	D,
		double *	E,
		double *	U,
		int	LDU,
		double *	VT,
		int	LDVT,
		PLASMA_desc *	descT
	)

PLASMA_dgebrd - computes the singular value decomposition (SVD) of a real M-by-N matrix A, optionally computing the left and/or right singular vectors. The SVD is written

 A = U * SIGMA * transpose(V)

where SIGMA is an M-by-N matrix which is zero except for its min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA are the singular values of A; they are real and non-negative, and are returned in descending order. The first min(m,n) columns of U and V are the left and right singular vectors of A.

Note that the routine returns V**T, not V. Not LAPACK Compliant for now! Note: Only PlasmaNoVec supported!

Parameters:

[in]	jobu	Specifies options for computing all or part of the matrix U. Intended usage: = PlasmaVec: all M columns of U are returned in array U; = PlasmaNoVec: no columns of U (no left singular vectors) are computed. Note: Only PlasmaNoVec supported!
[in]	jobvt	Specifies options for computing all or part of the matrix V**T. Intended usage: = PlasmaVec: all N rows of V**T are returned in array VT; = PlasmaNoVec: no rows of V**T (no right singular vectors) are computed. Note: Only PlasmaNoVec supported!
[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in,out]	A	On entry, the M-by-N matrix A. On exit, if JOBU = 'O', A is overwritten with the first min(m,n) columns of U (the left singular vectors, stored columnwise); if JOBVT = 'O', A is overwritten with the first min(m,n) rows of V**T (the right singular vectors, stored rowwise); if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A are destroyed.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[out]	S	The double precision singular values of A, sorted so that S(i) >= S(i+1).
[out]	U	(LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. If JOBU = 'A', U contains the M-by-M unitary matrix U; if JOBU = 'S', U contains the first min(m,n) columns of U (the left singular vectors, stored columnwise); if JOBU = 'N' or 'O', U is not referenced.
[in]	LDU	The leading dimension of the array U. LDU >= 1; if JOBU = 'S' or 'A', LDU >= M.
[out]	VT	If JOBVT = 'A', VT contains the N-by-N unitary matrix VT; if JOBVT = 'S', VT contains the first min(m,n) rows of VT (the right singular vectors, stored rowwise); if JOBVT = 'N' or 'O', VT is not referenced.
[in]	LDVT	The leading dimension of the array VT. LDVT >= 1; if JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
[in,out]	descT	On entry, descriptor as returned by PLASMA_Alloc_Workspace_dgesvd. On exit, contains auxiliary factorization data.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_dgebrd_Tile; PLASMA_dgebrd_Tile_Async; PLASMA_cgebrd; PLASMA_dgebrd; PLASMA_sgebrd

Definition at line 122 of file dgebrd.c.

References plasma_desc_t::m, max, min, plasma_desc_t::n, plasma_context_self(), plasma_desc_check(), plasma_desc_mat_free(), PLASMA_dgebrd_Tile_Async(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DGEBRD, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaNoVec, PlasmaVec, and plasma_sequence_t::status.

{
    /*
     * Simple-interface driver for PLASMA_dgebrd.
     *
     * Validates the arguments, hands A to the lap2tile translation macros,
     * runs PLASMA_dgebrd_Tile_Async() and translates the result back.
     *
     * Return value:
     *   PLASMA_ERR_NOT_INITIALIZED  no PLASMA context is available;
     *   status of plasma_tune()     when tuning fails;
     *   -i                          the i-th argument had an illegal value;
     *   PLASMA_SUCCESS              quick return when min(M, N) == 0;
     *   sequence->status            otherwise.
     */
    int NB, IB, MT, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descU, descVT;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dgebrd", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Tuning runs before the argument checks because the descT check below
     * needs NB and IB. */
    status = plasma_tune(PLASMA_FUNC_DGEBRD, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dgebrd", "plasma_tune() failed");
        return status;
    }
    /* Number of tile rows (MT) and tile columns (NT), rounded up */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    MT = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);
    /* Check input arguments: each failure returns minus the 1-based position
     * of the offending argument in the PLASMA_dgebrd argument list. */
    if (jobu != PlasmaNoVec  && jobu !=PlasmaVec) {
        plasma_error("PLASMA_dgebrd", "illegal value of jobu");
        return -1;
    }
    if (jobvt != PlasmaNoVec && jobvt != PlasmaVec) {
        plasma_error("PLASMA_dgebrd", "illegal value of jobvt");
        return -2;
    }
    if (M < 0) {
        plasma_error("PLASMA_dgebrd", "illegal value of M");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_dgebrd", "illegal value of N");
        return -4;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_dgebrd", "illegal value of LDA");
        return -6;
    }
    if (LDU < 1) {
        plasma_error("PLASMA_dgebrd", "illegal value of LDU");
        /* LDU is the 10th argument (D and E both precede U). */
        return -10;
    }
    if (LDVT < 1) {
        plasma_error("PLASMA_dgebrd", "illegal value of LDVT");
        /* LDVT is the 12th argument. */
        return -12;
    }
    /* descT must be a valid descriptor of size (MT*IB)-by-(NT*NB). */
    if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) ||
         ( descT->m != MT*IB ) || (descT->n != NT*NB) ) {
        plasma_error("PLASMA_dgebrd", "invalid T descriptor");
        /* descT is the 13th argument. */
        return -13;
    }
    /* Quick return */
    if (min(M, N) == 0) {
        return PLASMA_SUCCESS;
    }
    /* PlasmaVec passes the validity checks above but is rejected here. */
    if (jobu == PlasmaVec) {
        plasma_error("PLASMA_dgebrd", "computing the singular vectors is not supported in this version");
        return -1;
    }
    if (jobvt == PlasmaVec) {
        plasma_error("PLASMA_dgebrd", "computing the singular vectors is not supported in this version");
        return -2;
    }
    plasma_sequence_create(plasma, &sequence);
    /* Translate the inputs. The PlasmaVec branches below cannot currently be
     * reached because PlasmaVec was rejected above.
     * NOTE(review): the trailing argument of plasma_dooplap2tile looks like
     * clean-up code for a failed allocation - confirm against the macro. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA,   A, NB, NB,  LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
        if (jobu == PlasmaVec){
            plasma_dooplap2tile( descU,   U, NB, NB,  LDU, M, 0, 0, M, M, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descU)));
        }
        if (jobvt == PlasmaVec){
            plasma_dooplap2tile( descVT, VT, NB, NB, LDVT, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descU)); plasma_desc_mat_free(&(descVT)));
        }
    } else {
        plasma_diplap2tile( descA,   A, NB, NB,  LDA, N, 0, 0, M, N);
        if (jobu == PlasmaVec){
            plasma_diplap2tile( descU,   U, NB, NB,  LDU, M, 0, 0, M, M);
        }
        if (jobvt == PlasmaVec){
            plasma_diplap2tile( descVT, VT, NB, NB, LDVT, N, 0, 0, N, N);
        }
    }
    /* Call the tile interface */
    PLASMA_dgebrd_Tile_Async(jobu, jobvt, &descA, D, E, &descU, &descVT, descT, sequence, &request);
    /* Translate the results back, synchronize, and in the out-of-place case
     * free the descriptor storage. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descA,   A, NB, NB,  LDA, N );
        if (jobu == PlasmaVec){
            plasma_dooptile2lap( descU,   U, NB, NB,  LDU, M );
        }
        if (jobvt == PlasmaVec){
            plasma_dooptile2lap( descVT, VT, NB, NB, LDVT, N );
        }
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        if (jobu == PlasmaVec){
            plasma_desc_mat_free(&descU);
        }
        if (jobvt == PlasmaVec){
            plasma_desc_mat_free(&descVT);
        }
    } else {
        plasma_diptile2lap( descA,   A, NB, NB,  LDA, N );
        if (jobu == PlasmaVec){
            plasma_diptile2lap( descU,   U, NB, NB,  LDU, M );
        }
        if (jobvt == PlasmaVec){
            plasma_diptile2lap( descVT, VT, NB, NB, LDVT, N );
        }
        plasma_dynamic_sync();
    }
    /* The sequence carries the outcome of the asynchronous call. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dgelqf	(	int	M,
		int	N,
		double *	A,
		int	LDA,
		double *	T
	)

PLASMA_dgelqf - Computes the tile LQ factorization of a real M-by-N matrix A: A = L * Q.

Parameters:

[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in,out]	A	On entry, the M-by-N matrix A. On exit, the elements on and below the diagonal of the array contain the m-by-min(M,N) lower trapezoidal matrix L (L is lower triangular if M <= N); the elements above the diagonal represent the unitary matrix Q as a product of elementary reflectors, stored by tiles.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[out]	T	On exit, auxiliary factorization data, required by PLASMA_dgelqs to solve the system of equations.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_dgelqf_Tile; PLASMA_dgelqf_Tile_Async; PLASMA_cgelqf; PLASMA_dgelqf; PLASMA_sgelqf; PLASMA_dgelqs

Definition at line 62 of file dgelqf.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), PLASMA_dgelqf_Tile_Async(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_DGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaRealDouble, plasma_sequence_t::status, and T.

{
    /*
     * Simple-interface driver for PLASMA_dgelqf: checks the arguments, wraps
     * the caller's T buffer in a descriptor, translates A, runs
     * PLASMA_dgelqf_Tile_Async() and translates A back.
     * Returns -i for an illegal i-th argument, PLASMA_SUCCESS on the quick
     * return, the plasma_tune() status on tuning failure, and otherwise the
     * status stored in the sequence.
     */
    plasma_context_t *plasma = plasma_context_self();
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descT;
    int NB, IB, MT, NT;
    int Tcols;
    int status;

    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dgelqf", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Argument validation */
    if (M < 0) {
        plasma_error("PLASMA_dgelqf", "illegal value of M");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_dgelqf", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_dgelqf", "illegal value of LDA");
        return -4;
    }

    /* Nothing to do for an empty matrix (M and N are non-negative here) */
    if (M == 0 || N == 0) {
        return PLASMA_SUCCESS;
    }

    /* Let the tuner pick NB and IB for this problem size */
    status = plasma_tune(PLASMA_FUNC_DGELS, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dgelqf", "plasma_tune() failed");
        return status;
    }

    /* Tile counts, rounded up */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    MT = M/NB + ((M%NB != 0) ? 1 : 0);
    NT = N/NB + ((N%NB != 0) ? 1 : 0);

    plasma_sequence_create(plasma, &sequence);

    /* T has NT*NB columns for flat Householder; any other setting doubles
     * that to accommodate the tree reduction phase. */
    Tcols = NT*NB;
    if (plasma->householder != PLASMA_FLAT_HOUSEHOLDER) {
        Tcols = 2*Tcols;
    }
    descT = plasma_desc_init(
        PlasmaRealDouble,
        IB, NB, IB*NB,
        MT*IB, Tcols, 0, 0, MT*IB, Tcols);
    descT.mat = T;

    /* Translate A on the way in */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N);
    }

    /* Factorize through the tile interface */
    PLASMA_dgelqf_Tile_Async(&descA, &descT, sequence, &request);

    /* Translate A on the way out; the out-of-place path also frees descA */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dgelqs	(	int	M,
		int	N,
		int	NRHS,
		double *	A,
		int	LDA,
		double *	T,
		double *	B,
		int	LDB
	)

PLASMA_dgelqs - Compute a minimum-norm solution min || A*X - B || using the LQ factorization A = L*Q computed by PLASMA_dgelqf.

Parameters:

[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= M >= 0.
[in]	NRHS	The number of columns of B. NRHS >= 0.
[in]	A	Details of the LQ factorization of the original matrix A as returned by PLASMA_dgelqf.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[in]	T	Auxiliary factorization data, computed by PLASMA_dgelqf.
[in,out]	B	On entry, the M-by-NRHS right hand side matrix B. On exit, the N-by-NRHS solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_dgelqs_Tile; PLASMA_dgelqs_Tile_Async; PLASMA_cgelqs; PLASMA_dgelqs; PLASMA_sgelqs; PLASMA_dgelqf

Definition at line 67 of file dgelqs.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), PLASMA_dgelqs_Tile_Async(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_DGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaRealDouble, plasma_sequence_t::status, and T.

{
    /*
     * Simple-interface driver for PLASMA_dgelqs.
     *
     * Checks the arguments, wraps the caller's T buffer in a descriptor,
     * translates A and B, runs PLASMA_dgelqs_Tile_Async() and translates
     * both matrices back.
     *
     * Return value:
     *   PLASMA_ERR_NOT_INITIALIZED  no PLASMA context is available;
     *   -i                          the i-th argument had an illegal value;
     *   PLASMA_SUCCESS              quick return when M, N or NRHS is zero;
     *   status of plasma_tune()     when tuning fails;
     *   sequence->status            otherwise.
     */
    int NB, IB, IBNB, MT, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB, descT;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dgelqs", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (M < 0) {
        plasma_error("PLASMA_dgelqs", "illegal value of M");
        return -1;
    }
    /* The routine requires N >= M >= 0. */
    if (N < 0 || M > N) {
        plasma_error("PLASMA_dgelqs", "illegal value of N");
        return -2;
    }
    if (NRHS < 0) {
        /* Name the argument that actually failed (was reported as N). */
        plasma_error("PLASMA_dgelqs", "illegal value of NRHS");
        return -3;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_dgelqs", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_dgelqs", "illegal value of LDB");
        return -8;
    }
    /* Quick return */
    if (min(M, min(N, NRHS)) == 0) {
        return PLASMA_SUCCESS;
    }
    /* Tune NB & IB depending on M, N & NRHS */
    status = plasma_tune(PLASMA_FUNC_DGELS, M, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dgelqs", "plasma_tune() failed");
        return status;
    }
    /* Set MT & NT (tile counts, rounded up) */
    NB    = PLASMA_NB;
    IB    = PLASMA_IB;
    IBNB  = IB*NB;
    MT    = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT    = (N%NB==0) ? (N/NB) : (N/NB+1);
    plasma_sequence_create(plasma, &sequence);
    if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
        descT = plasma_desc_init(
            PlasmaRealDouble,
            IB, NB, IBNB,
            MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);
    }
    else {
        /* Double the size of T to accommodate the tree reduction phase */
        descT = plasma_desc_init(
            PlasmaRealDouble,
            IB, NB, IBNB,
            MT*IB, 2*NT*NB, 0, 0, MT*IB, 2*NT*NB);
    }
    /* The descriptor points at the caller-supplied T buffer. */
    descT.mat = T;
    /* B is translated with N rows (not M). */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, M, N   , plasma_desc_mat_free(&(descA)) );
        plasma_dooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, N,    0, 0, M, N   );
        plasma_diplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
    }
    /* Call the tile interface */
    PLASMA_dgelqs_Tile_Async(&descA, &descT, &descB, sequence, &request);
    /* Translate back, synchronize, and in the out-of-place case free the
     * descriptor storage. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_dooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_diptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }

    /* The sequence carries the outcome of the asynchronous call. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dgels	(	PLASMA_enum	trans,
		int	M,
		int	N,
		int	NRHS,
		double *	A,
		int	LDA,
		double *	T,
		double *	B,
		int	LDB
	)

PLASMA_dgels - solves overdetermined or underdetermined linear systems involving an M-by-N matrix A using the QR or the LQ factorization of A. It is assumed that A has full rank. The following options are provided:

trans = PlasmaNoTrans and M >= N: find the least squares solution of an overdetermined system, i.e., solve the least squares problem: minimize || B - A*X ||.

trans = PlasmaNoTrans and M < N: find the minimum norm solution of an underdetermined system A * X = B.

Several right hand side vectors B and solution vectors X can be handled in a single call; they are stored as the columns of the M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix X.

Parameters:

[in]	trans	Intended usage: = PlasmaNoTrans: the linear system involves A; = PlasmaTrans: the linear system involves A**T. Currently only PlasmaNoTrans is supported.
[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrices B and X. NRHS >= 0.
[in,out]	A	On entry, the M-by-N matrix A. On exit, if M >= N, A is overwritten by details of its QR factorization as returned by PLASMA_dgeqrf; if M < N, A is overwritten by details of its LQ factorization as returned by PLASMA_dgelqf.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[out]	T	On exit, auxiliary factorization data.
[in,out]	B	On entry, the M-by-NRHS matrix B of right hand side vectors, stored columnwise; On exit, if return value = 0, B is overwritten by the solution vectors, stored columnwise: if M >= N, rows 1 to N of B contain the least squares solution vectors; the residual sum of squares for the solution in each column is given by the sum of squares of the modulus of elements N+1 to M in that column; if M < N, rows 1 to N of B contain the minimum norm solution vectors;
[in]	LDB	The leading dimension of the array B. LDB >= MAX(1,M,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_dgels_Tile; PLASMA_dgels_Tile_Async; PLASMA_cgels; PLASMA_dgels; PLASMA_sgels

Definition at line 94 of file dgels.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), PLASMA_dgels_Tile_Async(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_NOT_SUPPORTED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_DGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaNoTrans, PlasmaRealDouble, plasma_sequence_t::status, and T.

{
    /*
     * Simple-interface driver for PLASMA_dgels: checks the arguments, wraps
     * the caller's T buffer in a descriptor, translates A and B, runs
     * PLASMA_dgels_Tile_Async() with PlasmaNoTrans and translates both
     * matrices back.
     * Returns PLASMA_ERR_NOT_SUPPORTED for any trans other than
     * PlasmaNoTrans, -i for an illegal i-th argument, PLASMA_SUCCESS on the
     * quick return, the plasma_tune() status on tuning failure, and
     * otherwise the status stored in the sequence.
     */
    plasma_context_t *plasma = plasma_context_self();
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB, descT;
    int NB, IB, MT, NT;
    int Brows, Tcols;
    int row, col;
    int status;

    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dgels", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Argument validation */
    if (trans != PlasmaNoTrans) {
        plasma_error("PLASMA_dgels", "only PlasmaNoTrans supported");
        return PLASMA_ERR_NOT_SUPPORTED;
    }
    if (M < 0) {
        plasma_error("PLASMA_dgels", "illegal value of M");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_dgels", "illegal value of N");
        return -3;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_dgels", "illegal value of NRHS");
        return -4;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_dgels", "illegal value of LDA");
        return -6;
    }
    if (LDB < max(1, max(M, N))) {
        plasma_error("PLASMA_dgels", "illegal value of LDB");
        return -9;
    }

    /* B is handled with max(M, N) rows throughout */
    Brows = (M >= N) ? M : N;

    /* Degenerate problem (all three sizes are non-negative here): zero the
     * leading Brows-by-NRHS part of B and stop. */
    if (M == 0 || N == 0 || NRHS == 0) {
        for (col = 0; col < NRHS; col++) {
            for (row = 0; row < Brows; row++) {
                B[col*LDB+row] = 0.0;
            }
        }
        return PLASMA_SUCCESS;
    }

    /* Let the tuner pick NB and IB for this problem size */
    status = plasma_tune(PLASMA_FUNC_DGELS, M, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dgels", "plasma_tune() failed");
        return status;
    }

    /* Tile counts, rounded up */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    NT = N/NB + ((N%NB != 0) ? 1 : 0);
    MT = M/NB + ((M%NB != 0) ? 1 : 0);

    plasma_sequence_create(plasma, &sequence);

    /* T has NT*NB columns for flat Householder; any other setting doubles
     * that to accommodate the tree reduction phase. */
    Tcols = NT*NB;
    if (plasma->householder != PLASMA_FLAT_HOUSEHOLDER) {
        Tcols = 2*Tcols;
    }
    descT = plasma_desc_init(
        PlasmaRealDouble,
        IB, NB, IB*NB,
        MT*IB, Tcols, 0, 0, MT*IB, Tcols);
    descT.mat = T;

    /* Translate A (M-by-N) and B (Brows-by-NRHS) on the way in */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, M, N   , plasma_desc_mat_free(&(descA)) );
        plasma_dooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, Brows, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, N,    0, 0, M, N   );
        plasma_diplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, Brows, NRHS);
    }

    /* Solve through the tile interface */
    PLASMA_dgels_Tile_Async(PlasmaNoTrans, &descA, &descT, &descB, sequence, &request);

    /* Translate on the way out; the out-of-place path also frees the
     * descriptor storage */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_dooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_diptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dgemm	(	PLASMA_enum	transA,
		PLASMA_enum	transB,
		int	M,
		int	N,
		int	K,
		double	alpha,
		double *	A,
		int	LDA,
		double *	B,
		int	LDB,
		double	beta,
		double *	C,
		int	LDC
	)

PLASMA_dgemm - Performs one of the matrix-matrix operations

$C = \alpha [op( A )\times op( B )] + \beta C$

,

where op( X ) is one of

op( X ) = X or op( X ) = X' or op( X ) = conjg( X' )

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters:

[in]	transA	Specifies whether the matrix A is transposed, not transposed or conjugate transposed: = PlasmaNoTrans: A is not transposed; = PlasmaTrans: A is transposed. For real matrices, conjugate transposition is the same as transposition, so PlasmaTrans covers both cases.
[in]	transB	Specifies whether the matrix B is transposed, not transposed or conjugate transposed: = PlasmaNoTrans: B is not transposed; = PlasmaTrans: B is transposed. For real matrices, conjugate transposition is the same as transposition, so PlasmaTrans covers both cases.
[in]	M	M specifies the number of rows of the matrix op( A ) and of the matrix C. M >= 0.
[in]	N	N specifies the number of columns of the matrix op( B ) and of the matrix C. N >= 0.
[in]	K	K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K >= 0.
[in]	alpha	alpha specifies the scalar alpha
[in]	A	A is a LDA-by-ka matrix, where ka is K when transA = PlasmaNoTrans, and is M otherwise.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M) when transA = PlasmaNoTrans, and LDA >= max(1,K) otherwise.
[in]	B	B is a LDB-by-kb matrix, where kb is N when transB = PlasmaNoTrans, and is K otherwise.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,K) when transB = PlasmaNoTrans, and LDB >= max(1,N) otherwise.
[in]	beta	beta specifies the scalar beta
[in,out]	C	C is a LDC-by-N matrix. On exit, the array is overwritten by the M by N matrix ( alpha*op( A )*op( B ) + beta*C )
[in]	LDC	The leading dimension of the array C. LDC >= max(1,M).

Returns:

Return values:

PLASMA_SUCCESS successful exit

See also:: PLASMA_dgemm_Tile; PLASMA_cgemm; PLASMA_dgemm; PLASMA_sgemm

Definition at line 96 of file dgemm.c.

References max, plasma_context_self(), plasma_desc_mat_free(), PLASMA_dgemm_Tile_Async(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DGEMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaNoTrans, PlasmaTrans, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout entry point for the double precision matrix-matrix
     * product C = alpha*op(A)*op(B) + beta*C.
     *
     * The routine validates its arguments, converts A, B and C into PLASMA
     * descriptors, submits PLASMA_dgemm_Tile_Async() and converts the
     * result back to the caller's layout.
     *
     * Returns PLASMA_SUCCESS on a quick return, -i when the i-th argument
     * is illegal, the plasma_tune() status when tuning fails, and the final
     * status of the sequence otherwise.
     */
    int NB;
    int Am, An, Bm, Bn;
    int status;
    PLASMA_desc descA, descB, descC;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dgemm", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if ((transA != PlasmaNoTrans) && (transA != PlasmaTrans)) {
        plasma_error("PLASMA_dgemm", "illegal value of transA");
        return -1;
    }
    if ((transB != PlasmaNoTrans) && (transB != PlasmaTrans)) {
        plasma_error("PLASMA_dgemm", "illegal value of transB");
        return -2;
    }
    /* Am-by-An and Bm-by-Bn are the stored (untransposed) shapes of A and B */
    if ( transA == PlasmaNoTrans ) {
        Am = M; An = K;
    } else {
        Am = K; An = M;
    }
    if ( transB == PlasmaNoTrans ) {
        Bm = K; Bn = N;
    } else {
        Bm = N; Bn = K;
    }
    if (M < 0) {
        plasma_error("PLASMA_dgemm", "illegal value of M");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_dgemm", "illegal value of N");
        return -4;
    }
    if (K < 0) {
        /* Report the argument that is actually wrong (K, not N) */
        plasma_error("PLASMA_dgemm", "illegal value of K");
        return -5;
    }
    if (LDA < max(1, Am)) {
        plasma_error("PLASMA_dgemm", "illegal value of LDA");
        return -8;
    }
    if (LDB < max(1, Bm)) {
        plasma_error("PLASMA_dgemm", "illegal value of LDB");
        return -10;
    }
    if (LDC < max(1, M)) {
        plasma_error("PLASMA_dgemm", "illegal value of LDC");
        return -13;
    }
    /* Quick return: C is empty, or the update leaves C unchanged */
    if (M == 0 || N == 0 ||
        ((alpha == (double)0.0 || K == 0) && beta == (double)1.0))
        return PLASMA_SUCCESS;
    /* Tune NB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_DGEMM, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dgemm", "plasma_tune() failed");
        return status;
    }
    /* Set NB (used as both tile dimensions in the conversions below) */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /* The last argument of each out-of-place conversion lists the
     * descriptors to release; presumably it runs when the conversion
     * fails - confirm against the macro definition. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)) );
        plasma_dooplap2tile( descB, B, NB, NB, LDB, Bn, 0, 0, Bm, Bn, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
        plasma_dooplap2tile( descC, C, NB, NB, LDC, N,  0, 0, M,  N,  plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descC)));
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An );
        plasma_diplap2tile( descB, B, NB, NB, LDB, Bn, 0, 0, Bm, Bn );
        plasma_diplap2tile( descC, C, NB, NB, LDC, N,  0, 0, M,  N  );
    }
    /* Call the tile interface */
    PLASMA_dgemm_Tile_Async(
        transA, transB, alpha, &descA, &descB, beta, &descC, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* Only C is an output, so only C is converted back */
        plasma_dooptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
        plasma_desc_mat_free(&descC);
    } else {
        /* In-place mode converts all three arrays back */
        plasma_diptile2lap( descA, A, NB, NB, LDA, An );
        plasma_diptile2lap( descB, B, NB, NB, LDB, Bn );
        plasma_diptile2lap( descC, C, NB, NB, LDC, N  );
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dgeqrf	(	int	M,
		int	N,
		double *	A,
		int	LDA,
		double *	T
	)

PLASMA_dgeqrf - Computes the tile QR factorization of a real M-by-N matrix A: A = Q * R.

Parameters:

[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in,out]	A	On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if M >= N); the elements below the diagonal represent the unitary matrix Q as a product of elementary reflectors stored by tiles.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[out]	T	On exit, auxiliary factorization data, required by PLASMA_dgeqrs to solve the system of equations.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_dgeqrf_Tile; PLASMA_dgeqrf_Tile_Async; PLASMA_cgeqrf; PLASMA_dgeqrf; PLASMA_sgeqrf; PLASMA_dgeqrs

Definition at line 61 of file dgeqrf.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), PLASMA_dgeqrf_Tile_Async(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_DGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaRealDouble, plasma_sequence_t::status, and T.

{
    /*
     * LAPACK-layout entry point for the tile QR factorization.
     *
     * Validates the arguments, builds a descriptor over the caller-supplied
     * T array, converts A to a PLASMA descriptor, submits
     * PLASMA_dgeqrf_Tile_Async() and converts A back.
     *
     * Returns PLASMA_SUCCESS on a quick return, -i when the i-th argument
     * is illegal, the plasma_tune() status when tuning fails, and the final
     * status of the sequence otherwise.
     */
    int NB, IB, IBNB, MT, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descT;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dgeqrf", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (M < 0) {
        plasma_error("PLASMA_dgeqrf", "illegal value of M");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_dgeqrf", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_dgeqrf", "illegal value of LDA");
        return -4;
    }
    /* Quick return */
    if (min(M, N) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
    /* Note: the DGELS tuning entry is used for this routine */
    status = plasma_tune(PLASMA_FUNC_DGELS, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dgeqrf", "plasma_tune() failed");
        return status;
    }
    /* Set MT & NT: ceil(M/NB) and ceil(N/NB) */
    NB   = PLASMA_NB;
    IB   = PLASMA_IB;
    IBNB = IB*NB;
    MT   = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT   = (N%NB==0) ? (N/NB) : (N/NB+1);
    plasma_sequence_create(plasma, &sequence);
 
     /* Describe T as (MT*IB)-by-(NT*NB) with IB-by-NB blocks */
     if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
        descT = plasma_desc_init(
            PlasmaRealDouble,
            IB, NB, IBNB,
            MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);
    }
    else {
        /* Double the size of T to accommodate the tree reduction phase */
        descT = plasma_desc_init(
            PlasmaRealDouble,
            IB, NB, IBNB,
            MT*IB, 2*NT*NB, 0, 0, MT*IB, 2*NT*NB);
    }
    /* The descriptor's storage is the caller-supplied T array */
    descT.mat = T;
    
    /* Convert A from the caller's layout into descA */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N);
    }
    /* Call the tile interface */
    PLASMA_dgeqrf_Tile_Async(&descA, &descT, sequence, &request);
    /* Convert A back, synchronize, and release descA in out-of-place mode */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    /* Report the sequence status to the caller */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dgeqrs	(	int	M,
		int	N,
		int	NRHS,
		double *	A,
		int	LDA,
		double *	T,
		double *	B,
		int	LDB
	)

PLASMA_dgeqrs - Computes a least-squares solution min || A*X - B || using the QR factorization A = Q*R computed by PLASMA_dgeqrf.

Parameters:

[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. M >= N >= 0.
[in]	NRHS	The number of columns of B. NRHS >= 0.
[in,out]	A	Details of the QR factorization of the original matrix A as returned by PLASMA_dgeqrf.
[in]	LDA	The leading dimension of the array A. LDA >= M.
[in]	T	Auxiliary factorization data, computed by PLASMA_dgeqrf.
[in,out]	B	On entry, the m-by-nrhs right hand side matrix B. On exit, the n-by-nrhs solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,M).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_dgeqrs_Tile; PLASMA_dgeqrs_Tile_Async; PLASMA_cgeqrs; PLASMA_dgeqrs; PLASMA_sgeqrs; PLASMA_dgeqrf

Definition at line 67 of file dgeqrs.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), PLASMA_dgeqrs_Tile_Async(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_DGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaRealDouble, plasma_sequence_t::status, and T.

{
    /*
     * LAPACK-layout entry point for solving with a QR factorization
     * previously computed by PLASMA_dgeqrf.
     *
     * Validates the arguments (this routine requires M >= N), builds a
     * descriptor over the caller-supplied T array, converts A and B to
     * PLASMA descriptors, submits PLASMA_dgeqrs_Tile_Async() and converts
     * A and B back.
     *
     * Returns PLASMA_SUCCESS on a quick return, -i when the i-th argument
     * is illegal, the plasma_tune() status when tuning fails, and the final
     * status of the sequence otherwise.
     */
    int NB, IB, IBNB, MT, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB, descT;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dgeqrs", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (M < 0) {
        plasma_error("PLASMA_dgeqrs", "illegal value of M");
        return -1;
    }
    if (N < 0 || N > M) {
        plasma_error("PLASMA_dgeqrs", "illegal value of N");
        return -2;
    }
    if (NRHS < 0) {
        /* Report the argument that is actually wrong (NRHS, not N) */
        plasma_error("PLASMA_dgeqrs", "illegal value of NRHS");
        return -3;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_dgeqrs", "illegal value of LDA");
        return -5;
    }
    /* B is converted below as an M-by-NRHS matrix, hence the bound on M */
    if (LDB < max(1, M)) {
        plasma_error("PLASMA_dgeqrs", "illegal value of LDB");
        return -8;
    }
    /* Quick return */
    if (min(M, min(N, NRHS)) == 0) {
        return PLASMA_SUCCESS;
    }
    /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_DGELS, M, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dgeqrs", "plasma_tune() failed");
        return status;
    }
    /* Set MT & NT: ceil(M/NB) and ceil(N/NB) */
    NB   = PLASMA_NB;
    IB   = PLASMA_IB;
    IBNB = IB*NB;
    MT   = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT   = (N%NB==0) ? (N/NB) : (N/NB+1);
    plasma_sequence_create(plasma, &sequence);
    /* Describe T as (MT*IB)-by-(NT*NB) with IB-by-NB blocks */
    if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
        descT = plasma_desc_init(
            PlasmaRealDouble,
            IB, NB, IBNB,
            MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);
    }
    else {
        /* Double the size of T to accommodate the tree reduction phase */
        descT = plasma_desc_init(
            PlasmaRealDouble,
            IB, NB, IBNB,
            MT*IB, 2*NT*NB, 0, 0, MT*IB, 2*NT*NB);
    }
    /* The descriptor's storage is the caller-supplied T array */
    descT.mat = T;
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, M, N   , plasma_desc_mat_free(&(descA)) );
        plasma_dooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, M, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, N,    0, 0, M, N   );
        plasma_diplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, M, NRHS);
    }
    /* Call the tile interface */
    PLASMA_dgeqrs_Tile_Async(&descA, &descT, &descB, sequence, &request);
    /* Convert A and B back, synchronize, and release the out-of-place copies */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_dooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_diptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dgesv	(	int	N,
		int	NRHS,
		double *	A,
		int	LDA,
		int *	IPIV,
		double *	B,
		int	LDB
	)

PLASMA_dgesv - Computes the solution to a system of linear equations A * X = B, where A is an N-by-N matrix and X and B are N-by-NRHS matrices. The tile LU decomposition with partial tile pivoting and row interchanges is used to factor A. The factored form of A is then used to solve the system of equations A * X = B.

Parameters:

[in]	N	The number of linear equations, i.e., the order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in,out]	A	On entry, the N-by-N coefficient matrix A. On exit, the tile L and U factors from the factorization.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[out]	IPIV	On exit, the pivot indices that define the permutations.
[in,out]	B	On entry, the N-by-NRHS matrix of right hand side matrix B. On exit, if return value = 0, the N-by-NRHS solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, so the solution could not be computed.

See also:: PLASMA_dgesv_Tile; PLASMA_dgesv_Tile_Async; PLASMA_cgesv; PLASMA_dgesv; PLASMA_sgesv

Definition at line 70 of file dgesv.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), PLASMA_dgesv_Tile_Async(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), PLASMA_FUNC_DGESV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout entry point for solving A * X = B with an N-by-N
     * matrix A and N-by-NRHS right hand sides.
     *
     * Validates the arguments, converts A and B to PLASMA descriptors,
     * submits PLASMA_dgesv_Tile_Async() and converts A and B back.
     *
     * Returns PLASMA_SUCCESS on a quick return, -i when the i-th argument
     * is illegal, the plasma_tune() status when tuning fails, and the final
     * status of the sequence otherwise.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_error("PLASMA_dgesv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (N < 0) {
        plasma_error("PLASMA_dgesv", "illegal value of N");
        return -1;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_dgesv", "illegal value of NRHS");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_dgesv", "illegal value of LDA");
        return -4;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_dgesv", "illegal value of LDB");
        /* LDB is the 7th argument (N, NRHS, A, LDA, IPIV, B, LDB) */
        return -7;
    }
    /* Quick return */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_DGESV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dgesv", "plasma_tune() failed");
        return status;
    }
    /* Set NB (used as both tile dimensions in the conversions below) */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /* Convert A (N-by-N) and B (N-by-NRHS) from the caller's layout */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_dooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        plasma_diplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
    }
    /* Call the tile interface */
    PLASMA_dgesv_Tile_Async(&descA, IPIV, &descB, sequence, &request);
    /* Convert A and B back, synchronize, and release the out-of-place copies */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_dooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_diptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dgesv_incpiv	(	int	N,
		int	NRHS,
		double *	A,
		int	LDA,
		double *	L,
		int *	IPIV,
		double *	B,
		int	LDB
	)

PLASMA_dgesv_incpiv - Computes the solution to a system of linear equations A * X = B, where A is an N-by-N matrix and X and B are N-by-NRHS matrices. The tile LU decomposition with partial tile pivoting and row interchanges is used to factor A. The factored form of A is then used to solve the system of equations A * X = B.

Parameters:

[in]	N	The number of linear equations, i.e., the order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in,out]	A	On entry, the N-by-N coefficient matrix A. On exit, the tile L and U factors from the factorization (not equivalent to LAPACK).
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[out]	L	On exit, auxiliary factorization data, related to the tile L factor, necessary to solve the system of equations.
[out]	IPIV	On exit, the pivot indices that define the permutations (not equivalent to LAPACK).
[in,out]	B	On entry, the N-by-NRHS matrix of right hand side matrix B. On exit, if return value = 0, the N-by-NRHS solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, so the solution could not be computed.

See also:: PLASMA_dgesv_incpiv_Tile; PLASMA_dgesv_incpiv_Tile_Async; PLASMA_cgesv_incpiv; PLASMA_dgesv_incpiv; PLASMA_sgesv_incpiv

Definition at line 73 of file dgesv_incpiv.c.

References L, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), PLASMA_dgesv_incpiv_Tile_Async(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), PLASMA_FUNC_DGESV, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaRealDouble, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout entry point for solving A * X = B with an N-by-N
     * matrix A, using the incremental-pivoting tile routine.
     *
     * Validates the arguments, builds a descriptor over the caller-supplied
     * L array, converts A and B to PLASMA descriptors, submits
     * PLASMA_dgesv_incpiv_Tile_Async() and converts A and B back.
     *
     * Returns PLASMA_SUCCESS on a quick return, -i when the i-th argument
     * is illegal, the plasma_tune() status when tuning fails, and the final
     * status of the sequence otherwise.
     */
    int NB, IB, IBNB, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB, descL;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_error("PLASMA_dgesv_incpiv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (N < 0) {
        plasma_error("PLASMA_dgesv_incpiv", "illegal value of N");
        return -1;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_dgesv_incpiv", "illegal value of NRHS");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_dgesv_incpiv", "illegal value of LDA");
        return -4;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_dgesv_incpiv", "illegal value of LDB");
        return -8;
    }
    /* Quick return */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_DGESV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dgesv_incpiv", "plasma_tune() failed");
        return status;
    }
    /* Set NT: ceil(N/NB) */
    NB    = PLASMA_NB;
    IB    = PLASMA_IB;
    IBNB  = IB*NB;
    NT    = (N%NB==0) ? (N/NB) : (N/NB+1);
    plasma_sequence_create(plasma, &sequence);
    /* Describe L as (NT*IB)-by-(NT*NB) with IB-by-NB blocks */
    descL = plasma_desc_init(
        PlasmaRealDouble,
        IB, NB, IBNB,
        NT*IB, NT*NB, 0, 0, NT*IB, NT*NB);
    /* The descriptor's storage is the caller-supplied L array */
    descL.mat = L;
    /* Convert A (N-by-N) and B (N-by-NRHS) from the caller's layout */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_dooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        plasma_diplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
    }
    /* Call the tile interface */
    PLASMA_dgesv_incpiv_Tile_Async(&descA, &descL, IPIV, &descB, sequence, &request);
    /* Convert A and B back, synchronize, and release the out-of-place copies */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_dooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_diptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    
    /* Report the sequence status to the caller */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dgesvd	(	PLASMA_enum	jobu,
		PLASMA_enum	jobvt,
		int	M,
		int	N,
		double *	A,
		int	LDA,
		double *	S,
		double *	U,
		int	LDU,
		double *	VT,
		int	LDVT,
		PLASMA_desc *	descT
	)

PLASMA_dgesvd - computes the singular value decomposition (SVD) of a real M-by-N matrix A, optionally computing the left and/or right singular vectors. The SVD is written

 A = U * SIGMA * transpose(V)

where SIGMA is an M-by-N matrix which is zero except for its min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA are the singular values of A; they are real and non-negative, and are returned in descending order. The first min(m,n) columns of U and V are the left and right singular vectors of A.

Note that the routine returns V**T, not V. Not LAPACK Compliant for now! Note: Only PlasmaNoVec supported!

Parameters:

[in]	jobu	Specifies options for computing all or part of the matrix U. Intended usage: = PlasmaVec: all M columns of U are returned in array U; = PlasmaNoVec: no columns of U (no left singular vectors) are computed. Note: Only PlasmaNoVec supported!
[in]	jobvt	Specifies options for computing all or part of the matrix V**T. Intended usage: = PlasmaVec: all N rows of V**T are returned in the array VT; = PlasmaNoVec: no rows of V**T (no right singular vectors) are computed. Note: Only PlasmaNoVec supported!
[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in,out]	A	On entry, the M-by-N matrix A. On exit, if JOBU = 'O', A is overwritten with the first min(m,n) columns of U (the left singular vectors, stored columnwise); if JOBVT = 'O', A is overwritten with the first min(m,n) rows of V**T (the right singular vectors, stored rowwise); if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A are destroyed.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[out]	S	The double precision singular values of A, sorted so that S(i) >= S(i+1).
[out]	U	(LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. If JOBU = 'A', U contains the M-by-M unitary matrix U; if JOBU = 'S', U contains the first min(m,n) columns of U (the left singular vectors, stored columnwise); if JOBU = 'N' or 'O', U is not referenced.
[in]	LDU	The leading dimension of the array U. LDU >= 1; if JOBU = 'S' or 'A', LDU >= M.
[out]	VT	If JOBVT = 'A', VT contains the N-by-N unitary matrix VT; if JOBVT = 'S', VT contains the first min(m,n) rows of VT (the right singular vectors, stored rowwise); if JOBVT = 'N' or 'O', VT is not referenced.
[in]	LDVT	The leading dimension of the array VT. LDVT >= 1; if JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
[in,out]	descT	On entry, descriptor as returned by PLASMA_Alloc_Workspace_dgesvd. On exit, contains auxiliary factorization data.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_dgesvd_Tile; PLASMA_dgesvd_Tile_Async; PLASMA_cgesvd; PLASMA_dgesvd; PLASMA_sgesvd

Definition at line 123 of file dgesvd.c.

References plasma_desc_t::m, max, min, plasma_desc_t::n, plasma_context_self(), plasma_desc_check(), plasma_desc_mat_free(), PLASMA_dgesvd_Tile_Async(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DGESVD, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaNoVec, PlasmaVec, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout entry point for the singular value decomposition.
     *
     * Tunes the tile sizes, validates the arguments (including the shape
     * of the caller-supplied descT), converts A to a PLASMA descriptor,
     * submits PLASMA_dgesvd_Tile_Async() and converts A back.
     *
     * Returns PLASMA_SUCCESS on a quick return, -i when the i-th argument
     * is illegal (-1 / -2 are also returned when PlasmaVec is requested,
     * which is rejected as unsupported), the plasma_tune() status when
     * tuning fails, and the final status of the sequence otherwise.
     */
    int NB, IB, MT, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descU, descVT;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dgesvd", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    
    /* Tune NB & IB depending on M & N; Set NBNB */
    /* Tuning runs before argument checking because the descT check below
     * needs MT, NT, IB and NB. */
    status = plasma_tune(PLASMA_FUNC_DGESVD, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dgesvd", "plasma_tune() failed");
        return status;
    }
    /* Set MT, NT: ceil(M/NB) and ceil(N/NB) */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    MT = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);
    /* Check input arguments */
    if (jobu != PlasmaNoVec  && jobu !=PlasmaVec) {
        plasma_error("PLASMA_dgesvd", "illegal value of jobu");
        return -1;
    }
    if (jobvt != PlasmaNoVec && jobvt != PlasmaVec) {
        plasma_error("PLASMA_dgesvd", "illegal value of jobvt");
        return -2;
    }
    if (M < 0) {
        plasma_error("PLASMA_dgesvd", "illegal value of M");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_dgesvd", "illegal value of N");
        return -4;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_dgesvd", "illegal value of LDA");
        return -6;
    }
    if (LDU < 1) {
        plasma_error("PLASMA_dgesvd", "illegal value of LDU");
        return -9;
    }
    if (LDVT < 1) {
        plasma_error("PLASMA_dgesvd", "illegal value of LDVT");
        return -11;
    }
    /* descT must be a valid descriptor of size (MT*IB)-by-(NT*NB) */
    if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) || 
         ( descT->m != MT*IB ) || (descT->n != NT*NB) ) {
        plasma_error("PLASMA_dgesvd", "invalid T descriptor");
        return -12;
    }
    /* Quick return */
    if (min(M, N) == 0) {
        return PLASMA_SUCCESS;
    }
    /* Singular vectors are not supported: reject PlasmaVec for either side.
     * Past this point jobu and jobvt are both PlasmaNoVec, so the
     * PlasmaVec branches below cannot execute in this version. */
    if (jobu == PlasmaVec) {
        plasma_error("PLASMA_dgesvd", "computing the singular vectors is not supported in this version");
        return -1;
    }
    if (jobvt == PlasmaVec) {
        plasma_error("PLASMA_dgesvd", "computing the singular vectors is not supported in this version");
        return -2;
    }
    plasma_sequence_create(plasma, &sequence);
    /* Convert A (and U / VT when vectors are requested) from the caller's layout */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA,   A, NB, NB,  LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
        if (jobu == PlasmaVec){
            plasma_dooplap2tile( descU,   U, NB, NB,  LDU, M, 0, 0, M, M, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descU)));
        }
        if (jobvt == PlasmaVec){
            plasma_dooplap2tile( descVT, VT, NB, NB, LDVT, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descU)); plasma_desc_mat_free(&(descVT)));
        }
    } else {
        plasma_diplap2tile( descA,   A, NB, NB,  LDA, N, 0, 0, M, N);
        if (jobu == PlasmaVec){
            plasma_diplap2tile( descU,   U, NB, NB,  LDU, M, 0, 0, M, M);
        }
        if (jobvt == PlasmaVec){
            plasma_diplap2tile( descVT, VT, NB, NB, LDVT, N, 0, 0, N, N);
        }
    }
    /* Call the tile interface */
    /* NOTE(review): descU and descVT are not initialized on the path that
     * reaches this call - confirm the tile routine does not read them when
     * jobu / jobvt are PlasmaNoVec. */
    PLASMA_dgesvd_Tile_Async(jobu, jobvt, &descA, S, &descU, &descVT, descT, sequence, &request);
    /* Convert the results back, synchronize, and release out-of-place copies */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descA,   A, NB, NB,  LDA, N );
        if (jobu == PlasmaVec){
            plasma_dooptile2lap( descU,   U, NB, NB,  LDU, M );
        }
        if (jobvt == PlasmaVec){
            plasma_dooptile2lap( descVT, VT, NB, NB, LDVT, N );
        }
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        if (jobu == PlasmaVec){
            plasma_desc_mat_free(&descU);
        }
        if (jobvt == PlasmaVec){
            plasma_desc_mat_free(&descVT);
        }
    } else {
        plasma_diptile2lap( descA,   A, NB, NB,  LDA, N );
        if (jobu == PlasmaVec){
            plasma_diptile2lap( descU,   U, NB, NB,  LDU, M );
        }
        if (jobvt == PlasmaVec){
            plasma_diptile2lap( descVT, VT, NB, NB, LDVT, N );
        }
        plasma_dynamic_sync();
    }
    /* Report the sequence status to the caller */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dgetrf	(	int	M,
		int	N,
		double *	A,
		int	LDA,
		int *	IPIV
	)

PLASMA_dgetrf - Computes an LU factorization of a general M-by-N matrix A using the tile LU algorithm with partial tile pivoting with row interchanges.

Parameters:

[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in,out]	A	On entry, the M-by-N matrix to be factored. On exit, the tile factors L and U from the factorization.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[out]	IPIV	The pivot indices that define the permutations.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.

See also:: PLASMA_dgetrf_Tile; PLASMA_dgetrf_Tile_Async; PLASMA_cgetrf; PLASMA_dgetrf; PLASMA_sgetrf

Definition at line 62 of file dgetrf.c.

References A, plasma_desc_t::mat, max, plasma_desc_t::mb, min, plasma_desc_t::mt, plasma_context_self(), plasma_desc_init(), plasma_dynamic_call_4, plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DGESV, PLASMA_NB, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, plasma_tune(), PlasmaRealDouble, and plasma_sequence_t::status.

{
    /*
     * Body of PLASMA_dgetrf.  Validates the arguments, builds a descriptor
     * that points directly at the caller's array A (this block invokes no
     * lap2tile/tile2lap conversion macro), runs plasma_pdgetrf_reclap through
     * the dynamic scheduler and finally offsets the pivot indices.
     *
     * Return value: PLASMA_ERR_NOT_INITIALIZED when no PLASMA context exists,
     * -1 / -2 / -4 for an illegal M / N / LDA, the plasma_tune() status when
     * tuning fails, PLASMA_SUCCESS for an empty matrix, otherwise the status
     * stored in the sequence.
     */
    int NB, NBNB, minMN;
    int status;
    PLASMA_desc descA ;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dgetrf", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments; the negative return value is the argument position */
    if (M < 0) {
        plasma_error("PLASMA_dgetrf", "illegal value of M");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_dgetrf", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_dgetrf", "illegal value of LDA");
        return -4;
    }
    /* Quick return: nothing to factor; IPIV is left untouched on this path */
    if (min(M, N) == 0)
        return PLASMA_SUCCESS;
    /* Tune the blocking parameters for an M-by-N problem (DGESV tuning entry, no right-hand sides) */
    status = plasma_tune(PLASMA_FUNC_DGESV, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dgetrf", "plasma_tune() failed");
        return status;
    }
    /* Tile size and number of elements in one NB-by-NB tile */
    NB   = PLASMA_NB;
    NBNB = NB*NB;
    plasma_sequence_create(plasma, &sequence);
    /* Describe A with NB-by-NB blocking; the descriptor's storage is the caller's array itself */
    descA = plasma_desc_init(
        PlasmaRealDouble,
        NB, NB, NBNB,
        LDA, N, 0, 0, M, N);
    descA.mat = A;
    minMN = min(M, N);
    /* Zero the min(M,N) pivot entries before the factorization runs */
    memset(IPIV, 0, minMN*sizeof(int));
    /* Submit the factorization and wait for it to finish */
    plasma_dynamic_call_4(plasma_pdgetrf_reclap,
        PLASMA_desc, descA,
        int*, IPIV,
        PLASMA_sequence*, sequence,
        PLASMA_request*, &request);
    plasma_dynamic_sync();
    /*
     * Generate the correct IPIV (Has to be moved in a task)
     *
     * For every block row i >= 1, add the block row's first row index
     * (i * mb) to each of its pivot entries.  Block row 0 has offset 0 and
     * is skipped.
     * NOTE(review): this presumes plasma_pdgetrf_reclap leaves the pivots
     * relative to the start of their block row - confirm against that routine.
     */
    { 
        int i, inc, tmp, j;
        for(i=1; i<descA.mt; i++) {
            inc = i*descA.mb;
            /* Number of pivots in this block row, clipped to the min(M,N) total */
            tmp = min( minMN - inc, descA.mb);
            /* Past the last pivot: no further block row holds any */
            if ( tmp < 1 )
              break;
            
            for (j=0; j<tmp; j++)
                IPIV[inc+j] = IPIV[inc+j] + inc;
        }
    }
    /* Read the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dgetrf_incpiv	(	int	M,
		int	N,
		double *	A,
		int	LDA,
		double *	L,
		int *	IPIV
	)

PLASMA_dgetrf_incpiv - Computes an LU factorization of a general M-by-N matrix A using the tile LU algorithm with partial tile pivoting with row interchanges.

Parameters:

[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in,out]	A	On entry, the M-by-N matrix to be factored. On exit, the tile factors L and U from the factorization.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[out]	L	On exit, auxiliary factorization data, related to the tile L factor, required by PLASMA_dgetrs_incpiv to solve the system of equations.
[out]	IPIV	The pivot indices that define the permutations (not equivalent to LAPACK).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.

See also:: PLASMA_dgetrf_incpiv_Tile; PLASMA_dgetrf_incpiv_Tile_Async; PLASMA_cgetrf_incpiv; PLASMA_dgetrf_incpiv; PLASMA_sgetrf_incpiv; PLASMA_dgetrs_incpiv

Definition at line 65 of file dgetrf_incpiv.c.

References L, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), PLASMA_dgetrf_incpiv_Tile_Async(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DGESV, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaRealDouble, and plasma_sequence_t::status.

{
    /*
     * Body of PLASMA_dgetrf_incpiv.  Validates the arguments, wraps the
     * caller's L array in a descriptor, brings A into the layout expected by
     * the tile interface (out-of-place or in-place depending on
     * PLASMA_TRANSLATION), calls PLASMA_dgetrf_incpiv_Tile_Async and
     * translates A back.
     *
     * Return value: PLASMA_ERR_NOT_INITIALIZED when no PLASMA context exists,
     * -1 / -2 / -4 for an illegal M / N / LDA, the plasma_tune() status when
     * tuning fails, PLASMA_SUCCESS for an empty matrix, otherwise the status
     * stored in the sequence.
     */
    int NB, IB, IBNB, MT, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descL;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dgetrf_incpiv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments; the negative return value is the argument position */
    if (M < 0) {
        plasma_error("PLASMA_dgetrf_incpiv", "illegal value of M");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_dgetrf_incpiv", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_dgetrf_incpiv", "illegal value of LDA");
        return -4;
    }
    /* Quick return: nothing to factor */
    if (min(M, N) == 0)
        return PLASMA_SUCCESS;
    /* Tune the blocking parameters for an M-by-N problem (DGESV tuning entry, no right-hand sides) */
    status = plasma_tune(PLASMA_FUNC_DGESV, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dgetrf_incpiv", "plasma_tune() failed");
        return status;
    }
    /* Tile sizes, and the tile counts MT/NT obtained by rounding M/NB and N/NB up */
    NB   = PLASMA_NB;
    IB   = PLASMA_IB;
    IBNB = IB*NB;
    MT   = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT   = (N%NB==0) ? (N/NB) : (N/NB+1);
    plasma_sequence_create(plasma, &sequence);
    /* L is described as MT-by-NT tiles of IB-by-NB elements, stored in the caller's L array */
    descL = plasma_desc_init(
        PlasmaRealDouble,
        IB, NB, IBNB,
        MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);
    descL.mat = L;
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* NOTE(review): the trailing argument looks like the cleanup the macro runs if it fails - confirm against the macro definition */
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N);
    }
    /* Call the tile interface */
    PLASMA_dgetrf_incpiv_Tile_Async(&descA, &descL, IPIV, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* Translate the result back into A, wait, then release the descriptor's storage */
        plasma_dooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    /* Read the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dgetri	(	int	N,
		double *	A,
		int	LDA,
		int *	IPIV
	)

PLASMA_dgetri - Computes the inverse of a matrix using the LU factorization computed by PLASMA_dgetrf. This method inverts U and then computes inv(A) by solving the system inv(A)*L = inv(U) for inv(A).

Parameters:

[in]	N	The order of the matrix A. N >= 0.
[in,out]	A	On entry, the triangular factor L or U from the factorization A = PLU as computed by PLASMA_dgetrf. On exit, if return value = 0, the inverse of the original matrix A.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in]	IPIV	The pivot indices that define the permutations as returned by PLASMA_dgetrf.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, the (i,i) element of the factor U is exactly zero; The matrix is singular and its inverse could not be computed.

See also:: PLASMA_dgetri_Tile; PLASMA_dgetri_Tile_Async; PLASMA_cgetri; PLASMA_dgetri; PLASMA_sgetri; PLASMA_dgetrf

Definition at line 63 of file dgetri.c.

References max, PLASMA_Alloc_Workspace_dgetri_Tile_Async(), plasma_context_self(), plasma_desc_mat_free(), PLASMA_dgetri_Tile_Async(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DGESV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), and plasma_sequence_t::status.

{
    /*
     * Body of PLASMA_dgetri.  Validates the arguments, brings A into the
     * layout expected by the tile interface (out-of-place or in-place
     * depending on PLASMA_TRANSLATION), allocates a workspace descriptor,
     * calls PLASMA_dgetri_Tile_Async, translates A back and frees the
     * workspace.
     *
     * Return value: PLASMA_ERR_NOT_INITIALIZED when no PLASMA context exists,
     * -1 / -3 for an illegal N / LDA, the plasma_tune() status when tuning
     * fails, PLASMA_SUCCESS when N == 0, otherwise the status stored in the
     * sequence.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    PLASMA_desc descW;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dgetri", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments; the negative return value is the argument position */
    if (N < 0) {
        plasma_error("PLASMA_dgetri", "illegal value of N");
        return -1;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_dgetri", "illegal value of LDA");
        return -3;
    }
    /* Quick return: N < 0 was rejected above, so this holds exactly when N == 0 */
    if (max(N, 0) == 0)
        return PLASMA_SUCCESS;
    /* Tune the blocking parameters for an N-by-N problem (DGESV tuning entry, no right-hand sides) */
    status = plasma_tune(PLASMA_FUNC_DGESV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dgetri", "plasma_tune() failed");
        return status;
    }
    /* Tile size */
    NB   = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* NOTE(review): the trailing argument looks like the cleanup the macro runs if it fails - confirm against the macro definition */
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_diplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, N, N);
    }
    /* Allocate workspace */
    /* NOTE(review): whatever this call returns is ignored, so a failed allocation would go unnoticed here - confirm and handle */
    PLASMA_Alloc_Workspace_dgetri_Tile_Async(&descA, &descW);
    /* Call the tile interface */
    PLASMA_dgetri_Tile_Async(&descA, IPIV, &descW, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* Translate the result back into A, wait, then release the descriptor's storage */
        plasma_dooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    /* The workspace is released only after the sync in either branch above */
    plasma_desc_mat_free(&(descW));
    /* Read the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dgetrs	(	PLASMA_enum	trans,
		int	N,
		int	NRHS,
		double *	A,
		int	LDA,
		int *	IPIV,
		double *	B,
		int	LDB
	)

PLASMA_dgetrs - Solves a system of linear equations A * X = B, with a general N-by-N matrix A using the tile LU factorization computed by PLASMA_dgetrf.

Parameters:

[in]	trans	Specifies the form of the system of equations: = PlasmaNoTrans: A * X = B (No transpose); = PlasmaTrans: A**T * X = B (Transpose); = PlasmaTrans: A**T * X = B (Conjugate transpose, which for a real matrix is the same as the transpose).
[in]	N	The order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]	A	The tile factors L and U from the factorization, computed by PLASMA_dgetrf.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in]	IPIV	The pivot indices from PLASMA_dgetrf.
[in,out]	B	On entry, the N-by-NRHS matrix of right hand side matrix B. On exit, the solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS successful exit

Returns:: <0 if -i, the i-th argument had an illegal value

See also:: PLASMA_dgetrs_Tile; PLASMA_dgetrs_Tile_Async; PLASMA_cgetrs; PLASMA_dgetrs; PLASMA_sgetrs; PLASMA_dgetrf

Definition at line 72 of file dgetrs.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), PLASMA_dgetrs_Tile_Async(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DGESV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaNoTrans, PlasmaTrans, and plasma_sequence_t::status.

{
    /*
     * Body of PLASMA_dgetrs.  Validates the arguments, brings A and B into
     * the layout expected by the tile interface (out-of-place or in-place
     * depending on PLASMA_TRANSLATION), calls PLASMA_dgetrs_Tile_Async and
     * translates the solution back into B.
     *
     * Return value: PLASMA_ERR_NOT_INITIALIZED when no PLASMA context exists,
     * -1 / -2 / -3 / -5 / -8 for an illegal trans / N / NRHS / LDA / LDB,
     * the plasma_tune() status when tuning fails, PLASMA_SUCCESS when N or
     * NRHS is zero, otherwise the status stored in the sequence.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dgetrs", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments; the negative return value is the argument position */
    /* Only two distinct values are accepted; the former third test repeated PlasmaTrans and could never change the outcome */
    if ( (trans != PlasmaNoTrans) &&
         (trans != PlasmaTrans) ) {
        plasma_error("PLASMA_dgetrs", "illegal value of trans");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_dgetrs", "illegal value of N");
        return -2;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_dgetrs", "illegal value of NRHS");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_dgetrs", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_dgetrs", "illegal value of LDB");
        return -8;
    }
    /* Quick return: no equations or no right-hand sides */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune the blocking parameters for an N-by-N system with NRHS right-hand sides */
    status = plasma_tune(PLASMA_FUNC_DGESV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dgetrs", "plasma_tune() failed");
        return status;
    }
    /* Tile size */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* NOTE(review): the trailing argument looks like the cleanup the macro runs if it fails; for B it also releases the already-built descA - confirm against the macro definition */
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_dooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        plasma_diplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
    }
    /* Call the tile interface */
    PLASMA_dgetrs_Tile_Async(trans, &descA, IPIV, &descB, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* Only B carries output, so only B is translated back; both descriptors are released */
        plasma_dooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        /* Both arrays were translated in place, so both are translated back */
        plasma_diptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_diptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    /* Read the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

int PLASMA_dgetrs_incpiv	(	PLASMA_enum	trans,
		int	N,
		int	NRHS,
		double *	A,
		int	LDA,
		double *	L,
		int *	IPIV,
		double *	B,
		int	LDB
	)

PLASMA_dgetrs_incpiv - Solves a system of linear equations A * X = B, with a general N-by-N matrix A using the tile LU factorization computed by PLASMA_dgetrf_incpiv.

Parameters:

[in]	trans	Specifies the form of the system of equations: = PlasmaNoTrans: A * X = B (No transpose); = PlasmaTrans: A**T * X = B (Transpose); = PlasmaTrans: A**T * X = B (Conjugate transpose, which for a real matrix is the same as the transpose). Currently only PlasmaNoTrans is supported.
[in]	N	The order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]	A	The tile factors L and U from the factorization, computed by PLASMA_dgetrf_incpiv.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in]	L	Auxiliary factorization data, related to the tile L factor, computed by PLASMA_dgetrf_incpiv.
[in]	IPIV	The pivot indices from PLASMA_dgetrf_incpiv (not equivalent to LAPACK).
[in,out]	B	On entry, the N-by-NRHS matrix of right hand side matrix B. On exit, the solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS successful exit

Returns:: <0 if -i, the i-th argument had an illegal value

See also:: PLASMA_dgetrs_incpiv_Tile; PLASMA_dgetrs_incpiv_Tile_Async; PLASMA_cgetrs_incpiv; PLASMA_dgetrs_incpiv; PLASMA_sgetrs_incpiv; PLASMA_dgetrf_incpiv

Definition at line 75 of file dgetrs_incpiv.c.

References L, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), PLASMA_dgetrs_incpiv_Tile_Async(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_NOT_SUPPORTED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DGESV, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaNoTrans, PlasmaRealDouble, and plasma_sequence_t::status.

{
    /*
     * Body of PLASMA_dgetrs_incpiv.  Validates the arguments, wraps the
     * caller's L array in a descriptor, brings A and B into the layout
     * expected by the tile interface (out-of-place or in-place depending on
     * PLASMA_TRANSLATION), calls PLASMA_dgetrs_incpiv_Tile_Async and
     * translates the solution back into B.
     *
     * Return value: PLASMA_ERR_NOT_INITIALIZED when no PLASMA context exists,
     * PLASMA_ERR_NOT_SUPPORTED when trans is not PlasmaNoTrans,
     * -2 / -3 / -5 / -9 for an illegal N / NRHS / LDA / LDB, the
     * plasma_tune() status when tuning fails, PLASMA_SUCCESS when N or NRHS
     * is zero, otherwise the status stored in the sequence.
     */
    int NB, IB, IBNB, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB, descL;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dgetrs_incpiv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    /* Unlike the other checks, an unsupported trans yields PLASMA_ERR_NOT_SUPPORTED rather than -1 */
    if (trans != PlasmaNoTrans) {
        plasma_error("PLASMA_dgetrs_incpiv", "only PlasmaNoTrans supported");
        return PLASMA_ERR_NOT_SUPPORTED;
    }
    if (N < 0) {
        plasma_error("PLASMA_dgetrs_incpiv", "illegal value of N");
        return -2;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_dgetrs_incpiv", "illegal value of NRHS");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_dgetrs_incpiv", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_dgetrs_incpiv", "illegal value of LDB");
        return -9;
    }
    /* Quick return: no equations or no right-hand sides */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune the blocking parameters for an N-by-N system with NRHS right-hand sides */
    status = plasma_tune(PLASMA_FUNC_DGESV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dgetrs_incpiv", "plasma_tune() failed");
        return status;
    }
    /* Tile sizes, and the tile count NT obtained by rounding N/NB up */
    NB    = PLASMA_NB;
    IB    = PLASMA_IB;
    IBNB  = IB*NB;
    NT    = (N%NB==0) ? (N/NB) : (N/NB+1);
    plasma_sequence_create(plasma, &sequence);
    /* L is described as NT-by-NT tiles of IB-by-NB elements, stored in the caller's L array */
    descL = plasma_desc_init(
        PlasmaRealDouble,
        IB, NB, IBNB,
        NT*IB, NT*NB, 0, 0, NT*IB, NT*NB);
    descL.mat = L;
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* NOTE(review): the trailing argument looks like the cleanup the macro runs if it fails; for B it also releases the already-built descA - confirm against the macro definition */
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_dooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        plasma_diplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
    }
    /* Call the tile interface */
    PLASMA_dgetrs_incpiv_Tile_Async(&descA, &descL, IPIV, &descB, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* Only B carries output, so only B is translated back; both descriptors are released */
        plasma_dooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        /* Both arrays were translated in place, so both are translated back */
        plasma_diptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_diptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    /* Read the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dlacpy	(	PLASMA_enum	uplo,
		int	M,
		int	N,
		double *	A,
		int	LDA,
		double *	B,
		int	LDB
	)

PLASMA_dlacpy copies all or part of a two-dimensional matrix A to another matrix B

Parameters:

[in]	uplo	Specifies the part of the matrix A to be copied to B. = PlasmaUpperLower: All the matrix A = PlasmaUpper: Upper triangular part = PlasmaLower: Lower triangular part
[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in]	A	The M-by-N matrix A. If uplo = PlasmaUpper, only the upper trapezium is accessed; if UPLO = PlasmaLower, only the lower trapezium is accessed.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[out]	B	The M-by-N matrix B. On exit, B = A in the locations specified by UPLO.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,M).

See also:: PLASMA_dlacpy_Tile; PLASMA_dlacpy_Tile_Async; PLASMA_clacpy; PLASMA_dlacpy; PLASMA_slacpy

Definition at line 62 of file dlacpy.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, PLASMA_dlacpy_Tile_Async(), plasma_dooplap2tile, plasma_dooptile2lap, plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DGEMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLower, PlasmaUpper, and PlasmaUpperLower.

{
    /*
     * Body of PLASMA_dlacpy.  Validates the arguments, brings A and B into
     * the layout expected by the tile interface (out-of-place or in-place
     * depending on PLASMA_TRANSLATION), calls PLASMA_dlacpy_Tile_Async and
     * translates the arrays back.
     *
     * Return value: PLASMA_ERR_NOT_INITIALIZED when no PLASMA context exists,
     * -1 / -2 / -3 / -5 / -7 for an illegal uplo / M / N / LDA / LDB, the
     * plasma_tune() status when tuning fails, PLASMA_SUCCESS otherwise.
     * The status stored in the sequence is not propagated to the caller.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dlacpy", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments; the negative return value is the argument position */
    if ( (uplo != PlasmaUpperLower) &&
         (uplo != PlasmaUpper) &&
         (uplo != PlasmaLower) ) {
        plasma_error("PLASMA_dlacpy", "illegal value of uplo");
        return -1;
    }
    if (M < 0) {
        plasma_error("PLASMA_dlacpy", "illegal value of M");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_dlacpy", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_dlacpy", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, M)) {
        plasma_error("PLASMA_dlacpy", "illegal value of LDB");
        return -7;
    }
    /* Quick return: nothing to copy.  This function reports an integer status, not a norm value. */
    if (min(N, M) == 0)
        return PLASMA_SUCCESS;
    /* Tune the blocking parameters for an M-by-N problem (DGEMM tuning entry) */
    status = plasma_tune(PLASMA_FUNC_DGEMM, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dlacpy", "plasma_tune() failed");
        return status;
    }
    /* Tile size */
    NB   = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* NOTE(review): the trailing argument looks like the cleanup the macro runs if it fails; for B it also releases the already-built descA - confirm against the macro definition */
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
        plasma_dooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)) );
    } else {
        plasma_diplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, M, N);
        /* B has its own leading dimension; using LDA here would mis-describe B whenever LDB != LDA */
        plasma_diplap2tile(  descB, B, NB, NB, LDB, N, 0, 0, M, N);
    }
    /* Call the tile interface */
    PLASMA_dlacpy_Tile_Async(uplo, &descA, &descB, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* Only B carries output, so only B is translated back; both descriptors are released */
        plasma_dooptile2lap( descB, B, NB, NB, LDB, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        /* A was translated in place above, so it must be translated back as well, not just B */
        plasma_diptile2lap( descA, A, NB, NB, LDA, N );
        plasma_diptile2lap( descB, B, NB, NB, LDB, N );
        plasma_dynamic_sync();
    }
    plasma_sequence_destroy(plasma, sequence);
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

double PLASMA_dlange	(	PLASMA_enum	norm,
		int	M,
		int	N,
		double *	A,
		int	LDA,
		double *	work
	)

PLASMA_dlange returns the value

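\[ \mathrm{dlange} = \begin{cases} \max(\mathrm{abs}(A(i,j))), & \text{NORM = PlasmaMaxNorm} \\ \mathrm{norm1}(A), & \text{NORM = PlasmaOneNorm} \\ \mathrm{normI}(A), & \text{NORM = PlasmaInfNorm} \\ \mathrm{normF}(A), & \text{NORM = PlasmaFrobeniusNorm} \end{cases} \]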

where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of squares). Note that max(abs(A(i,j))) is not a consistent matrix norm.

Parameters:

[in]	norm	= PlasmaMaxNorm: Max norm = PlasmaOneNorm: One norm = PlasmaInfNorm: Infinity norm = PlasmaFrobeniusNorm: Frobenius norm
[in]	M	The number of rows of the matrix A. M >= 0. When M = 0, the returned value is set to zero.
[in]	N	The number of columns of the matrix A. N >= 0. When N = 0, the returned value is set to zero.
[in]	A	The M-by-N matrix A.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[in]	work	double precision array of dimension (MAX(1,LWORK)), where LWORK >= M when NORM = PlasmaInfNorm; otherwise, WORK is not referenced.

Returns:

Return values:

the	norm described above.

See also:: PLASMA_dlange_Tile; PLASMA_dlange_Tile_Async; PLASMA_clange; PLASMA_dlange; PLASMA_slange

Definition at line 78 of file dlange.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, PLASMA_dlange_Tile_Async(), plasma_dooplap2tile, plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DGEMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaFrobeniusNorm, PlasmaInfNorm, PlasmaMaxNorm, and PlasmaOneNorm.

{
    /*
     * Body of PLASMA_dlange.  Validates the arguments, brings A into the
     * layout expected by the tile interface (out-of-place or in-place
     * depending on PLASMA_TRANSLATION), calls PLASMA_dlange_Tile_Async and
     * returns the value it produced.
     *
     * The function is declared as returning double, so the error paths hand
     * their integer codes back through that double: PLASMA_ERR_NOT_INITIALIZED,
     * -1 / -2 / -3 / -5 for an illegal norm / M / N / LDA, or the
     * plasma_tune() status.  An empty matrix yields 0.0.  The status stored
     * in the sequence is not inspected.
     */
    int NB;
    int status;
    double value;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dlange", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments; the negative return value is the argument position */
    if ( (norm != PlasmaMaxNorm) && (norm != PlasmaOneNorm)
        && (norm != PlasmaInfNorm) && (norm != PlasmaFrobeniusNorm) ) {
        plasma_error("PLASMA_dlange", "illegal value of norm");
        return -1;
    }
    if (M < 0) {
        plasma_error("PLASMA_dlange", "illegal value of M");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_dlange", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_dlange", "illegal value of LDA");
        return -5;
    }
    /* Quick return: the norm of an empty matrix is reported as zero */
    if (min(N, M) == 0)
      return (double)0.0;
    /* Tune the blocking parameters for an M-by-N problem (DGEMM tuning entry) */
    status = plasma_tune(PLASMA_FUNC_DGEMM, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dlange", "plasma_tune() failed");
        return status;
    }
    /* Tile size */
    NB   = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* NOTE(review): the trailing argument looks like the cleanup the macro runs if it fails - confirm against the macro definition */
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_diplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, M, N);
    }
    /* Call the tile interface; value is presumably filled in by it and is read only after the sync below */
    PLASMA_dlange_Tile_Async(norm, &descA, work, &value, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* A is input only, so nothing is translated back; just wait and release the descriptor */
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        /* A was translated in place above, so it is translated back here */
        plasma_diptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    plasma_sequence_destroy(plasma, sequence);
    return value;
}

Here is the call graph for this function:

double PLASMA_dlansy	(	PLASMA_enum	norm,
		PLASMA_enum	uplo,
		int	N,
		double *	A,
		int	LDA,
		double *	work
	)

PLASMA_dlansy returns the value

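\[ \mathrm{dlansy} = \begin{cases} \max(\mathrm{abs}(A(i,j))), & \text{NORM = PlasmaMaxNorm} \\ \mathrm{norm1}(A), & \text{NORM = PlasmaOneNorm} \\ \mathrm{normI}(A), & \text{NORM = PlasmaInfNorm} \\ \mathrm{normF}(A), & \text{NORM = PlasmaFrobeniusNorm} \end{cases} \]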

where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of squares). Note that max(abs(A(i,j))) is not a consistent matrix norm.

Parameters:

[in]	norm	= PlasmaMaxNorm: Max norm = PlasmaOneNorm: One norm = PlasmaInfNorm: Infinity norm = PlasmaFrobeniusNorm: Frobenius norm
[in]	uplo	= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The number of columns/rows of the matrix A. N >= 0. When N = 0, the returned value is set to zero.
[in]	A	The N-by-N matrix A.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in]	work	double precision array of dimension PLASMA_SIZE if PLASMA_STATIC_SCHEDULING is used, and NULL otherwise.

Returns:

Return values:

the	norm described above.

See also:: PLASMA_dlansy_Tile; PLASMA_dlansy_Tile_Async; PLASMA_clansy; PLASMA_dlansy; PLASMA_slansy

Definition at line 77 of file dlansy.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, PLASMA_dlansy_Tile_Async(), plasma_dooplap2tile, plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DGEMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaFrobeniusNorm, PlasmaInfNorm, PlasmaLower, PlasmaMaxNorm, PlasmaOneNorm, and PlasmaUpper.

{
    /*
     * Body of PLASMA_dlansy.  Validates the arguments, brings A into the
     * layout expected by the tile interface (out-of-place or in-place
     * depending on PLASMA_TRANSLATION), calls PLASMA_dlansy_Tile_Async and
     * returns the value it produced.
     *
     * The function is declared as returning double, so the error paths hand
     * their integer codes back through that double: PLASMA_ERR_NOT_INITIALIZED,
     * -1 / -2 / -3 / -5 for an illegal norm / uplo / N / LDA, or the
     * plasma_tune() status.  N == 0 yields 0.0.  The status stored in the
     * sequence is not inspected.
     */
    int NB;
    int status;
    double value;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dlansy", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments; the negative return value is the argument position */
    if ( (norm != PlasmaMaxNorm) && (norm != PlasmaOneNorm)
        && (norm != PlasmaInfNorm) && (norm != PlasmaFrobeniusNorm) ) {
        plasma_error("PLASMA_dlansy", "illegal value of norm");
        return -1;
    }
    if ( (uplo != PlasmaUpper) && (uplo != PlasmaLower) ) {
        plasma_error("PLASMA_dlansy", "illegal value of uplo");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_dlansy", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_dlansy", "illegal value of LDA");
        return -5;
    }
    /* Quick return: the norm of an empty matrix is reported as zero */
    if ( N == 0)
      return (double)0.0;
    /* Tune the blocking parameters for an N-by-N problem (DGEMM tuning entry) */
    status = plasma_tune(PLASMA_FUNC_DGEMM, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dlansy", "plasma_tune() failed");
        return status;
    }
    /* Tile size */
    NB   = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* NOTE(review): the trailing argument looks like the cleanup the macro runs if it fails - confirm against the macro definition */
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_diplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, N, N);
    }
    /* Call the tile interface; value is presumably filled in by it and is read only after the sync below */
    PLASMA_dlansy_Tile_Async(norm, uplo, &descA, work, &value, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* A is input only, so nothing is translated back; just wait and release the descriptor */
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        /* A was translated in place above, so it is translated back here */
        plasma_diptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    plasma_sequence_destroy(plasma, sequence);
    return value;
}

Here is the call graph for this function:

int PLASMA_dLapack_to_Tile	(	double *	Af77,
		int	LDA,
		PLASMA_desc *	A
	)

PLASMA_dLapack_to_Tile - Conversion from LAPACK layout to tile layout.

Parameters:

[in]	Af77	LAPACK matrix.
[in]	LDA	The leading dimension of the matrix Af77.
[in,out]	A	Descriptor of the PLASMA matrix in tile layout. If PLASMA_TRANSLATION_MODE is set to PLASMA_INPLACE, A->mat is not used and is set to Af77 on return; if PLASMA_TRANSLATION_MODE is set to PLASMA_OUTOFPLACE, A->mat must be allocated beforehand.

Returns:

Return values:

PLASMA_SUCCESS successful exit

See also:: PLASMA_dLapack_to_Tile_Async; PLASMA_dTile_to_Lapack; PLASMA_cLapack_to_Tile; PLASMA_dLapack_to_Tile; PLASMA_sLapack_to_Tile

Definition at line 55 of file dtile.c.

References A, plasma_context_self(), plasma_desc_check(), plasma_dynamic_sync, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_5, plasma_pdlapack_to_tile(), plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, and plasma_sequence_t::status.

{
    /*
     * Converts the LAPACK-layout matrix Af77 (leading dimension LDA) into the
     * tile-layout matrix described by A and synchronizes before returning.
     *
     * Returns PLASMA_ERR_NOT_INITIALIZED when no PLASMA context is available,
     * PLASMA_ERR_ILLEGAL_VALUE when the descriptor fails plasma_desc_check(),
     * and otherwise the status recorded in the internal sequence.
     */
    PLASMA_desc descA = *A;   /* by-value copy of the caller's descriptor */
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    /* Start the request from a defined state, as the other LAPACK-interface
     * routines do, instead of passing an indeterminate object downstream. */
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    int status;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dLapack_to_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check descriptor for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dLapack_to_Tile", "invalid descriptor");
        return PLASMA_ERR_ILLEGAL_VALUE;
    }
    plasma_sequence_create(plasma, &sequence);
    /* NOTE(review): plasma_parallel_call_5 is a macro defined elsewhere; it
     * presumably relies on the local variable named `plasma` - confirm before
     * renaming any local in this function. */
    plasma_parallel_call_5(
        plasma_pdlapack_to_tile,
        double*, Af77,
        int, LDA,
        PLASMA_desc, descA,
        PLASMA_sequence*, sequence,
        PLASMA_request*, &request);
    plasma_dynamic_sync();
    /* Read the status before the sequence is destroyed. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dlaset	(	PLASMA_enum	uplo,
		int	M,
		int	N,
		double	alpha,
		double	beta,
		double *	A,
		int	LDA
	)

PLASMA_dlaset sets the off-diagonal elements of the M-by-N matrix A (or of its selected triangular part) to alpha and the diagonal elements to beta.

Parameters:

[in]	uplo	Specifies the part of the matrix A to be set. = PlasmaUpperLower: All of the matrix A is set. = PlasmaUpper: Upper triangular part is set. The lower triangle is unchanged. = PlasmaLower: Lower triangular part is set. The upper triangle is unchanged.
[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in]	alpha	All the offdiagonal array elements are set to alpha.
[in]	beta	All the diagonal array elements are set to beta.
[in,out]	A	On entry, the m by n matrix A. On exit, A(i,j) = ALPHA, 1 <= i <= m, 1 <= j <= n, i.ne.j; A(i,i) = BETA , 1 <= i <= min(m,n)
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).

See also:: PLASMA_dlaset_Tile; PLASMA_dlaset_Tile_Async; PLASMA_claset; PLASMA_dlaset; PLASMA_slaset

Definition at line 63 of file dlaset.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, PLASMA_dlaset_Tile_Async(), plasma_dooplap2tile, plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DGEMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLower, PlasmaUpper, and PlasmaUpperLower.

{
    /*
     * LAPACK-layout driver: sets the selected part of the M-by-N matrix A so
     * that off-diagonal entries equal alpha and diagonal entries equal beta,
     * by running PLASMA_dlaset_Tile_Async() on a tile view of A.
     *
     * Returns PLASMA_SUCCESS on success (including the empty-matrix quick
     * return), PLASMA_ERR_NOT_INITIALIZED when no context exists, -i when the
     * i-th argument is illegal, the plasma_tune() status when tuning fails,
     * or the status recorded in the sequence otherwise.
     *
     * NOTE(review): the plasma_d*lap2tile / plasma_d*tile2lap macros are
     * defined elsewhere and presumably use the locals `plasma`, `sequence`
     * and `request` by name - confirm before renaming them.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dlaset", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if ( (uplo != PlasmaUpperLower) &&
         (uplo != PlasmaUpper) &&
         (uplo != PlasmaLower) ) {
        plasma_error("PLASMA_dlaset", "illegal value of uplo");
        return -1;
    }
    if (M < 0) {
        plasma_error("PLASMA_dlaset", "illegal value of M");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_dlaset", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_dlaset", "illegal value of LDA");
        /* LDA is the 7th argument (uplo, M, N, alpha, beta, A, LDA). */
        return -7;
    }
    /* Quick return: nothing to set in an empty matrix */
    if (min(N, M) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB depending on M & N */
    status = plasma_tune(PLASMA_FUNC_DGEMM, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dlaset", "plasma_tune() failed");
        return status;
    }
    /* Set NB */
    NB   = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_diplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, M, N);
    }
    /* Call the tile interface */
    PLASMA_dlaset_Tile_Async(uplo, alpha, beta, &descA, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* A is an in/out argument: copy the tile result back into the
         * caller's array before the temporary tile storage is released. */
        plasma_dooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    /* Report the sequence status rather than unconditionally succeeding. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

int PLASMA_dlaswp	(	int	N,
		double *	A,
		int	LDA,
		int	K1,
		int	K2,
		int *	IPIV,
		int	INCX
	)

PLASMA_dlaswp - performs a series of row interchanges on the matrix A. One row interchange is initiated for each of rows K1 through K2 of A.

Parameters:

[in]	N	The order of the matrix A. N >= 0.
[in]	A	The tile factors L and U from the factorization, computed by PLASMA_dgetrf.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in]	K1	The first element of IPIV for which a row interchange will be done.
[in]	K2	The last element of IPIV for which a row interchange will be done.
[in]	IPIV	The pivot indices from PLASMA_dgetrf.
[in]	INCX	The increment between successive values of IPIV. If INCX is negative, the pivots are applied in reverse order.

Returns:

Return values:

PLASMA_SUCCESS successful exit

Returns:: <0 if -i, the i-th argument had an illegal value

See also:: PLASMA_dlaswp_Tile; PLASMA_dlaswp_Tile_Async; PLASMA_claswp; PLASMA_dlaswp; PLASMA_slaswp; PLASMA_dgetrf

Definition at line 66 of file dlaswp.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, PLASMA_dlaswp_Tile_Async(), plasma_dooplap2tile, plasma_dooptile2lap, plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DGESV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the row-interchange routine: validates N and
     * LDA, builds the descriptor descA from A, calls
     * PLASMA_dlaswp_Tile_Async() and returns the status recorded in the
     * sequence.
     *
     * Return values visible in this body: PLASMA_ERR_NOT_INITIALIZED (no
     * context), -1 (N < 0), -3 (LDA < max(1,N)), PLASMA_SUCCESS on the N == 0
     * quick return, the plasma_tune() status when tuning fails, otherwise
     * sequence->status. K1, K2, IPIV and INCX are forwarded unchecked.
     *
     * NOTE(review): the plasma_d*lap2tile / plasma_d*tile2lap macros are
     * defined elsewhere; they presumably reference the locals `plasma`,
     * `sequence` and `request` by name and may return early - confirm before
     * renaming or reordering anything here.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dlaswp", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (N < 0) {
        plasma_error("PLASMA_dlaswp", "illegal value of N");
        return -1;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_dlaswp", "illegal value of LDA");
        return -3;
    }
    /* Quick return */
    if ( N == 0 )
        return PLASMA_SUCCESS;
    /* Tune the block sizes; the tuning call is given LDA, N and N */
    status = plasma_tune(PLASMA_FUNC_DGESV, LDA, N, N);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dlaswp", "plasma_tune() failed");
        return status;
    }
    /* Set NB */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /* The descriptor's trailing size arguments are K2 and N in both modes. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K2, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K2, N);
    }
    /* Call the tile interface */
    PLASMA_dlaswp_Tile_Async(&descA, K1, K2, IPIV, INCX, sequence, &request);
    /* Convert back to A, synchronize, and (out-of-place only) free descA. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    /* Read the status before the sequence is destroyed. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dlaswpc	(	int	N,
		double *	A,
		int	LDA,
		int	K1,
		int	K2,
		int *	IPIV,
		int	INCX
	)

PLASMA_dlaswpc - performs a series of row interchanges on the matrix A. One row interchange is initiated for each of rows K1 through K2 of A.

Parameters:

[in]	N	The order of the matrix A. N >= 0.
[in]	A	The tile factors L and U from the factorization, computed by PLASMA_dgetrf.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in]	K1	The first element of IPIV for which a row interchange will be done.
[in]	K2	The last element of IPIV for which a row interchange will be done.
[in]	IPIV	The pivot indices from PLASMA_dgetrf.
[in]	INCX	The increment between successive values of IPIV. If INCX is negative, the pivots are applied in reverse order.

Returns:

Return values:

PLASMA_SUCCESS successful exit

Returns:: <0 if -i, the i-th argument had an illegal value

See also:: PLASMA_dlaswpc_Tile; PLASMA_dlaswpc_Tile_Async; PLASMA_claswpc; PLASMA_dlaswpc; PLASMA_slaswpc; PLASMA_dgetrf

Definition at line 66 of file dlaswpc.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, PLASMA_dlaswpc_Tile_Async(), plasma_dooplap2tile, plasma_dooptile2lap, plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DGESV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver: validates N and LDA, builds the descriptor descA
     * from A, calls PLASMA_dlaswpc_Tile_Async() and returns the status
     * recorded in the sequence.
     *
     * Return values visible in this body: PLASMA_ERR_NOT_INITIALIZED (no
     * context), -1 (N < 0), -3 (LDA < max(1,N)), PLASMA_SUCCESS on the N == 0
     * quick return, the plasma_tune() status when tuning fails, otherwise
     * sequence->status. K1, K2, IPIV and INCX are forwarded unchecked.
     *
     * NOTE(review): the plasma_d*lap2tile / plasma_d*tile2lap macros are
     * defined elsewhere; they presumably reference the locals `plasma`,
     * `sequence` and `request` by name and may return early - confirm before
     * renaming or reordering anything here.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dlaswpc", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (N < 0) {
        plasma_error("PLASMA_dlaswpc", "illegal value of N");
        return -1;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_dlaswpc", "illegal value of LDA");
        return -3;
    }
    /* Quick return */
    if ( N == 0 )
        return PLASMA_SUCCESS;
    /* Tune the block sizes; the tuning call is given LDA, N and N */
    status = plasma_tune(PLASMA_FUNC_DGESV, LDA, N, N);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dlaswpc", "plasma_tune() failed");
        return status;
    }
    /* Set NB */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /* The descriptor's trailing size arguments are K2 and N in both modes. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K2, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K2, N);
    }
    /* Call the tile interface */
    PLASMA_dlaswpc_Tile_Async(&descA, K1, K2, IPIV, INCX, sequence, &request);
    /* Convert back to A, synchronize, and (out-of-place only) free descA. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    /* Read the status before the sequence is destroyed. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

int PLASMA_dlauum	(	PLASMA_enum	uplo,
		int	N,
		double *	A,
		int	LDA
	)

PLASMA_dlauum - Computes the product U * U' or L' * L, where the triangular factor U or L is stored in the upper or lower triangular part of the array A.

If UPLO = 'U' or 'u' then the upper triangle of the result is stored, overwriting the factor U in A. If UPLO = 'L' or 'l' then the lower triangle of the result is stored, overwriting the factor L in A.

Parameters:

[in]	uplo	= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The order of the triangular factor U or L. N >= 0.
[in,out]	A	On entry, the triangular factor U or L. On exit, if UPLO = 'U', the upper triangle of A is overwritten with the upper triangle of the product U * U'; if UPLO = 'L', the lower triangle of A is overwritten with the lower triangle of the product L' * L.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_dlauum_Tile; PLASMA_dlauum_Tile_Async; PLASMA_clauum; PLASMA_dlauum; PLASMA_slauum; PLASMA_dpotri

Definition at line 65 of file dlauum.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, PLASMA_dlauum_Tile_Async(), plasma_dooplap2tile, plasma_dooptile2lap, plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLower, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver: validates uplo, N and LDA, builds the N-by-N
     * descriptor descA from A, calls PLASMA_dlauum_Tile_Async() and returns
     * the status recorded in the sequence.
     *
     * Return values visible in this body: PLASMA_ERR_NOT_INITIALIZED (no
     * context), -1 (bad uplo), -2 (N < 0), -4 (LDA < max(1,N)),
     * PLASMA_SUCCESS on the N == 0 quick return, the plasma_tune() status
     * when tuning fails, otherwise sequence->status.
     *
     * NOTE(review): the plasma_d*lap2tile / plasma_d*tile2lap macros are
     * defined elsewhere; they presumably reference the locals `plasma`,
     * `sequence` and `request` by name and may return early - confirm before
     * renaming or reordering anything here.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dlauum", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_dlauum", "illegal value of uplo");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_dlauum", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_dlauum", "illegal value of LDA");
        return -4;
    }
    /* Quick return; N < 0 was rejected above, so this is the N == 0 case */
    if (max(N, 0) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB depending on N */
    status = plasma_tune(PLASMA_FUNC_DPOSV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dlauum", "plasma_tune() failed");
        return status;
    }
    /* Set NB */
    NB   = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_diplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, N, N);
    }
    /* Call the tile interface */
    PLASMA_dlauum_Tile_Async(uplo, &descA, sequence, &request);
    /* Convert back to A, synchronize, and (out-of-place only) free descA. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    /* Read the status before the sequence is destroyed. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

int PLASMA_dorglq	(	int	M,
		int	N,
		int	K,
		double *	A,
		int	LDA,
		double *	T,
		double *	Q,
		int	LDQ
	)

PLASMA_dorglq - Generates an M-by-N matrix Q with orthonormal rows, which is defined as the first M rows of a product of the elementary reflectors returned by PLASMA_dgelqf.

Parameters:

[in]	M	The number of rows of the matrix Q. M >= 0.
[in]	N	The number of columns of the matrix Q. N >= M.
[in]	K	The number of rows of elementary tile reflectors whose product defines the matrix Q. M >= K >= 0.
[in]	A	Details of the LQ factorization of the original matrix A as returned by PLASMA_dgelqf.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[in]	T	Auxiliary factorization data, computed by PLASMA_dgelqf.
[out]	Q	On exit, the M-by-N matrix Q.
[in]	LDQ	The leading dimension of the array Q. LDQ >= max(1,M).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_dorglq_Tile; PLASMA_dorglq_Tile_Async; PLASMA_cunglq; PLASMA_dorglq; PLASMA_sorglq; PLASMA_dgelqf

Definition at line 68 of file dorglq.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, PLASMA_dorglq_Tile_Async(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_DGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaRealDouble, plasma_sequence_t::status, and T.

{
    /*
     * LAPACK-layout driver: validates the sizes, builds descriptors for A
     * (K-by-N), Q (M-by-N) and the caller-supplied T workspace, calls
     * PLASMA_dorglq_Tile_Async() and returns the status recorded in the
     * sequence.
     *
     * Return values visible in this body: PLASMA_ERR_NOT_INITIALIZED (no
     * context), -1 (M < 0), -2 (N < M), -3 (K < 0 or K > M), -5 (LDA <
     * max(1,M)), -8 (LDQ < max(1,M)), PLASMA_SUCCESS when min(M,N,K) == 0,
     * the plasma_tune() status when tuning fails, otherwise sequence->status.
     *
     * NOTE(review): the plasma_d*lap2tile / plasma_d*tile2lap macros are
     * defined elsewhere; they presumably reference the locals `plasma`,
     * `sequence` and `request` by name and may return early - confirm before
     * renaming or reordering anything here.
     */
    int NB, IB, IBNB, KT, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descQ, descT;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dorglq", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (M < 0) {
        plasma_error("PLASMA_dorglq", "illegal value of M");
        return -1;
    }
    if (N < M) {
        plasma_error("PLASMA_dorglq", "illegal value of N");
        return -2;
    }
    if (K < 0 || K > M) {
        plasma_error("PLASMA_dorglq", "illegal value of K");
        return -3;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_dorglq", "illegal value of LDA");
        return -5;
    }
    if (LDQ < max(1, M)) {
        plasma_error("PLASMA_dorglq", "illegal value of LDQ");
        return -8;
    }
    /* Quick return - currently NOT equivalent to LAPACK's:
     * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDQ ) */
    if (min(M, min(N, K)) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on M & N */
    status = plasma_tune(PLASMA_FUNC_DGELS, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dorglq", "plasma_tune() failed");
        return status;
    }
    /* Set NB, IB and the tile counts NT = ceil(N/NB), KT = ceil(K/NB) */
    NB   = PLASMA_NB;
    IB   = PLASMA_IB;
    IBNB = IB*NB;
    NT   = (N%NB==0) ? (N/NB) : (N/NB+1);
    KT   = (K%NB==0) ? (K/NB) : (K/NB+1);
    plasma_sequence_create(plasma, &sequence);
    /* Describe T as IB-by-NB tiles; its width depends on the Householder mode */
    if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
        descT = plasma_desc_init(
            PlasmaRealDouble,
            IB, NB, IBNB,
            KT*IB, NT*NB, 0, 0, KT*IB, NT*NB);
    }
    else {
        /* Double the size of T to accommodate the tree reduction phase */
        descT = plasma_desc_init(
            PlasmaRealDouble,
            IB, NB, IBNB,
            KT*IB, 2*NT*NB, 0, 0, KT*IB, 2*NT*NB);
    }
    /* T is used directly as the descriptor's storage; nothing is allocated */
    descT.mat = T;
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K, N, plasma_desc_mat_free(&(descA)) );
        plasma_dooplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descQ)));
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K, N);
        plasma_diplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, M, N);
    }
    /* Call the tile interface */
    PLASMA_dorglq_Tile_Async(&descA, &descT, &descQ, sequence, &request);
    /* Out-of-place: only Q is copied back, then both tile copies are freed.
     * In-place: both A and Q are converted back. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descQ, Q, NB, NB, LDQ, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descQ);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, N );
        plasma_diptile2lap( descQ, Q, NB, NB, LDQ, N );
        plasma_dynamic_sync();
    }
        
    /* Read the status before the sequence is destroyed. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dorgqr	(	int	M,
		int	N,
		int	K,
		double *	A,
		int	LDA,
		double *	T,
		double *	Q,
		int	LDQ
	)

PLASMA_dorgqr - Generates an M-by-N matrix Q with orthonormal columns, which is defined as the first N columns of a product of the elementary reflectors returned by PLASMA_dgeqrf.

Parameters:

[in]	M	The number of rows of the matrix Q. M >= 0.
[in]	N	The number of columns of the matrix Q. M >= N >= 0.
[in]	K	The number of columns of elementary tile reflectors whose product defines the matrix Q. N >= K >= 0.
[in]	A	Details of the QR factorization of the original matrix A as returned by PLASMA_dgeqrf.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[in]	T	Auxiliary factorization data, computed by PLASMA_dgeqrf.
[out]	Q	On exit, the M-by-N matrix Q.
[in]	LDQ	The leading dimension of the array Q. LDQ >= max(1,M).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_dorgqr_Tile; PLASMA_dorgqr_Tile_Async; PLASMA_cungqr; PLASMA_dorgqr; PLASMA_sorgqr; PLASMA_dgeqrf

Definition at line 68 of file dorgqr.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, PLASMA_dorgqr_Tile_Async(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_DGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaRealDouble, plasma_sequence_t::status, and T.

{
    /*
     * LAPACK-layout driver: validates the sizes, builds descriptors for A
     * (M-by-K), Q (M-by-N) and the caller-supplied T workspace, calls
     * PLASMA_dorgqr_Tile_Async() and returns the status recorded in the
     * sequence.
     *
     * Return values visible in this body: PLASMA_ERR_NOT_INITIALIZED (no
     * context), -1 (M < 0), -2 (N < 0 or N > M), -3 (K < 0 or K > N),
     * -5 (LDA < max(1,M)), -8 (LDQ < max(1,M)), PLASMA_SUCCESS when
     * min(M,N,K) == 0, the plasma_tune() status when tuning fails, otherwise
     * sequence->status.
     *
     * NOTE(review): the plasma_d*lap2tile / plasma_d*tile2lap macros are
     * defined elsewhere; they presumably reference the locals `plasma`,
     * `sequence` and `request` by name and may return early - confirm before
     * renaming or reordering anything here.
     */
    int NB, IB, IBNB, MT, KT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descQ, descT;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dorgqr", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (M < 0) {
        plasma_error("PLASMA_dorgqr", "illegal value of M");
        return -1;
    }
    if (N < 0 || N > M) {
        plasma_error("PLASMA_dorgqr", "illegal value of N");
        return -2;
    }
    if (K < 0 || K > N) {
        plasma_error("PLASMA_dorgqr", "illegal value of K");
        return -3;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_dorgqr", "illegal value of LDA");
        return -5;
    }
    if (LDQ < max(1, M)) {
        plasma_error("PLASMA_dorgqr", "illegal value of LDQ");
        return -8;
    }
    /* Quick return */
    if (min(M, min(N, K)) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on M & N */
    status = plasma_tune(PLASMA_FUNC_DGELS, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dorgqr", "plasma_tune() failed");
        return status;
    }
    /* Set NB, IB and the tile counts MT = ceil(M/NB), KT = ceil(K/NB) */
    NB   = PLASMA_NB;
    IB   = PLASMA_IB;
    IBNB = IB*NB;
    MT   = (M%NB==0) ? (M/NB) : (M/NB+1);
    KT   = (K%NB==0) ? (K/NB) : (K/NB+1);
    plasma_sequence_create(plasma, &sequence);
    /* Describe T as IB-by-NB tiles; its width depends on the Householder mode */
    if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
        descT = plasma_desc_init(
            PlasmaRealDouble,
            IB, NB, IBNB,
            MT*IB, KT*NB, 0, 0, MT*IB, KT*NB);
    }
    else {
        /* Double the size of T to accommodate the tree reduction phase */
        descT = plasma_desc_init(
            PlasmaRealDouble,
            IB, NB, IBNB,
            MT*IB, 2*KT*NB, 0, 0, MT*IB, 2*KT*NB);
    }
    /* T is used directly as the descriptor's storage; nothing is allocated */
    descT.mat = T;
    /* A holds the K reflector columns, so it is described with K columns in
     * both directions. Previously the forward conversion used N columns
     * while the in-place back conversion used K, so the two translations
     * disagreed on the geometry of A whenever K < N. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, K, 0, 0, M, K, plasma_desc_mat_free(&(descA)) );
        plasma_dooplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descQ)));
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, K, 0, 0, M, K);
        plasma_diplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, M, N);
    }
    /* Call the tile interface */
    PLASMA_dorgqr_Tile_Async(&descA, &descT, &descQ, sequence, &request);
    /* Out-of-place: only Q is copied back, then both tile copies are freed.
     * In-place: both A and Q are converted back. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descQ, Q, NB, NB, LDQ, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descQ);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, K );
        plasma_diptile2lap( descQ, Q, NB, NB, LDQ, N );
        plasma_dynamic_sync();
    }

    /* Read the status before the sequence is destroyed. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dormlq	(	PLASMA_enum	side,
		PLASMA_enum	trans,
		int	M,
		int	N,
		int	K,
		double *	A,
		int	LDA,
		double *	T,
		double *	B,
		int	LDB
	)

PLASMA_dormlq - overwrites the general M-by-N matrix C with Q*C, where Q is an orthogonal matrix (unitary in the complex case) defined as the product of elementary reflectors returned by PLASMA_dgelqf. Q is of order M.

Parameters:

[in]	side	Intended usage: = PlasmaLeft: apply Q or QT from the left; = PlasmaRight: apply Q or QT from the right. Currently only PlasmaLeft is supported.
[in]	trans	Intended usage: = PlasmaNoTrans: no transpose, apply Q; = PlasmaTrans: transpose, apply Q**T. Currently only PlasmaTrans is supported.
[in]	M	The number of rows of the matrix C. M >= 0.
[in]	N	The number of columns of the matrix C. N >= 0.
[in]	K	The number of rows of elementary tile reflectors whose product defines the matrix Q. M >= K >= 0.
[in]	A	Details of the LQ factorization of the original matrix A as returned by PLASMA_dgelqf.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,K).
[in]	T	Auxiliary factorization data, computed by PLASMA_dgelqf.
[in,out]	B	On entry, the M-by-N matrix B. On exit, B is overwritten by QB or QTB.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,M).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_dormlq_Tile; PLASMA_dormlq_Tile_Async; PLASMA_cunmlq; PLASMA_dormlq; PLASMA_sormlq; PLASMA_dgelqf

Definition at line 83 of file dormlq.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, PLASMA_dormlq_Tile_Async(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_DGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLeft, PlasmaNoTrans, PlasmaRealDouble, PlasmaRight, PlasmaTrans, plasma_sequence_t::status, and T.

{
    /*
     * LAPACK-layout driver: validates the arguments, builds descriptors for
     * A (K-by-An), B (M-by-N) and the caller-supplied T workspace, calls
     * PLASMA_dormlq_Tile_Async() and returns the status recorded in the
     * sequence. An is M when side == PlasmaLeft and N otherwise.
     *
     * Return values visible in this body: PLASMA_ERR_NOT_INITIALIZED (no
     * context), -1 (side), -2 (trans), -3 (M < 0), -4 (N < 0), -5 (K < 0 or
     * K > An), -7 (LDA < max(1,K)), -10 (LDB < max(1,M)), PLASMA_SUCCESS when
     * min(M,N,K) == 0, the plasma_tune() status when tuning fails, otherwise
     * sequence->status.
     *
     * NOTE(review): the plasma_d*lap2tile / plasma_d*tile2lap macros are
     * defined elsewhere; they presumably reference the locals `plasma`,
     * `sequence` and `request` by name and may return early - confirm before
     * renaming or reordering anything here.
     */
    int NB, IB, IBNB, KT, NT, An;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB, descT;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dormlq", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* An is derived before side is validated; an invalid side is rejected
     * just below, before An is used. */
    if (side == PlasmaLeft)
        An = M;
    else 
        An = N;
    /* Check input arguments */
    if ( (side != PlasmaLeft) && (side != PlasmaRight) ) {
        plasma_error("PLASMA_dormlq", "illegal value of side");
        return -1;
    }
    if ( (trans != PlasmaTrans) && (trans != PlasmaNoTrans) ){
        plasma_error("PLASMA_dormlq", "illegal value of trans");
        return -2;
    }
    if (M < 0) {
        plasma_error("PLASMA_dormlq", "illegal value of M");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_dormlq", "illegal value of N");
        return -4;
    }
    if ((K < 0) || (K > An)) {
        plasma_error("PLASMA_dormlq", "illegal value of K");
        return -5;
    }
    if (LDA < max(1, K)) {
        plasma_error("PLASMA_dormlq", "illegal value of LDA");
        return -7;
    }
    if (LDB < max(1, M)) {
        plasma_error("PLASMA_dormlq", "illegal value of LDB");
        return -10;
    }
    /* Quick return - currently NOT equivalent to LAPACK's:
     * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) */
    if (min(M, min(N, K)) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB; the tuning call is given M, K and N */
    status = plasma_tune(PLASMA_FUNC_DGELS, M, K, N);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dormlq", "plasma_tune() failed");
        return status;
    }
    /* Set NB, IB and the tile counts KT = ceil(K/NB), NT = ceil(An/NB) */
    NB   = PLASMA_NB;
    IB   = PLASMA_IB;
    IBNB = IB*NB;
    KT   = ( K%NB==0) ? (K /NB) : (K /NB+1);
    NT   = (An%NB==0) ? (An/NB) : (An/NB+1);
    plasma_sequence_create(plasma, &sequence);
    /* Describe T as IB-by-NB tiles; its width depends on the Householder mode */
    if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
        descT = plasma_desc_init(
            PlasmaRealDouble,
            IB, NB, IBNB,
            KT*IB, NT*NB, 0, 0, KT*IB, NT*NB);
    }
    else {
        /* Double the size of T to accommodate the tree reduction phase */
        descT = plasma_desc_init(
            PlasmaRealDouble,
            IB, NB, IBNB,
            KT*IB, 2*NT*NB, 0, 0, KT*IB, 2*NT*NB);
    }
    /* T is used directly as the descriptor's storage; nothing is allocated */
    descT.mat = T;
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, K, An, plasma_desc_mat_free(&(descA)) );
        plasma_dooplap2tile( descB, B, NB, NB, LDB, N,  0, 0, M, N,  plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, An, 0, 0, K, An);
        plasma_diplap2tile( descB, B, NB, NB, LDB, N,  0, 0, M, N);
    }
    /* Call the tile interface */
    PLASMA_dormlq_Tile_Async(
        side, trans, &descA, &descT, &descB, sequence, &request);
    /* Out-of-place: only B is copied back, then both tile copies are freed.
     * In-place: both A and B are converted back. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descB, B, NB, NB, LDB, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, An );
        plasma_diptile2lap( descB, B, NB, NB, LDB, N );
        plasma_dynamic_sync();
    }
    /* Read the status before the sequence is destroyed. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dormqr	(	PLASMA_enum	side,
		PLASMA_enum	trans,
		int	M,
		int	N,
		int	K,
		double *	A,
		int	LDA,
		double *	T,
		double *	B,
		int	LDB
	)

PLASMA_dormqr - overwrites the general M-by-N matrix B with Q*B, where Q is an orthogonal matrix (unitary in the complex case) defined as the product of elementary reflectors returned by PLASMA_dgeqrf. Q is of order M.

Parameters:

[in]	side	Intended usage: = PlasmaLeft: apply Q or QT from the left; = PlasmaRight: apply Q or QT from the right. Currently only PlasmaLeft is supported.
[in]	trans	Intended usage: = PlasmaNoTrans: no transpose, apply Q; = PlasmaTrans: transpose, apply Q**T. Currently only PlasmaTrans is supported.
[in]	M	The number of rows of the matrix C. M >= 0.
[in]	N	The number of columns of the matrix C. N >= 0.
[in]	K	The number of columns of elementary tile reflectors whose product defines the matrix Q. If side == PlasmaLeft, M >= K >= 0. If side == PlasmaRight, N >= K >= 0.
[in]	A	Details of the QR factorization of the original matrix A as returned by PLASMA_dgeqrf.
[in]	LDA	The leading dimension of the array A. If side == PlasmaLeft, LDA >= max(1,M). If side == PlasmaRight, LDA >= max(1,N).
[in]	T	Auxiliary factorization data, computed by PLASMA_dgeqrf.
[in,out]	B	On entry, the M-by-N matrix B. On exit, B is overwritten by Q*B or Q**T*B.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,M).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_dormqr_Tile; PLASMA_dormqr_Tile_Async; PLASMA_cormqr; PLASMA_dormqr; PLASMA_sormqr; PLASMA_dgeqrf

Definition at line 85 of file dormqr.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, PLASMA_dormqr_Tile_Async(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_DGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLeft, PlasmaNoTrans, PlasmaRealDouble, PlasmaRight, PlasmaTrans, plasma_sequence_t::status, and T.

{
    /*
     * LAPACK-layout driver for applying Q (from PLASMA_dgeqrf) to B.
     * Steps: validate the arguments, tune the tile sizes, describe A, T and
     * B as tile matrices, submit PLASMA_dormqr_Tile_Async(), convert the
     * result back and return the status recorded in the sequence.
     *
     * Negative return values are minus the 1-based position of the offending
     * argument in the PLASMA_dormqr signature.
     */
    int NB, IB, IBNB, Am, MT, KT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB, descT;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dormqr", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Am is the dimension of B on the side Q is applied to: M for
     * PlasmaLeft, N for anything else.  It is computed before `side` is
     * validated, but an invalid `side` is rejected below before Am is read. */
    if ( side == PlasmaLeft ) {
        Am = M;
    } else {
        Am = N;
    }
    /* Check input arguments */
    if ((side != PlasmaLeft) && (side != PlasmaRight)) {
        plasma_error("PLASMA_dormqr", "illegal value of side");
        return -1;
    }
    if ((trans != PlasmaTrans) && (trans != PlasmaNoTrans)){
        plasma_error("PLASMA_dormqr", "illegal value of trans");
        return -2;
    }
    if (M < 0) {
        plasma_error("PLASMA_dormqr", "illegal value of M");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_dormqr", "illegal value of N");
        return -4;
    }
    /* K may not exceed the dimension Q acts on */
    if ( (K < 0) || (K > Am) ) {
        plasma_error("PLASMA_dormqr", "illegal value of K");
        return -5;
    }
    /* LDA is the 7th argument (A itself, the 6th, is not checked) */
    if ( LDA < max(1, Am) ) {
        plasma_error("PLASMA_dormqr", "illegal value of LDA");
        return -7;
    }
    /* LDB is the 10th argument (T and B, 8th and 9th, are not checked) */
    if (LDB < max(1, M)) {
        plasma_error("PLASMA_dormqr", "illegal value of LDB");
        return -10;
    }
    /* Quick return: nothing to do when any of M, N or K is zero; B is left
     * untouched.
     * NOTE(review): the original remark here said this is "currently NOT
     * equivalent to LAPACK's
     * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )";
     * this routine has no NRHS, so the remark looks carried over from a
     * least-squares driver -- confirm. */
    if (min(M, min(N, K)) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on M, K & N (the DGELS tuning entry is used) */
    status = plasma_tune(PLASMA_FUNC_DGELS, M, K, N);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dormqr", "plasma_tune() failed");
        return status;
    }
    /* Set tile sizes and tile counts: MT = ceil(Am/NB), KT = ceil(K/NB) */
    NB   = PLASMA_NB;
    IB   = PLASMA_IB;
    IBNB = IB*NB;
    MT   = (Am%NB==0) ? (Am/NB) : (Am/NB+1);
    KT   = (K%NB==0)  ? (K /NB) : (K /NB+1);
    /* NOTE(review): the result of plasma_sequence_create() is not checked,
     * yet sequence is dereferenced below -- confirm it cannot fail here. */
    plasma_sequence_create(plasma, &sequence);
    /* Describe T with IB-by-NB tiles; its extent depends on the Householder
     * mode selected in the context. */
    if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
        descT = plasma_desc_init(
            PlasmaRealDouble,
            IB, NB, IBNB,
            MT*IB, KT*NB, 0, 0, MT*IB, KT*NB);
    }
    else {
        /* Double the size of T to accommodate the tree reduction phase */
        descT = plasma_desc_init(
            PlasmaRealDouble,
            IB, NB, IBNB,
            MT*IB, 2*KT*NB, 0, 0, MT*IB, 2*KT*NB);
    }
    /* T is used through the caller's buffer; descT is never passed to
     * plasma_desc_mat_free() in this routine. */
    descT.mat = T;
    /* Bring A (Am-by-K) and B (M-by-N) into tile layout.
     * NOTE(review): the last argument of plasma_dooplap2tile looks like
     * cleanup code for a failed conversion; the macro body is not visible
     * here -- confirm. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, K, 0, 0, Am, K, plasma_desc_mat_free(&(descA)) );
        plasma_dooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M,  N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, K, 0, 0, Am, K);
        plasma_diplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M,  N);
    }
    /* Call the tile interface */
    PLASMA_dormqr_Tile_Async(
        side, trans, &descA, &descT, &descB, sequence, &request);
    /* Convert back, synchronize, and (out-of-place only) release the tile
     * copies.  Out-of-place converts only B back, since A is an input;
     * in-place converts both A and B back. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descB, B, NB, NB, LDB, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, K );
        plasma_diptile2lap( descB, B, NB, NB, LDB, N );
        plasma_dynamic_sync();
    }
    /* Read the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dplgsy	(	double	bump,
		int	N,
		double *	A,
		int	LDA,
		unsigned long long int	seed
	)

PLASMA_dplgsy - Generate a random hermitian matrix by tiles.

Parameters:

[in]	bump	The value to add to the diagonal to be sure to have a positive definite matrix.
[in]	N	The order of the matrix A. N >= 0.
[out]	A	On exit, The random hermitian matrix A generated.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in]	seed	The seed used in the random generation.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_dplgsy_Tile; PLASMA_dplgsy_Tile_Async; PLASMA_cplgsy; PLASMA_dplgsy; PLASMA_splgsy; PLASMA_dplrnt; PLASMA_dplgsy

Definition at line 58 of file dplgsy.c.

References A, plasma_desc_t::mat, max, plasma_context_self(), plasma_desc_init(), plasma_diptile2lap, PLASMA_dplgsy_Tile_Async(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DGEMM, PLASMA_NB, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, plasma_tune(), PlasmaRealDouble, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the tiled random matrix generator.
     * Validates N and LDA, tunes NB, describes the caller's array A as an
     * N-by-N tile matrix, submits PLASMA_dplgsy_Tile_Async() and returns the
     * status recorded in the sequence.
     *
     * Negative return values are minus the 1-based position of the offending
     * argument (N is the 2nd, LDA the 4th).
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dplgsy", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (N < 0) {
        plasma_error("PLASMA_dplgsy", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_dplgsy", "illegal value of LDA");
        return -4;
    }
    /* Quick return: N < 0 was rejected above, so this is true only for N == 0 */
    if (max(0, N) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB using the DGEMM tuning entry; N is passed for both matrix
     * dimensions and 0 for the third size */
    status = plasma_tune(PLASMA_FUNC_DGEMM, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dplgsy", "plasma_tune() failed");
        return status;
    }
    
    /* Fetch the tuned tile size */
    NB = PLASMA_NB;
    /* NOTE(review): the result of plasma_sequence_create() is not checked,
     * yet sequence is dereferenced below -- confirm it cannot fail here. */
    plasma_sequence_create(plasma, &sequence);
    
    /* Describe A with NB-by-NB tiles; LDA is passed as the first of the two
     * overall extents and the N-by-N region is the part generated.
     * NOTE(review): argument roles inferred from the call only -- confirm
     * against plasma_desc_init(). */
    descA = plasma_desc_init(
        PlasmaRealDouble, NB, NB, NB*NB,
        LDA, N, 0, 0, N, N);
    /* The descriptor points straight at the caller's array; nothing is
     * allocated or freed for it in this routine. */
    descA.mat = A;
    /* Call the tile interface */
    PLASMA_dplgsy_Tile_Async( bump, &descA, seed, sequence, &request );
    /* Convert the tile result back to LAPACK layout in place, then wait */
    plasma_diptile2lap( descA, A, NB, NB, LDA, N );
    plasma_dynamic_sync();
    /* Read the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dplrnt	(	int	M,
		int	N,
		double *	A,
		int	LDA,
		unsigned long long int	seed
	)

PLASMA_dplrnt - Generate a random matrix by tiles.

Parameters:

[in]	M	The number of rows of A. M >= 0.
[in]	N	The number of columns of A. N >= 0.
[out]	A	On exit, The random matrix A generated.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[in]	seed	The seed used in the random generation.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_dplrnt_Tile; PLASMA_dplrnt_Tile_Async; PLASMA_cplrnt; PLASMA_dplrnt; PLASMA_splrnt; PLASMA_dplgsy; PLASMA_dplgsy

Definition at line 57 of file dplrnt.c.

References A, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_diptile2lap, PLASMA_dplrnt_Tile_Async(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DGEMM, PLASMA_NB, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, plasma_tune(), PlasmaRealDouble, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the tiled random matrix generator.
     * Validates M, N and LDA, tunes NB, describes the caller's array A as an
     * M-by-N tile matrix, submits PLASMA_dplrnt_Tile_Async() and returns the
     * status recorded in the sequence.
     *
     * Negative return values are minus the 1-based position of the offending
     * argument (M is the 1st, N the 2nd, LDA the 4th).
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dplrnt", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (M < 0) {
        plasma_error("PLASMA_dplrnt", "illegal value of M");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_dplrnt", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_dplrnt", "illegal value of LDA");
        return -4;
    }
    /* Quick return: an empty matrix needs no generation */
    if (min(M, N) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB using the DGEMM tuning entry with sizes M, N and 0 */
    status = plasma_tune(PLASMA_FUNC_DGEMM, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dplrnt", "plasma_tune() failed");
        return status;
    }
    
    /* Fetch the tuned tile size */
    NB = PLASMA_NB;
    /* NOTE(review): the result of plasma_sequence_create() is not checked,
     * yet sequence is dereferenced below -- confirm it cannot fail here. */
    plasma_sequence_create(plasma, &sequence);
    /* Describe A with NB-by-NB tiles; LDA is passed as the first of the two
     * overall extents and the M-by-N region is the part generated.
     * NOTE(review): argument roles inferred from the call only -- confirm
     * against plasma_desc_init(). */
    descA = plasma_desc_init(
        PlasmaRealDouble, NB, NB, NB*NB,
        LDA, N, 0, 0, M, N);
    /* The descriptor points straight at the caller's array; nothing is
     * allocated or freed for it in this routine. */
    descA.mat = A;
    /* Call the tile interface */
    PLASMA_dplrnt_Tile_Async( &descA, seed, sequence, &request );
    /* Convert the tile result back to LAPACK layout in place, then wait */
    plasma_diptile2lap( descA, A, NB, NB, LDA, N );
    plasma_dynamic_sync();
    /* Read the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dposv	(	PLASMA_enum	uplo,
		int	N,
		int	NRHS,
		double *	A,
		int	LDA,
		double *	B,
		int	LDB
	)

PLASMA_dposv - Computes the solution to a system of linear equations A * X = B, where A is an N-by-N symmetric positive definite (or Hermitian positive definite in the complex case) matrix and X and B are N-by-NRHS matrices. The Cholesky decomposition is used to factor A as

$A = \begin{cases} U^H \times U, & \text{if uplo = PlasmaUpper} \\ L \times L^H, & \text{if uplo = PlasmaLower} \end{cases}$

where U is an upper triangular matrix and L is a lower triangular matrix. The factored form of A is then used to solve the system of equations A * X = B.

Parameters:

[in]	uplo	Specifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The number of linear equations, i.e., the order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in,out]	A	On entry, the symmetric positive definite (or Hermitian) matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if return value = 0, the factor U or L from the Cholesky factorization A = U**T*U or A = L*L**T.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in,out]	B	On entry, the N-by-NRHS right hand side matrix B. On exit, if return value = 0, the N-by-NRHS solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, the leading minor of order i of A is not positive definite, so the factorization could not be completed, and the solution has not been computed.

See also:: PLASMA_dposv_Tile; PLASMA_dposv_Tile_Async; PLASMA_cposv; PLASMA_dposv; PLASMA_sposv

Definition at line 82 of file dposv.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, PLASMA_dposv_Tile_Async(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLower, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the Cholesky-based linear solve.
     * Validates the arguments, tunes NB, brings A (N-by-N) and B (N-by-NRHS)
     * into tile layout, submits PLASMA_dposv_Tile_Async(), converts both
     * matrices back and returns the status recorded in the sequence.
     *
     * Negative return values are minus the 1-based position of the offending
     * argument (uplo 1, N 2, NRHS 3, LDA 5, LDB 7).
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dposv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_dposv", "illegal value of uplo");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_dposv", "illegal value of N");
        return -2;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_dposv", "illegal value of NRHS");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_dposv", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_dposv", "illegal value of LDB");
        return -7;
    }
    /* Quick return - currently NOT equivalent to LAPACK's
     * LAPACK does not have such check for DPOSV.
     * Consequence visible here: with NRHS == 0 and N > 0 this returns
     * before A is factorized. */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB depending on N & NRHS (the DPOSV tuning entry is used) */
    status = plasma_tune(PLASMA_FUNC_DPOSV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dposv", "plasma_tune() failed");
        return status;
    }
    /* Fetch the tuned tile size */
    NB    = PLASMA_NB;
    /* NOTE(review): the result of plasma_sequence_create() is not checked,
     * yet sequence is dereferenced below -- confirm it cannot fail here. */
    plasma_sequence_create(plasma, &sequence);
    /* Bring A and B into tile layout.
     * NOTE(review): the last argument of plasma_dooplap2tile looks like
     * cleanup code for a failed conversion; the macro body is not visible
     * here -- confirm. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_dooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        plasma_diplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
    }
    /* Call the tile interface */
    PLASMA_dposv_Tile_Async(uplo, &descA, &descB, sequence, &request);
    /* Both A (the factor) and B (the solution) are outputs, so both are
     * converted back in either translation mode.  Out-of-place also frees
     * the tile copies after the sync. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_dooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_diptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    
    /* Read the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dpotrf	(	PLASMA_enum	uplo,
		int	N,
		double *	A,
		int	LDA
	)

PLASMA_dpotrf - Computes the Cholesky factorization of a symmetric positive definite (or Hermitian positive definite in the complex case) matrix A. The factorization has the form

$A = \begin{cases} U^H \times U, & \text{if uplo = PlasmaUpper} \\ L \times L^H, & \text{if uplo = PlasmaLower} \end{cases}$

where U is an upper triangular matrix and L is a lower triangular matrix.

Parameters:

[in]	uplo	= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The order of the matrix A. N >= 0.
[in,out]	A	On entry, the symmetric positive definite (or Hermitian) matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if return value = 0, the factor U or L from the Cholesky factorization A = U**T*U or A = L*L**T.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, the leading minor of order i of A is not positive definite, so the factorization could not be completed, and the solution has not been computed.

See also:: PLASMA_dpotrf_Tile; PLASMA_dpotrf_Tile_Async; PLASMA_cpotrf; PLASMA_dpotrf; PLASMA_spotrf; PLASMA_dpotrs

Definition at line 70 of file dpotrf.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, PLASMA_dpotrf_Tile_Async(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLower, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the Cholesky factorization.
     * Validates the arguments, tunes NB, brings the N-by-N matrix A into
     * tile layout, submits PLASMA_dpotrf_Tile_Async(), converts A back and
     * returns the status recorded in the sequence.
     *
     * Negative return values are minus the 1-based position of the offending
     * argument (uplo 1, N 2, LDA 4).
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dpotrf", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_dpotrf", "illegal value of uplo");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_dpotrf", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_dpotrf", "illegal value of LDA");
        return -4;
    }
    /* Quick return: N < 0 was rejected above, so this is true only for N == 0 */
    if (max(N, 0) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB depending on N (the DPOSV tuning entry is used, third size 0) */
    status = plasma_tune(PLASMA_FUNC_DPOSV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dpotrf", "plasma_tune() failed");
        return status;
    }
    /* Fetch the tuned tile size */
    NB   = PLASMA_NB;
    /* NOTE(review): the result of plasma_sequence_create() is not checked,
     * yet sequence is dereferenced below -- confirm it cannot fail here. */
    plasma_sequence_create(plasma, &sequence);
    /* Bring A into tile layout.
     * NOTE(review): the last argument of plasma_dooplap2tile looks like
     * cleanup code for a failed conversion; the macro body is not visible
     * here -- confirm. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_diplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, N, N);
    }
    /* Call the tile interface */
    PLASMA_dpotrf_Tile_Async(uplo, &descA, sequence, &request);
    /* Convert the factor back to LAPACK layout and wait; out-of-place also
     * frees the tile copy after the sync. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    /* Read the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dpotri	(	PLASMA_enum	uplo,
		int	N,
		double *	A,
		int	LDA
	)

PLASMA_dpotri - Computes the inverse of a symmetric positive definite (or Hermitian positive definite in the complex case) matrix A using the Cholesky factorization A = U**T*U or A = L*L**T computed by PLASMA_dpotrf.

Parameters:

[in]	uplo	= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The order of the matrix A. N >= 0.
[in,out]	A	On entry, the triangular factor U or L from the Cholesky factorization A = U**T*U or A = L*L**T, as computed by PLASMA_dpotrf. On exit, the upper or lower triangle of the (Hermitian) inverse of A, overwriting the input factor U or L.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, the (i,i) element of the factor U or L is zero, and the inverse could not be computed.

See also:: PLASMA_dpotri_Tile; PLASMA_dpotri_Tile_Async; PLASMA_cpotri; PLASMA_dpotri; PLASMA_spotri; PLASMA_dpotrf

Definition at line 62 of file dpotri.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, PLASMA_dpotri_Tile_Async(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLower, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the inverse computed from a Cholesky factor.
     * Validates the arguments, tunes NB, brings the N-by-N matrix A into
     * tile layout, submits PLASMA_dpotri_Tile_Async(), converts A back and
     * returns the status recorded in the sequence.
     *
     * Negative return values are minus the 1-based position of the offending
     * argument (uplo 1, N 2, LDA 4).
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dpotri", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_dpotri", "illegal value of uplo");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_dpotri", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_dpotri", "illegal value of LDA");
        return -4;
    }
    /* Quick return: N < 0 was rejected above, so this is true only for N == 0 */
    if (max(N, 0) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB depending on N (the DPOSV tuning entry is used, third size 0) */
    status = plasma_tune(PLASMA_FUNC_DPOSV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dpotri", "plasma_tune() failed");
        return status;
    }
    /* Fetch the tuned tile size */
    NB   = PLASMA_NB;
    /* NOTE(review): the result of plasma_sequence_create() is not checked,
     * yet sequence is dereferenced below -- confirm it cannot fail here. */
    plasma_sequence_create(plasma, &sequence);
    /* Bring A into tile layout.
     * NOTE(review): the last argument of plasma_dooplap2tile looks like
     * cleanup code for a failed conversion; the macro body is not visible
     * here -- confirm. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_diplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, N, N);
    }
    /* Call the tile interface */
    PLASMA_dpotri_Tile_Async(uplo, &descA, sequence, &request);
    /* Convert the result back to LAPACK layout and wait; out-of-place also
     * frees the tile copy after the sync. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    /* Read the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dpotrs	(	PLASMA_enum	uplo,
		int	N,
		int	NRHS,
		double *	A,
		int	LDA,
		double *	B,
		int	LDB
	)

PLASMA_dpotrs - Solves a system of linear equations A * X = B with a symmetric positive definite (or Hermitian positive definite in the complex case) matrix A using the Cholesky factorization A = U**T*U or A = L*L**T computed by PLASMA_dpotrf.

Parameters:

[in]	uplo	= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]	A	The triangular factor U or L from the Cholesky factorization A = U**T*U or A = L*L**T, computed by PLASMA_dpotrf.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in,out]	B	On entry, the N-by-NRHS right hand side matrix B. On exit, if return value = 0, the N-by-NRHS solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_dpotrs_Tile; PLASMA_dpotrs_Tile_Async; PLASMA_cpotrs; PLASMA_dpotrs; PLASMA_spotrs; PLASMA_dpotrf

Definition at line 67 of file dpotrs.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, PLASMA_dpotrs_Tile_Async(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLower, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the triangular solves that follow a Cholesky
     * factorization.  Validates the arguments, tunes NB, brings A (N-by-N)
     * and B (N-by-NRHS) into tile layout, submits PLASMA_dpotrs_Tile_Async(),
     * converts the result back and returns the status recorded in the
     * sequence.
     *
     * Negative return values are minus the 1-based position of the offending
     * argument (uplo 1, N 2, NRHS 3, LDA 5, LDB 7).
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dpotrs", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_dpotrs", "illegal value of uplo");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_dpotrs", "illegal value of N");
        return -2;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_dpotrs", "illegal value of NRHS");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_dpotrs", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_dpotrs", "illegal value of LDB");
        return -7;
    }
    /* Quick return: nothing to solve when N or NRHS is zero */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB depending on N & NRHS (the DPOSV tuning entry is used) */
    status = plasma_tune(PLASMA_FUNC_DPOSV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dpotrs", "plasma_tune() failed");
        return status;
    }
    /* Fetch the tuned tile size */
    NB    = PLASMA_NB;
    /* NOTE(review): the result of plasma_sequence_create() is not checked,
     * yet sequence is dereferenced below -- confirm it cannot fail here. */
    plasma_sequence_create(plasma, &sequence);
    /* Bring A and B into tile layout.
     * NOTE(review): the last argument of plasma_dooplap2tile looks like
     * cleanup code for a failed conversion; the macro body is not visible
     * here -- confirm. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_dooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        plasma_diplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
    }
    /* Call the tile interface */
    PLASMA_dpotrs_Tile_Async(uplo, &descA, &descB, sequence, &request);
    /* Out-of-place converts only B back, since A is an input, then frees
     * both tile copies after the sync.  In-place converts both A and B
     * back. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_diptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    
    /* Read the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dsgesv	(	int	N,
		int	NRHS,
		double *	A,
		int	LDA,
		int *	IPIV,
		double *	B,
		int	LDB,
		double *	X,
		int	LDX,
		int *	ITER
	)

PLASMA_dsgesv - Computes the solution to a system of linear equations A * X = B, where A is an N-by-N matrix and X and B are N-by-NRHS matrices.

PLASMA_dsgesv first attempts to factorize the matrix in single precision and use this factorization within an iterative refinement procedure to produce a solution with double precision normwise backward error quality (see below). If the approach fails the method switches to a double precision factorization and solve.

The iterative refinement is not going to be a winning strategy if the ratio of single precision performance over double precision performance is too small. A reasonable strategy should take the number of right-hand sides and the size of the matrix into account. This might be done with a call to ILAENV in the future. Up to now, we always try iterative refinement.

The iterative refinement process is stopped if ITER > ITERMAX or for all the RHS we have: RNRM < N*XNRM*ANRM*EPS*BWDMAX where:

ITER is the number of the current iteration in the iterative refinement process
RNRM is the infinity-norm of the residual
XNRM is the infinity-norm of the solution
ANRM is the infinity-operator-norm of the matrix A
EPS is the machine epsilon returned by DLAMCH('Epsilon').

Actually, in its current state (PLASMA 2.1.0), the test is slightly relaxed.

The values ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 respectively.

Parameters:

[in]	N	The number of linear equations, i.e., the order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]	A	The N-by-N coefficient matrix A. This matrix is not modified.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[out]	IPIV	On exit, the pivot indices that define the permutations.
[in]	B	The N-by-NRHS matrix of right hand side matrix B.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).
[out]	X	If return value = 0, the N-by-NRHS solution matrix X.
[in]	LDX	The leading dimension of the array X. LDX >= max(1,N).
[out]	ITER	The number of the current iteration in the iterative refinement process

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, so the solution could not be computed.

See also:: PLASMA_dsgesv_Tile; PLASMA_dsgesv_Tile_Async; PLASMA_dsgesv; PLASMA_dgesv

Definition at line 227 of file dsgesv.c.

References plasma_desc_t::mat, max, min, plasma_context_self(), plasma_ddesc_alloc, plasma_desc_init(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, PLASMA_dsgesv_Tile_Async(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DSGESV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaRealDouble, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the mixed-precision LU solve.
     *
     * Validates the arguments, builds tile descriptors for A, B and X
     * (plasma_dooplap2tile / plasma_ddesc_alloc when PLASMA_TRANSLATION is
     * PLASMA_OUTOFPLACE, plasma_diplap2tile otherwise), calls
     * PLASMA_dsgesv_Tile_Async() and translates the result back.
     *
     * Returns PLASMA_SUCCESS, -i when the i-th argument is illegal
     * (N=1, NRHS=2, A=3, LDA=4, IPIV=5, B=6, LDB=7, X=8, LDX=9, ITER=10),
     * or the status reported by the tile routine / its sequence.
     */
    int NB;
    int status;
    PLASMA_desc  descA;
    PLASMA_desc  descB;
    PLASMA_desc  descX;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dsgesv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments; the return value is minus the argument position */
    if (N < 0) {
        plasma_error("PLASMA_dsgesv", "illegal value of N");
        return -1;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_dsgesv", "illegal value of NRHS");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_dsgesv", "illegal value of LDA");
        return -4;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_dsgesv", "illegal value of LDB");
        return -7;  /* LDB is the 7th argument */
    }
    if (LDX < max(1, N)) {
        plasma_error("PLASMA_dsgesv", "illegal value of LDX");
        return -9;  /* LDX is the 9th argument */
    }
    /* Quick return */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_DSGESV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dsgesv", "plasma_tune() failed");
        return status;
    }
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);
    /* DOUBLE PRECISION INITIALIZATION */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* The last macro argument is the cleanup run if the allocation fails */
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_dooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)) );
        plasma_ddesc_alloc(  descX, NB, NB, N, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descX)) );
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        plasma_diplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
        /* X is output only: describe the caller's buffer directly */
        descX = plasma_desc_init(
            PlasmaRealDouble, NB, NB, (NB*NB),
            LDX, NRHS, 0, 0, N, NRHS);
        descX.mat = X;
    }
    /* Call the native interface */
    status = PLASMA_dsgesv_Tile_Async(&descA, IPIV, &descB, &descX, ITER, sequence, &request);
    if (status == PLASMA_SUCCESS) {
        if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
            plasma_dooptile2lap( descX, X, NB, NB, LDX, NRHS );
            plasma_dynamic_sync();
            plasma_desc_mat_free(&descA);
            plasma_desc_mat_free(&descB);
            plasma_desc_mat_free(&descX);
        } else {
            plasma_diptile2lap( descA, A, NB, NB, LDA, N    );
            plasma_diptile2lap( descB, B, NB, NB, LDB, NRHS );
            plasma_diptile2lap( descX, X, NB, NB, LDX, NRHS );
            plasma_dynamic_sync();
        }
    } else if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* The tile routine failed: wait for anything still in flight, then
         * release the tile copies, which would otherwise be leaked. */
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
        plasma_desc_mat_free(&descX);
    }
    /* The sequence status takes precedence; an error returned by the tile
     * routine is kept when the sequence itself reports success. */
    if (sequence->status != PLASMA_SUCCESS)
        status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dsposv	(	PLASMA_enum	uplo,
		int	N,
		int	NRHS,
		double *	A,
		int	LDA,
		double *	B,
		int	LDB,
		double *	X,
		int	LDX,
		int *	ITER
	)

PLASMA_dsposv - Computes the solution to a system of linear equations A * X = B, where A is an N-by-N symmetric positive definite (or Hermitian positive definite in the complex case) matrix and X and B are N-by-NRHS matrices. The Cholesky decomposition is used to factor A as

A = U**H * U, if uplo = PlasmaUpper, or A = L * L**H, if uplo = PlasmaLower,

where U is an upper triangular matrix and L is a lower triangular matrix. The factored form of A is then used to solve the system of equations A * X = B.

PLASMA_dsposv first attempts to factorize the matrix in single precision and use this factorization within an iterative refinement procedure to produce a solution with double precision normwise backward error quality (see below). If the approach fails the method switches to a double precision factorization and solve.

The iterative refinement is not going to be a winning strategy if the ratio of single precision performance over double precision performance is too small. A reasonable strategy should take the number of right-hand sides and the size of the matrix into account. This might be done with a call to ILAENV in the future. Up to now, we always try iterative refinement.

The iterative refinement process is stopped if ITER > ITERMAX or for all the RHS we have: RNRM < N*XNRM*ANRM*EPS*BWDMAX where:

ITER is the number of the current iteration in the iterative refinement process
RNRM is the infinity-norm of the residual
XNRM is the infinity-norm of the solution
ANRM is the infinity-operator-norm of the matrix A
EPS is the machine epsilon returned by DLAMCH('Epsilon').

Actually, in its current state (PLASMA 2.1.0), the test is slightly relaxed.

The values ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 respectively.

Parameters:

[in]	uplo	Specifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The number of linear equations, i.e., the order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]	A	The N-by-N symmetric positive definite (or Hermitian) coefficient matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. This matrix is not modified.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in]	B	The N-by-NRHS matrix of right hand side matrix B.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).
[out]	X	If return value = 0, the N-by-NRHS solution matrix X.
[in]	LDX	The leading dimension of the array X. LDX >= max(1,N).
[out]	ITER	The number of the current iteration in the iterative refinement process

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, the leading minor of order i of A is not positive definite, so the factorization could not be completed, and the solution has not been computed.

See also:: PLASMA_dsposv_Tile; PLASMA_dsposv_Tile_Async; PLASMA_dsposv; PLASMA_dposv

Definition at line 171 of file dsposv.c.

References plasma_desc_t::mat, max, min, plasma_context_self(), plasma_ddesc_alloc, plasma_desc_init(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, PLASMA_dsposv_Tile_Async(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DSPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLower, PlasmaRealDouble, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the mixed-precision Cholesky solve.
     *
     * Validates the arguments, builds tile descriptors for A, B and X
     * (plasma_dooplap2tile / plasma_ddesc_alloc when PLASMA_TRANSLATION is
     * PLASMA_OUTOFPLACE, plasma_diplap2tile otherwise), calls
     * PLASMA_dsposv_Tile_Async() and translates the result back.
     *
     * Returns PLASMA_SUCCESS, -i when the i-th argument is illegal
     * (uplo=1, N=2, NRHS=3, A=4, LDA=5, B=6, LDB=7, X=8, LDX=9, ITER=10),
     * or the status reported by the tile routine / its sequence.
     */
    int NB;
    int status;
    PLASMA_desc  descA;
    PLASMA_desc  descB;
    PLASMA_desc  descX;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dsposv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments; the return value is minus the argument position */
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_dsposv", "illegal value of uplo");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_dsposv", "illegal value of N");
        return -2;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_dsposv", "illegal value of NRHS");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_dsposv", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_dsposv", "illegal value of LDB");
        return -7;
    }
    if (LDX < max(1, N)) {
        plasma_error("PLASMA_dsposv", "illegal value of LDX");
        return -9;  /* LDX is the 9th argument */
    }
    /* Quick return - currently NOT equivalent to LAPACK's
     * LAPACK does not have such check for DSPOSV */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_DSPOSV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dsposv", "plasma_tune() failed");
        return status;
    }
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /* DOUBLE PRECISION INITIALIZATION */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* The last macro argument is the cleanup run if the allocation fails */
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_dooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)) );
        plasma_ddesc_alloc(  descX, NB, NB, N, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descX)) );
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        plasma_diplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
        /* X is output only: describe the caller's buffer directly */
        descX = plasma_desc_init(
            PlasmaRealDouble, NB, NB, (NB*NB),
            LDX, NRHS, 0, 0, N, NRHS);
        descX.mat = X;
    }
    /* Call the native interface */
    status = PLASMA_dsposv_Tile_Async(uplo, &descA, &descB, &descX, ITER, sequence, &request);
    if (status == PLASMA_SUCCESS) {
        if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
            plasma_dooptile2lap( descX, X, NB, NB, LDX, NRHS );
            plasma_dynamic_sync();
            plasma_desc_mat_free(&descA);
            plasma_desc_mat_free(&descB);
            plasma_desc_mat_free(&descX);
        } else {
            plasma_diptile2lap( descA, A, NB, NB, LDA, N    );
            plasma_diptile2lap( descB, B, NB, NB, LDB, NRHS );
            plasma_diptile2lap( descX, X, NB, NB, LDX, NRHS );
            plasma_dynamic_sync();
        }
    } else if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* The tile routine failed: wait for anything still in flight, then
         * release the tile copies, which would otherwise be leaked. */
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
        plasma_desc_mat_free(&descX);
    }
    /* The sequence status takes precedence; an error returned by the tile
     * routine is kept when the sequence itself reports success. */
    if (sequence->status != PLASMA_SUCCESS)
        status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dsungesv	(	PLASMA_enum	trans,
		int	N,
		int	NRHS,
		double *	A,
		int	LDA,
		double *	B,
		int	LDB,
		double *	X,
		int	LDX,
		int *	ITER
	)

PLASMA_dsungesv - Solves overdetermined or underdetermined linear systems involving an M-by-N matrix A using the QR or the LQ factorization of A. It is assumed that A has full rank. The following options are provided:

trans = PlasmaNoTrans and M >= N: find the least squares solution of an overdetermined

system, i.e., solve the least squares problem: minimize || B - A*X ||.

trans = PlasmaNoTrans and M < N: find the minimum norm solution of an underdetermined

system A * X = B.

Several right hand side vectors B and solution vectors X can be handled in a single call; they are stored as the columns of the M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix X.

PLASMA_dsungesv first attempts to factorize the matrix in single precision and use this factorization within an iterative refinement procedure to produce a solution with double precision normwise backward error quality (see below). If the approach fails the method switches to a double precision factorization and solve.

The iterative refinement is not going to be a winning strategy if the ratio of single precision performance over double precision performance is too small. A reasonable strategy should take the number of right-hand sides and the size of the matrix into account. This might be done with a call to ILAENV in the future. Up to now, we always try iterative refinement.

The iterative refinement process is stopped if ITER > ITERMAX or for all the RHS we have: RNRM < N*XNRM*ANRM*EPS*BWDMAX where:

ITER is the number of the current iteration in the iterative refinement process
RNRM is the infinity-norm of the residual
XNRM is the infinity-norm of the solution
ANRM is the infinity-operator-norm of the matrix A
EPS is the machine epsilon returned by DLAMCH('Epsilon').

Actually, in its current state (PLASMA 2.1.0), the test is slightly relaxed.

The values ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 respectively.

We follow Bjorck's algorithm proposed in "Iterative Refinement of Linear Least Squares solutions I", BIT, 7:257-278, 1967.

Parameters:

[in]	trans	Intended usage: = PlasmaNoTrans: the linear system involves A; = PlasmaTrans: the linear system involves A**H. Currently only PlasmaNoTrans is supported.
[in]	N	The number of columns of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrices B and X. NRHS >= 0.
[in]	A	The M-by-N matrix A. This matrix is not modified.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[in]	B	The M-by-NRHS matrix B of right hand side vectors, stored columnwise. Not modified.
[in]	LDB	The leading dimension of the array B. LDB >= MAX(1,M,N).
[out]	X	If return value = 0, the solution vectors, stored columnwise. if M >= N, rows 1 to N of B contain the least squares solution vectors; the residual sum of squares for the solution in each column is given by the sum of squares of the modulus of elements N+1 to M in that column; if M < N, rows 1 to N of B contain the minimum norm solution vectors;
[in]	LDX	The leading dimension of the array X. LDX >= max(1,N).
[out]	ITER	The number of the current iteration in the iterative refinement process

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_dsungesv_Tile; PLASMA_dsungesv_Tile_Async; PLASMA_dsungesv; PLASMA_dgels

Definition at line 163 of file dsungesv.c.

References plasma_desc_t::mat, max, PLASMA_Alloc_Workspace_dgels_Tile(), plasma_context_self(), plasma_ddesc_alloc, PLASMA_Dealloc_Handle_Tile(), plasma_desc_init(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, PLASMA_dsungesv_Tile_Async(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_NOT_SUPPORTED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DSGELS, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaNoTrans, and PlasmaRealDouble.

{
    /*
     * LAPACK-layout driver for the mixed-precision QR-based solve.
     *
     * Validates the arguments, builds tile descriptors for A, B and X
     * (plasma_dooplap2tile / plasma_ddesc_alloc when PLASMA_TRANSLATION is
     * PLASMA_OUTOFPLACE, plasma_diplap2tile otherwise), allocates the T
     * workspace, calls PLASMA_dsungesv_Tile_Async() and translates the
     * result back. A is treated as N-by-N throughout this routine.
     *
     * Returns PLASMA_SUCCESS, PLASMA_ERR_NOT_SUPPORTED when trans is not
     * PlasmaNoTrans, -i when the i-th argument is illegal
     * (trans=1, N=2, NRHS=3, A=4, LDA=5, B=6, LDB=7, X=8, LDX=9, ITER=10),
     * or the status reported by the tile routine / its sequence.
     */
    int NB;
    int status;
    PLASMA_desc  descA;
    PLASMA_desc  descB;
    PLASMA_desc *descT;
    PLASMA_desc  descX;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dsungesv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments; the return value is minus the argument position */
    if (trans != PlasmaNoTrans) {
        plasma_error("PLASMA_dsungesv", "only PlasmaNoTrans supported");
        return PLASMA_ERR_NOT_SUPPORTED;
    }
    if (N < 0) {
        plasma_error("PLASMA_dsungesv", "illegal value of N");
        return -2;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_dsungesv", "illegal value of NRHS");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_dsungesv", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_dsungesv", "illegal value of LDB");
        return -7;  /* LDB is the 7th argument */
    }
    if (LDX < max(1, N)) {
        plasma_error("PLASMA_dsungesv", "illegal value of LDX");
        return -9;
    }
    /* Quick return */
    if ( N == 0 )
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_DSGELS, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dsungesv", "plasma_tune() failed");
        return status;
    }
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /* DOUBLE PRECISION INITIALIZATION */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* The last macro argument is the cleanup run if the allocation fails */
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_dooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)) );
        plasma_ddesc_alloc(  descX, NB, NB, N, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descX)) );
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        plasma_diplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
        /* X is output only: describe the caller's buffer directly */
        descX = plasma_desc_init(
            PlasmaRealDouble, NB, NB, (NB*NB),
            LDX, NRHS, 0, 0, N, NRHS);
        descX.mat = X;
    }
    /* Allocate workspace */
    /* NOTE(review): the result of this allocation is not checked here - confirm
     * how PLASMA_Alloc_Workspace_dgels_Tile reports failure. */
    PLASMA_Alloc_Workspace_dgels_Tile(N, N, &descT);
    /* Call the native interface */
    status = PLASMA_dsungesv_Tile_Async(PlasmaNoTrans, &descA, descT, &descB, &descX, ITER,
                                        sequence, &request);
    if (status == PLASMA_SUCCESS) {
        if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
            plasma_dooptile2lap( descX, X, NB, NB, LDX, NRHS );
            plasma_dynamic_sync();
            plasma_desc_mat_free(&descA);
            plasma_desc_mat_free(&descB);
            plasma_desc_mat_free(&descX);
        } else {
            plasma_diptile2lap( descA, A, NB, NB, LDA, N    );
            plasma_diptile2lap( descB, B, NB, NB, LDB, NRHS );
            plasma_diptile2lap( descX, X, NB, NB, LDX, NRHS );
            plasma_dynamic_sync();
        }
    } else if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* The tile routine failed: wait for anything still in flight, then
         * release the tile copies, which would otherwise be leaked. */
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
        plasma_desc_mat_free(&descX);
    }
    PLASMA_Dealloc_Handle_Tile(&descT);
    /* Report failures recorded on the sequence, as the sibling drivers do;
     * an error returned by the tile routine is kept otherwise. */
    if (sequence->status != PLASMA_SUCCESS)
        status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dsyev	(	PLASMA_enum	jobz,
		PLASMA_enum	uplo,
		int	N,
		double *	A,
		int	LDA,
		double *	W,
		PLASMA_desc *	descT,
		double *	Q,
		int	LDQ
	)

PLASMA_dsyev - Computes all eigenvalues and, optionally, eigenvectors of a real symmetric matrix A. The matrix A is first reduced to tridiagonal form using a two-stage approach: First stage: reduction to band form; Second stage: reduction from band to tridiagonal form. Note: Only PlasmaNoVec is supported!

Parameters:

[in]	jobz	Intended usage: = PlasmaNoVec: computes eigenvalues only; = PlasmaVec: computes eigenvalues and eigenvectors. Note: Only PlasmaNoVec supported!
[in]	uplo	Specifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The order of the matrix A. N >= 0.
[in,out]	A	On entry, the symmetric (or Hermitian) matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, the lower triangle (if uplo = PlasmaLower) or the upper triangle (if uplo = PlasmaUpper) of A, including the diagonal, is destroyed.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[out]	W	On exit, if info = 0, the eigenvalues.
[in,out]	descT	On entry, descriptor as returned by PLASMA_Alloc_Workspace_dsyev. On exit, contains auxiliary factorization data.
[out]	Q	On exit, if jobz = PlasmaVec and info = 0, the eigenvectors.
[in]	LDQ	The leading dimension of the array Q. LDQ >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if INFO = i, the algorithm failed to converge; i off-diagonal elements of an intermediate tridiagonal form did not converge to zero.

See also:: PLASMA_dsyev_Tile; PLASMA_dsyev_Tile_Async; PLASMA_cheev; PLASMA_dsyev; PLASMA_ssyev

Definition at line 96 of file dsyev.c.

References plasma_desc_t::m, plasma_desc_t::mat, max, plasma_desc_t::n, plasma_context_self(), plasma_ddesc_alloc, plasma_desc_check(), plasma_desc_init(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, PLASMA_dsyev_Tile_Async(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), PLASMA_FUNC_DSYEV, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLower, PlasmaNoVec, PlasmaRealDouble, PlasmaUpper, PlasmaVec, Q, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the symmetric eigenvalue solver.
     *
     * Tunes the block sizes, validates the arguments, moves A into tile
     * layout, calls PLASMA_dsyev_Tile_Async() and moves A back.
     *
     * Returns PLASMA_SUCCESS, -i when the i-th argument is illegal, -1 when
     * jobz == PlasmaVec (eigenvectors are rejected as unsupported below), or
     * the status recorded on the sequence.
     */
    int NB, IB, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descQ;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_error("PLASMA_dsyev", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Tune NB & IB depending on N; Set NBNB */
    /* Tuning runs before the argument checks because the descT check below
     * needs NT, which is derived from NB. */
    status = plasma_tune(PLASMA_FUNC_DSYEV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dsyev", "plasma_tune() failed");
        return status;
    }
    /* Set NT */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    /* NT = ceil(N / NB): number of tile rows/columns */
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);
    /* Check input arguments */
    if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
        plasma_error("PLASMA_dsyev", "illegal value of jobz");
        return -1;
    }
    if (uplo != PlasmaLower && uplo != PlasmaUpper) {
        plasma_error("PLASMA_dsyev", "illegal value of uplo");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_dsyev", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_dsyev", "illegal value of LDA");
        return -5;
    }
    /* The T workspace must be a valid descriptor of size (NT*IB)-by-(NT*NB) */
    if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) || 
         ( descT->m != NT*IB ) || (descT->n != NT*NB) ) {
        plasma_error("PLASMA_dsyev", "invalid T descriptor");
        return -7;
    }
    if (LDQ < max(1, N)) {
        plasma_error("PLASMA_dsyev", "illegal value of LDQ");
        return -9;
    }
    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;
    /* Eigenvectors are rejected here, so every "jobz == PlasmaVec" branch
     * further down is unreachable and descQ is never initialized. */
    if (jobz == PlasmaVec) {
        plasma_error("PLASMA_dsyev", "computing the eigenvectors is not supported in this version");
        return -1;
    }
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* The last macro argument is the cleanup passed for the failure case */
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
        if (jobz == PlasmaVec) {
            /* No need for conversion, it's just output */
            plasma_ddesc_alloc( descQ, NB, NB, LDQ, N, 0, 0, N, N, 
                                plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descQ)) );
        }
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
        if (jobz == PlasmaVec) {
            /* No need for conversion, it's just output */
            descQ = plasma_desc_init(
                PlasmaRealDouble, NB, NB, NB*NB,
                LDQ, N, 0, 0, N, N);
            descQ.mat = Q;
        }
    }
    /* Call the tile interface */
    /* NOTE(review): &descQ points at an uninitialized descriptor on this path
     * (jobz is always PlasmaNoVec here) - confirm the tile routine ignores it. */
    PLASMA_dsyev_Tile_Async(jobz, uplo, &descA, W, descT, &descQ, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descA, A, NB, NB, LDA, N );
        if (jobz == PlasmaVec) {
           plasma_dooptile2lap( descQ, Q, NB, NB, LDQ, N );
        }
        /* Synchronize before the tile copies are released */
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        if (jobz == PlasmaVec)
           plasma_desc_mat_free(&descQ);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, N );
        if (jobz == PlasmaVec)
           plasma_diptile2lap( descQ, Q, NB, NB, LDQ, N );
        plasma_dynamic_sync();
    }
    /* The value returned to the caller is the status recorded on the sequence */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dsygst	(	PLASMA_enum	itype,
		PLASMA_enum	uplo,
		int	N,
		double *	A,
		int	LDA,
		double *	B,
		int	LDB
	)

PLASMA_dsygst - Reduces a real symmetric-definite generalized eigenproblem to standard form. If itype == 1, the problem is A*x = lambda*B*x, and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T). If itype == 2 or 3, the problem is A*B*x = lambda*x or B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. B must have been previously factorized as U**T*U or L*L**T by PLASMA_dpotrf.

Parameters:

[in]	PlasmaItype	Intended usage: = 1: Ax=(lambda)Bx = 2: ABx=(lambda)x = 3: BAx=(lambda)x
[in]	uplo	Specifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The order of the matrices A and B. N >= 0.
[in,out]	A	On entry, the symmetric (or Hermitian) matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if return value == 0, the transformed matrix, stored in the same format as A.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in,out]	B	On entry, the triangular factor from the Cholesky factorization of B, as returned by PLASMA_DPOTRF.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_dsygst_Tile; PLASMA_dsygst_Tile_Async; PLASMA_chegst; PLASMA_dsygst; PLASMA_ssygst

Definition at line 85 of file dsygst.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, PLASMA_dsygst_Tile_Async(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DSYGST, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLower, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the reduction of a generalized symmetric
     * eigenproblem to standard form.
     *
     * Validates the arguments, moves A and B into tile layout
     * (plasma_dooplap2tile when PLASMA_TRANSLATION is PLASMA_OUTOFPLACE,
     * plasma_diplap2tile otherwise), calls PLASMA_dsygst_Tile_Async() and
     * moves both matrices back.
     *
     * Returns PLASMA_SUCCESS, -i when the i-th argument is illegal
     * (itype=1, uplo=2, N=3, A=4, LDA=5, B=6, LDB=7), or the status recorded
     * on the sequence.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dsygst", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (itype != 1 && itype != 2 && itype != 3) {
        plasma_error("PLASMA_dsygst", "Illegal value of itype");
        return -1;
    }
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_dsygst", "Illegal value of uplo");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_dsygst", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_dsygst", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_dsygst", "illegal value of LDB");
        return -7;
    }
    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_DSYGST, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dsygst", "plasma_tune() failed");
        return status;
    }
    /* Set NT */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* The last macro argument is the cleanup run if the allocation fails.
         * By the time descB is allocated descA already owns a tile copy, so
         * the cleanup for descB must release both (as the other drivers do). */
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
        plasma_dooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)) );
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N);
        plasma_diplap2tile( descB, B, NB, NB, LDB, N, 0, 0, N, N);
    }
    /* Call the tile interface */
    PLASMA_dsygst_Tile_Async(itype, uplo, &descA, &descB, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dooptile2lap( descB, B, NB, NB, LDB, N );
        /* Synchronize before the tile copies are released */
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, N );
        plasma_diptile2lap( descB, B, NB, NB, LDB, N );
        plasma_dynamic_sync();
    }
    /* The value returned to the caller is the status recorded on the sequence */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dsygv	(	PLASMA_enum	itype,
		PLASMA_enum	jobz,
		PLASMA_enum	uplo,
		int	N,
		double *	A,
		int	LDA,
		double *	B,
		int	LDB,
		double *	W,
		PLASMA_desc *	descT,
		double *	Q,
		int	LDQ
	)

PLASMA_dsygv - Computes all eigenvalues and, optionally, eigenvectors of a real generalized symmetric-definite eigenproblem of the form: A*x=(lambda)*B*x, A*B*x=(lambda)*x, or B*A*x=(lambda)*x. Here A and B are assumed to be symmetric and B is also positive definite. Note: Only PlasmaNoVec is supported!

Parameters:

[in]	itype	Intended usage: = 1: A*x=(lambda)*B*x; = 2: A*B*x=(lambda)*x; = 3: B*A*x=(lambda)*x.
[in]	jobz	Intended usage: = PlasmaNoVec: computes eigenvalues only; = PlasmaVec: computes eigenvalues and eigenvectors. Note: Only PlasmaNoVec supported!
[in]	uplo	Specifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A and B are stored; = PlasmaLower: Lower triangle of A and B are stored.
[in]	N	The order of the matrix A. N >= 0.
[in,out]	A	On entry, the symmetric (or Hermitian) matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if jobz = PlasmaVec, then if return value = 0, A contains the matrix Z of eigenvectors. The eigenvectors are normalized as follows: if ITYPE = 1 or 2, Z**T*B*Z = I; if ITYPE = 3, Z**T*inv(B)*Z = I. If jobz = PlasmaNoVec, then on exit the lower triangle (if uplo = PlasmaLower) or the upper triangle (if uplo = PlasmaUpper) of A, including the diagonal, is destroyed.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in,out]	B	On entry, the symmetric (or Hermitian) positive definite matrix B. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of B contains the upper triangular part of the matrix B, and the strictly lower triangular part of B is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of B contains the lower triangular part of the matrix B, and the strictly upper triangular part of B is not referenced. On exit, if return value <= N, the part of B containing the matrix is overwritten by the triangular factor U or L from the Cholesky factorization B = U**T*U or B = L*L**T.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).
[out]	W	On exit, if info = 0, the eigenvalues.
[in,out]	descT	On entry, descriptor as returned by PLASMA_Alloc_Workspace_dsygv. On exit, contains auxiliary factorization data.
[out]	Q	On exit, if jobz = PlasmaVec and info = 0, the eigenvectors.
[in]	LDQ	The leading dimension of Q.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
<=N	if INFO = i, plasma_dsygv failed to converge; i off-diagonal elements of an intermediate tridiagonal form did not converge to zero.
>N	if INFO = N + i, for 1 <= i <= N, then the leading minor of order i of B is not positive definite. The factorization of B could not be completed and no eigenvalues or eigenvectors were computed.

See also:: PLASMA_dsygv_Tile; PLASMA_dsygv_Tile_Async; PLASMA_chegv; PLASMA_dsygv; PLASMA_ssygv

Definition at line 128 of file dsygv.c.

References plasma_desc_t::m, plasma_desc_t::mat, max, plasma_desc_t::n, plasma_context_self(), plasma_ddesc_alloc, plasma_desc_check(), plasma_desc_init(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, PLASMA_dsygv_Tile_Async(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), PLASMA_FUNC_DSYGV, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLower, PlasmaNoVec, PlasmaRealDouble, PlasmaUpper, PlasmaVec, Q, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the generalized symmetric-definite
     * eigenvalue problem.
     *
     * Steps: tune the tile sizes, validate the arguments, translate A and B
     * into tile layout (out of place or in place, depending on
     * PLASMA_TRANSLATION), run PLASMA_dsygv_Tile_Async(), translate the
     * matrices back and report the status stored in the sequence.
     *
     * Returns:
     *   PLASMA_ERR_NOT_INITIALIZED  no PLASMA context is available;
     *   the plasma_tune() status    if tuning fails;
     *   -i                          the i-th argument was rejected;
     *   PLASMA_SUCCESS              for N == 0 (nothing to do);
     *   sequence->status            otherwise.
     *
     * Only jobz = PlasmaNoVec is supported; PlasmaVec is rejected below.
     */
    int NB, IB, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB, descQ;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_error("PLASMA_dsygv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Tune NB & IB depending on N.  This has to happen before the argument
     * checks because the descT check below needs NB, IB and NT. */
    status = plasma_tune(PLASMA_FUNC_DSYGV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dsygv", "plasma_tune() failed");
        return status;
    }

    /* Set NT: number of NB-sized tiles needed to cover N (rounded up) */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);

    /* Check input arguments; the return value is minus the position of the
     * offending argument in the parameter list. */
    if (itype != 1 && itype != 2 && itype != 3) {
        plasma_error("PLASMA_dsygv", "Illegal value of itype");
        return -1;
    }
    if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
        plasma_error("PLASMA_dsygv", "illegal value of jobz");
        return -2;
    }
    if (uplo != PlasmaLower && uplo != PlasmaUpper) {
        /* Both triangles are accepted, so report a plain illegal value
         * instead of claiming that only PlasmaLower is supported. */
        plasma_error("PLASMA_dsygv", "illegal value of uplo");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_dsygv", "illegal value of N");
        return -4;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_dsygv", "illegal value of LDA");
        return -6;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_dsygv", "illegal value of LDB");
        return -8;
    }
    /* The workspace descriptor must be valid and sized (NT*IB)-by-(NT*NB). */
    if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) ||
         ( descT->m != NT*IB ) || (descT->n != NT*NB) ) {
        plasma_error("PLASMA_dsygv", "invalid T descriptor");
        return -10;
    }
    if (LDQ < max(1, N)) {
        plasma_error("PLASMA_dsygv", "illegal value of LDQ");
        return -12;
    }

    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;

    if (jobz == PlasmaVec) {
        plasma_error("PLASMA_dsygv", "computing the eigenvectors is not supported in this version");
        /* jobz is the 2nd argument, so the "-i" convention gives -2
         * (-1 would wrongly point at itype). */
        return -2;
    }

    plasma_sequence_create(plasma, &sequence);

    /*
     * Translate the LAPACK-layout inputs into tile layout.  The last macro
     * argument of the out-of-place variant is the cleanup list, which
     * releases every descriptor allocated so far.
     * NOTE(review): the cleanup list does not destroy `sequence`; confirm
     * whether the macro returns on allocation failure and leaks it.
     *
     * The jobz == PlasmaVec branches below are currently unreachable
     * (PlasmaVec is rejected above); they are kept for when eigenvector
     * computation becomes available.
     */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N,
                             plasma_desc_mat_free(&(descA)) );
        plasma_dooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, N, N,
                             plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)) );
        if (jobz == PlasmaVec) {
            /* No need for conversion, it's just output */
            plasma_ddesc_alloc( descQ, NB, NB, LDQ, N, 0, 0, N, N,
                                plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descQ)) );
        }
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
        plasma_diplap2tile( descB, B, NB, NB, LDB, N, 0, 0, N, N );
        if (jobz == PlasmaVec) {
            /* No need for conversion, it's just output */
            descQ = plasma_desc_init(
                PlasmaRealDouble, NB, NB, NB*NB,
                LDQ, N, 0, 0, N, N);
            descQ.mat = Q;
        }
    }

    /* Call the tile interface.
     * NOTE(review): with PlasmaNoVec, descQ is never initialized before its
     * address is passed here; confirm the tile routine does not read it. */
    PLASMA_dsygv_Tile_Async(itype, PlasmaNoVec, uplo,
                            &descA, &descB, W,
                            descT, &descQ,
                            sequence, &request);

    /* Translate back to LAPACK layout and wait for completion.  Macro
     * invocations are always braced when used as an `if` body, since a
     * macro may expand to more than one statement. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dooptile2lap( descB, B, NB, NB, LDB, N );
        if (jobz == PlasmaVec) {
            plasma_dooptile2lap( descQ, Q, NB, NB, LDQ, N );
        }
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
        if (jobz == PlasmaVec) {
            plasma_desc_mat_free(&descQ);
        }
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, N );
        plasma_diptile2lap( descB, B, NB, NB, LDB, N );
        if (jobz == PlasmaVec) {
            plasma_diptile2lap( descQ, Q, NB, NB, LDQ, N );
        }
        plasma_dynamic_sync();
    }

    /* Read the status before the sequence is destroyed. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dsymm	(	PLASMA_enum	side,
		PLASMA_enum	uplo,
		int	M,
		int	N,
		double	alpha,
		double *	A,
		int	LDA,
		double *	B,
		int	LDB,
		double	beta,
		double *	C,
		int	LDC
	)

PLASMA_dsymm - Performs one of the matrix-matrix operations

$C = \alpha \times A \times B + \beta \times C$

or

$C = \alpha \times B \times A + \beta \times C$

where alpha and beta are scalars, A is a symmetric matrix and B and C are m by n matrices.

Parameters:

[in]	side	Specifies whether the symmetric matrix A appears on the left or right in the operation as follows: = PlasmaLeft: $C = \alpha \times A \times B + \beta \times C$ = PlasmaRight: $C = \alpha \times B \times A + \beta \times C$
[in]	uplo	Specifies whether the upper or lower triangular part of the symmetric matrix A is to be referenced as follows: = PlasmaLower: Only the lower triangular part of the symmetric matrix A is to be referenced. = PlasmaUpper: Only the upper triangular part of the symmetric matrix A is to be referenced.
[in]	M	Specifies the number of rows of the matrix C. M >= 0.
[in]	N	Specifies the number of columns of the matrix C. N >= 0.
[in]	alpha	Specifies the scalar alpha.
[in]	A	A is a LDA-by-ka matrix, where ka is M when side = PlasmaLeft, and is N otherwise. Only the uplo triangular part is referenced.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,ka).
[in]	B	B is a LDB-by-N matrix, where the leading M-by-N part of the array B must contain the matrix B.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,M).
[in]	beta	Specifies the scalar beta.
[in,out]	C	C is a LDC-by-N matrix. On exit, the array is overwritten by the M by N updated matrix.
[in]	LDC	The leading dimension of the array C. LDC >= max(1,M).

Returns:

Return values:

PLASMA_SUCCESS successful exit

See also:: PLASMA_dsymm_Tile; PLASMA_csymm; PLASMA_dsymm; PLASMA_ssymm

Definition at line 94 of file dsymm.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, PLASMA_dsymm_Tile_Async(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DSYMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLeft, PlasmaLower, PlasmaRight, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout entry point for the symmetric matrix-matrix product.
     *
     * Validates the arguments, moves A, B and C into tile layout, submits
     * PLASMA_dsymm_Tile_Async(), moves the data back and returns the status
     * recorded in the sequence.  A negative return value -i means that the
     * i-th argument was rejected.
     *
     * The locals `plasma`, `sequence` and `request` keep these exact names
     * on purpose: the layout-translation helpers are macros that receive no
     * explicit sequence/request argument, so they presumably pick them up
     * from the enclosing scope (TODO confirm against the macro definitions).
     */
    PLASMA_desc descA, descB, descC;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    plasma_context_t *plasma;
    int status;
    int tile_nb;   /* tile size handed to the layout translation */
    int orderA;    /* order of the square matrix A               */

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dsymm", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* ---- argument validation ---- */
    if (!(side == PlasmaLeft || side == PlasmaRight)) {
        plasma_error("PLASMA_dsymm", "illegal value of side");
        return -1;
    }
    if (!(uplo == PlasmaLower || uplo == PlasmaUpper)) {
        plasma_error("PLASMA_dsymm", "illegal value of uplo");
        return -2;
    }

    /* A multiplies from the left (order M) or from the right (order N). */
    if (side == PlasmaLeft) {
        orderA = M;
    } else {
        orderA = N;
    }

    if (M < 0) {
        plasma_error("PLASMA_dsymm", "illegal value of M");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_dsymm", "illegal value of N");
        return -4;
    }
    if (max(1, orderA) > LDA) {
        plasma_error("PLASMA_dsymm", "illegal value of LDA");
        return -7;
    }
    if (max(1, M) > LDB) {
        plasma_error("PLASMA_dsymm", "illegal value of LDB");
        return -9;
    }
    if (max(1, M) > LDC) {
        plasma_error("PLASMA_dsymm", "illegal value of LDC");
        return -12;
    }

    /* ---- nothing to compute: empty C, or C = 0*(...) + 1*C ---- */
    if (M == 0 || N == 0)
        return PLASMA_SUCCESS;
    if (alpha == 0.0 && beta == 1.0)
        return PLASMA_SUCCESS;

    /* ---- pick the tile size for this problem ---- */
    status = plasma_tune(PLASMA_FUNC_DSYMM, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dsymm", "plasma_tune() failed");
        return status;
    }
    tile_nb = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    /* ---- LAPACK layout -> tile layout ---- */
    if ( PLASMA_TRANSLATION != PLASMA_OUTOFPLACE ) {
        plasma_diplap2tile( descA, A, tile_nb, tile_nb, LDA, orderA, 0, 0, orderA, orderA );
        plasma_diplap2tile( descB, B, tile_nb, tile_nb, LDB, N, 0, 0, M, N );
        plasma_diplap2tile( descC, C, tile_nb, tile_nb, LDC, N, 0, 0, M, N );
    } else {
        /* The trailing macro argument is the cleanup list: it names every
         * descriptor allocated up to and including the current one. */
        plasma_dooplap2tile( descA, A, tile_nb, tile_nb, LDA, orderA, 0, 0, orderA, orderA,
                             plasma_desc_mat_free(&(descA)) );
        plasma_dooplap2tile( descB, B, tile_nb, tile_nb, LDB, N, 0, 0, M, N,
                             plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
        plasma_dooplap2tile( descC, C, tile_nb, tile_nb, LDC, N, 0, 0, M, N,
                             plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descC)));
    }

    /* ---- the actual computation, on tiles ---- */
    PLASMA_dsymm_Tile_Async(
        side, uplo, alpha, &descA, &descB, beta, &descC, sequence, &request);

    /* ---- tile layout -> LAPACK layout, then wait ---- */
    if ( PLASMA_TRANSLATION != PLASMA_OUTOFPLACE ) {
        /* In place: all three operands are translated back. */
        plasma_diptile2lap( descA, A, tile_nb, tile_nb, LDA, orderA );
        plasma_diptile2lap( descB, B, tile_nb, tile_nb, LDB, N );
        plasma_diptile2lap( descC, C, tile_nb, tile_nb, LDC, N );
        plasma_dynamic_sync();
    } else {
        /* Out of place: only the result C is copied back; the tile copies
         * are released once the work has completed. */
        plasma_dooptile2lap( descC, C, tile_nb, tile_nb, LDC, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
        plasma_desc_mat_free(&descC);
    }

    /* Capture the status before the sequence goes away. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dsyr2k	(	PLASMA_enum	uplo,
		PLASMA_enum	trans,
		int	N,
		int	K,
		double	alpha,
		double *	A,
		int	LDA,
		double *	B,
		int	LDB,
		double	beta,
		double *	C,
		int	LDC
	)

PLASMA_dsyr2k - Performs one of the symmetric rank 2k operations

$C = \alpha [ op( A ) \times op( B )' ] + \alpha [ op( B ) \times op( A )' ] + \beta C$

, or

$C = \alpha [ op( A )' \times op( B ) ] + \alpha [ op( B )' \times op( A ) ] + \beta C$

,

where op( X ) is one of

op( X ) = X or op( X ) = X'

where alpha and beta are real scalars, C is an n-by-n symmetric matrix and A and B are n-by-k matrices in the first case and k-by-n matrices in the second case.

Parameters:

[in]	uplo	= PlasmaUpper: Upper triangle of C is stored; = PlasmaLower: Lower triangle of C is stored.
[in]	trans	Specifies whether the matrix A is transposed or not: = PlasmaNoTrans: $C = \alpha [ op( A ) \times op( B )' ] + \alpha [ op( B ) \times op( A )' ] + \beta C$ = PlasmaTrans: $C = \alpha [ op( A )' \times op( B ) ] + \alpha [ op( B )' \times op( A ) ] + \beta C$
[in]	N	N specifies the order of the matrix C. N must be at least zero.
[in]	K	K specifies the number of columns of the A and B matrices with trans = PlasmaNoTrans. K specifies the number of rows of the A and B matrices with trans = PlasmaTrans.
[in]	alpha	alpha specifies the scalar alpha.
[in]	A	A is a LDA-by-ka matrix, where ka is K when trans = PlasmaNoTrans, and is N otherwise.
[in]	LDA	The leading dimension of the array A. If trans = PlasmaNoTrans, LDA must be at least max( 1, N ); otherwise LDA must be at least max( 1, K ).
[in]	B	B is a LDB-by-kb matrix, where kb is K when trans = PlasmaNoTrans, and is N otherwise.
[in]	LDB	The leading dimension of the array B. If trans = PlasmaNoTrans, LDB must be at least max( 1, N ); otherwise LDB must be at least max( 1, K ).
[in]	beta	beta specifies the scalar beta.
[in,out]	C	C is a LDC-by-N matrix. On exit, the array uplo part of the matrix is overwritten by the uplo part of the updated matrix.
[in]	LDC	The leading dimension of the array C. LDC >= max( 1, N ).

Returns:

Return values:

PLASMA_SUCCESS successful exit

See also:: PLASMA_dsyr2k_Tile; PLASMA_csyr2k; PLASMA_dsyr2k; PLASMA_ssyr2k

Definition at line 96 of file dsyr2k.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, PLASMA_dsyr2k_Tile_Async(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DSYRK, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLower, PlasmaNoTrans, PlasmaTrans, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout entry point for the symmetric rank-2k update.
     *
     * Validates the arguments, moves A, B and C into tile layout, submits
     * PLASMA_dsyr2k_Tile_Async(), moves the data back and returns the
     * status recorded in the sequence.  A negative return value -i means
     * that the i-th argument was rejected.
     *
     * The locals `plasma`, `sequence` and `request` keep these exact names
     * on purpose: the layout-translation helpers are macros that receive no
     * explicit sequence/request argument, so they presumably pick them up
     * from the enclosing scope (TODO confirm against the macro definitions).
     */
    PLASMA_desc descA, descB, descC;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    plasma_context_t *plasma;
    int status;
    int tile_nb;          /* tile size handed to the layout translation */
    int rowsAB, colsAB;   /* stored shape shared by A and B             */

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dsyr2k", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* ---- argument validation ---- */
    if (!(uplo == PlasmaUpper || uplo == PlasmaLower)) {
        plasma_error("PLASMA_dsyr2k", "illegal value of uplo");
        return -1;
    }
    if (!(trans == PlasmaNoTrans || trans == PlasmaTrans)) {
        plasma_error("PLASMA_dsyr2k", "illegal value of trans");
        return -2;
    }

    /* A and B are stored N-by-K when not transposed, K-by-N otherwise. */
    rowsAB = (trans == PlasmaNoTrans) ? N : K;
    colsAB = (trans == PlasmaNoTrans) ? K : N;

    if (N < 0) {
        plasma_error("PLASMA_dsyr2k", "illegal value of N");
        return -3;
    }
    if (K < 0) {
        plasma_error("PLASMA_dsyr2k", "illegal value of K");
        return -4;
    }
    if (max(1, rowsAB) > LDA) {
        plasma_error("PLASMA_dsyr2k", "illegal value of LDA");
        return -7;
    }
    if (max(1, rowsAB) > LDB) {
        plasma_error("PLASMA_dsyr2k", "illegal value of LDB");
        return -9;
    }
    if (max(1, N) > LDC) {
        plasma_error("PLASMA_dsyr2k", "illegal value of LDC");
        return -12;
    }

    /* ---- nothing to compute: empty C, or the update leaves C as is ---- */
    if (N == 0)
        return PLASMA_SUCCESS;
    if ((alpha == 0.0 || K == 0) && beta == 1.0)
        return PLASMA_SUCCESS;

    /* ---- pick the tile size (requested with the DSYRK function id) ---- */
    status = plasma_tune(PLASMA_FUNC_DSYRK, N, K, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dsyr2k", "plasma_tune() failed");
        return status;
    }
    tile_nb = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    /* ---- LAPACK layout -> tile layout ---- */
    if ( PLASMA_TRANSLATION != PLASMA_OUTOFPLACE ) {
        plasma_diplap2tile( descA, A, tile_nb, tile_nb, LDA, colsAB, 0, 0, rowsAB, colsAB );
        plasma_diplap2tile( descB, B, tile_nb, tile_nb, LDB, colsAB, 0, 0, rowsAB, colsAB );
        plasma_diplap2tile( descC, C, tile_nb, tile_nb, LDC, N, 0, 0, N, N );
    } else {
        /* The trailing macro argument is the cleanup list: it names every
         * descriptor allocated up to and including the current one. */
        plasma_dooplap2tile( descA, A, tile_nb, tile_nb, LDA, colsAB, 0, 0, rowsAB, colsAB,
                             plasma_desc_mat_free(&(descA)) );
        plasma_dooplap2tile( descB, B, tile_nb, tile_nb, LDB, colsAB, 0, 0, rowsAB, colsAB,
                             plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
        plasma_dooplap2tile( descC, C, tile_nb, tile_nb, LDC, N, 0, 0, N, N,
                             plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descC)));
    }

    /* ---- the actual computation, on tiles ---- */
    PLASMA_dsyr2k_Tile_Async(uplo, trans, alpha, &descA, &descB, beta, &descC,
                             sequence, &request);

    /* ---- tile layout -> LAPACK layout, then wait ---- */
    if ( PLASMA_TRANSLATION != PLASMA_OUTOFPLACE ) {
        /* In place: all three operands are translated back. */
        plasma_diptile2lap( descA, A, tile_nb, tile_nb, LDA, colsAB );
        plasma_diptile2lap( descB, B, tile_nb, tile_nb, LDB, colsAB );
        plasma_diptile2lap( descC, C, tile_nb, tile_nb, LDC, N );
        plasma_dynamic_sync();
    } else {
        /* Out of place: only the result C is copied back; the tile copies
         * are released once the work has completed. */
        plasma_dooptile2lap( descC, C, tile_nb, tile_nb, LDC, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
        plasma_desc_mat_free(&descC);
    }

    /* Capture the status before the sequence goes away. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dsyrk	(	PLASMA_enum	uplo,
		PLASMA_enum	trans,
		int	N,
		int	K,
		double	alpha,
		double *	A,
		int	LDA,
		double	beta,
		double *	C,
		int	LDC
	)

PLASMA_dsyrk - Performs one of the symmetric rank k operations

$C = \alpha [ op( A ) \times op( A )' ] + \beta C$

,

where op( X ) is one of

op( X ) = X or op( X ) = X'

where alpha and beta are real scalars, C is an n-by-n symmetric matrix and A is an n-by-k matrix in the first case and a k-by-n matrix in the second case.

Parameters:

[in]	uplo	= PlasmaUpper: Upper triangle of C is stored; = PlasmaLower: Lower triangle of C is stored.
[in]	trans	Specifies whether the matrix A is transposed or not: = PlasmaNoTrans: A is not transposed; = PlasmaTrans: A is transposed.
[in]	N	N specifies the order of the matrix C. N must be at least zero.
[in]	K	K specifies the number of columns of the matrix op( A ).
[in]	alpha	alpha specifies the scalar alpha.
[in]	A	A is a LDA-by-ka matrix, where ka is K when trans = PlasmaNoTrans, and is N otherwise.
[in]	LDA	The leading dimension of the array A. If trans = PlasmaNoTrans, LDA must be at least max( 1, N ); otherwise LDA must be at least max( 1, K ).
[in]	beta	beta specifies the scalar beta
[in,out]	C	C is a LDC-by-N matrix. On exit, the array uplo part of the matrix is overwritten by the uplo part of the updated matrix.
[in]	LDC	The leading dimension of the array C. LDC >= max( 1, N ).

Returns:

Return values:

PLASMA_SUCCESS successful exit

See also:: PLASMA_dsyrk_Tile; PLASMA_csyrk; PLASMA_dsyrk; PLASMA_ssyrk

Definition at line 85 of file dsyrk.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, PLASMA_dsyrk_Tile_Async(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DSYRK, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLower, PlasmaNoTrans, PlasmaTrans, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout entry point for the symmetric rank-k update.
     *
     * Validates the arguments, moves A and C into tile layout, submits
     * PLASMA_dsyrk_Tile_Async(), moves the data back and returns the status
     * recorded in the sequence.  A negative return value -i means that the
     * i-th argument was rejected.
     *
     * The locals `plasma`, `sequence` and `request` keep these exact names
     * on purpose: the layout-translation helpers are macros that receive no
     * explicit sequence/request argument, so they presumably pick them up
     * from the enclosing scope (TODO confirm against the macro definitions).
     */
    PLASMA_desc descA, descC;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    plasma_context_t *plasma;
    int status;
    int tile_nb;        /* tile size handed to the layout translation */
    int rowsA, colsA;   /* stored shape of A                          */

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dsyrk", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* ---- argument validation ---- */
    if (!(uplo == PlasmaUpper || uplo == PlasmaLower)) {
        plasma_error("PLASMA_dsyrk", "illegal value of uplo");
        return -1;
    }
    if (!(trans == PlasmaNoTrans || trans == PlasmaTrans)) {
        plasma_error("PLASMA_dsyrk", "illegal value of trans");
        return -2;
    }

    /* A is stored N-by-K when not transposed, K-by-N otherwise. */
    rowsA = (trans == PlasmaNoTrans) ? N : K;
    colsA = (trans == PlasmaNoTrans) ? K : N;

    if (N < 0) {
        plasma_error("PLASMA_dsyrk", "illegal value of N");
        return -3;
    }
    if (K < 0) {
        plasma_error("PLASMA_dsyrk", "illegal value of K");
        return -4;
    }
    if (max(1, rowsA) > LDA) {
        plasma_error("PLASMA_dsyrk", "illegal value of LDA");
        return -7;
    }
    if (max(1, N) > LDC) {
        plasma_error("PLASMA_dsyrk", "illegal value of LDC");
        return -10;
    }

    /* ---- nothing to compute: empty C, or the update leaves C as is ---- */
    if (N == 0)
        return PLASMA_SUCCESS;
    if ((alpha == 0.0 || K == 0) && beta == 1.0)
        return PLASMA_SUCCESS;

    /* ---- pick the tile size for this problem ---- */
    status = plasma_tune(PLASMA_FUNC_DSYRK, N, K, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dsyrk", "plasma_tune() failed");
        return status;
    }
    tile_nb = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    /* ---- LAPACK layout -> tile layout ---- */
    if ( PLASMA_TRANSLATION != PLASMA_OUTOFPLACE ) {
        plasma_diplap2tile( descA, A, tile_nb, tile_nb, LDA, colsA, 0, 0, rowsA, colsA );
        plasma_diplap2tile( descC, C, tile_nb, tile_nb, LDC, N, 0, 0, N, N );
    } else {
        /* The trailing macro argument is the cleanup list: it names every
         * descriptor allocated up to and including the current one. */
        plasma_dooplap2tile( descA, A, tile_nb, tile_nb, LDA, colsA, 0, 0, rowsA, colsA,
                             plasma_desc_mat_free(&(descA)) );
        plasma_dooplap2tile( descC, C, tile_nb, tile_nb, LDC, N, 0, 0, N, N,
                             plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descC)));
    }

    /* ---- the actual computation, on tiles ---- */
    PLASMA_dsyrk_Tile_Async(uplo, trans, alpha, &descA, beta, &descC,
                            sequence, &request);

    /* ---- tile layout -> LAPACK layout, then wait ---- */
    if ( PLASMA_TRANSLATION != PLASMA_OUTOFPLACE ) {
        /* In place: both operands are translated back. */
        plasma_diptile2lap( descA, A, tile_nb, tile_nb, LDA, colsA );
        plasma_diptile2lap( descC, C, tile_nb, tile_nb, LDC, N );
        plasma_dynamic_sync();
    } else {
        /* Out of place: only the result C is copied back; the tile copies
         * are released once the work has completed. */
        plasma_dooptile2lap( descC, C, tile_nb, tile_nb, LDC, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descC);
    }

    /* Capture the status before the sequence goes away. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dsytrd	(	PLASMA_enum	jobz,
		PLASMA_enum	uplo,
		int	N,
		double *	A,
		int	LDA,
		double *	D,
		double *	E,
		PLASMA_desc *	descT,
		double *	Q,
		int	LDQ
	)

PLASMA_dsytrd - Reduces a real symmetric matrix A to real symmetric tridiagonal form S using a two-stage approach. First stage: reduction to band tridiagonal form (orthogonal Q1); second stage: reduction from band to tridiagonal form (orthogonal Q2). Let Q = Q1 * Q2 be the global orthogonal transformation; Q**T * A * Q = S. Not LAPACK compliant, as A does not contain the T elements. Note: Only PlasmaNoVec is supported!

Parameters:

[in]	jobz	Intended usage: = PlasmaNoVec: computes eigenvalues only; = PlasmaVec: computes eigenvalues and eigenvectors. Note: Only PlasmaNoVec supported!
[in]	uplo	Specifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The order of the matrix A. N >= 0.
[in,out]	A	On entry, the symmetric (or Hermitian) matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, the lower triangle (if uplo = PlasmaLower) or the upper triangle (if uplo = PlasmaUpper) of A, including the diagonal, is destroyed.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[out]	D	On exit, the diagonal elements of the tridiagonal matrix: D(i) = A(i,i).
[out]	E	On exit, the off-diagonal elements of the tridiagonal matrix: E(i) = A(i,i+1) if uplo = PlasmaUpper, E(i) = A(i+1,i) if uplo = PlasmaLower.
[in,out]	descT	On entry, descriptor as returned by PLASMA_Alloc_Workspace_dsyev. On exit, contains auxiliary factorization data.
[out]	Q	On exit, if jobz = PlasmaVec and info = 0, the eigenvectors.
[in]	LDQ	The leading dimension of the array Q. LDQ >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if INFO = i, the algorithm failed to converge; i off-diagonal elements of an intermediate tridiagonal form did not converge to zero.

See also:: PLASMA_dsytrd_Tile; PLASMA_dsytrd_Tile_Async; PLASMA_chetrd; PLASMA_dsytrd; PLASMA_ssytrd

Definition at line 100 of file dsytrd.c.

References plasma_desc_t::m, max, plasma_desc_t::n, plasma_context_self(), plasma_desc_check(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, PLASMA_dsytrd_Tile_Async(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), PLASMA_FUNC_DSYTRD, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLower, PlasmaNoVec, PlasmaUpper, PlasmaVec, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the reduction to tridiagonal form.
     *
     * Steps: tune the tile sizes, validate the arguments, translate A (and,
     * for PlasmaVec, Q) into tile layout, run PLASMA_dsytrd_Tile_Async(),
     * translate back and report the status stored in the sequence.
     *
     * Returns:
     *   PLASMA_ERR_NOT_INITIALIZED  no PLASMA context is available;
     *   the plasma_tune() status    if tuning fails;
     *   -i                          the i-th argument was rejected;
     *   PLASMA_SUCCESS              for N == 0 (nothing to do);
     *   sequence->status            otherwise.
     *
     * Only jobz = PlasmaNoVec is supported; PlasmaVec is rejected below.
     */
    int NB, IB, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descQ;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_error("PLASMA_dsytrd", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Tune NB & IB depending on N.  This has to happen before the argument
     * checks because the descT check below needs NB, IB and NT. */
    status = plasma_tune(PLASMA_FUNC_DSYTRD, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dsytrd", "plasma_tune() failed");
        return status;
    }

    /* Set NT: number of NB-sized tiles needed to cover N (rounded up) */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);

    /* Check input arguments; the return value is minus the position of the
     * offending argument in the parameter list. */
    if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
        plasma_error("PLASMA_dsytrd", "illegal value of jobz");
        return -1;
    }
    if (uplo != PlasmaLower && uplo != PlasmaUpper) {
        plasma_error("PLASMA_dsytrd", "illegal value of uplo");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_dsytrd", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_dsytrd", "illegal value of LDA");
        return -5;
    }
    /* The workspace descriptor must be valid and sized (NT*IB)-by-(NT*NB). */
    if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) ||
         ( descT->m != NT*IB ) || (descT->n != NT*NB) ) {
        plasma_error("PLASMA_dsytrd", "invalid T descriptor");
        return -8;
    }
    if (LDQ < max(1, N)) {
        plasma_error("PLASMA_dsytrd", "illegal value of LDQ");
        return -10;
    }

    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;

    if (jobz == PlasmaVec) {
        plasma_error("PLASMA_dsytrd", "computing the eigenvectors is not supported in this version");
        return -1;   /* jobz is the 1st argument */
    }

    plasma_sequence_create(plasma, &sequence);

    /*
     * Translate the LAPACK-layout inputs into tile layout.  The last macro
     * argument of the out-of-place variant is the cleanup list, so it must
     * release every descriptor allocated so far.
     * NOTE(review): the cleanup list does not destroy `sequence`; confirm
     * whether the macro returns on allocation failure and leaks it.
     *
     * The jobz == PlasmaVec branches below are currently unreachable
     * (PlasmaVec is rejected above); they are kept for when eigenvector
     * computation becomes available.  Macro invocations are always braced
     * when used as an `if` body, since a macro may expand to more than one
     * statement and only the first one would be guarded.
     */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N,
                             plasma_desc_mat_free(&(descA)) );
        if (jobz == PlasmaVec) {
            /* descA is already allocated here, so release it together
             * with descQ if setting up descQ fails. */
            plasma_dooplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, N, N,
                                 plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descQ)) );
        }
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
        if (jobz == PlasmaVec) {
            plasma_diplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, N, N );
        }
    }

    /* Call the tile interface.
     * NOTE(review): with PlasmaNoVec, descQ is never initialized before its
     * address is passed here; confirm the tile routine does not read it. */
    PLASMA_dsytrd_Tile_Async(jobz, uplo, &descA, D, E, descT, &descQ, sequence, &request);

    /* Translate back to LAPACK layout and wait for completion. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descA, A, NB, NB, LDA, N );
        if (jobz == PlasmaVec) {
            plasma_dooptile2lap( descQ, Q, NB, NB, LDQ, N );
        }
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        if (jobz == PlasmaVec) {
            plasma_desc_mat_free(&descQ);
        }
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, N );
        if (jobz == PlasmaVec) {
            plasma_diptile2lap( descQ, Q, NB, NB, LDQ, N );
        }
        plasma_dynamic_sync();
    }

    /* Read the status before the sequence is destroyed. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dTile_to_Lapack	(	PLASMA_desc *	A,
		double *	Af77,
		int	LDA
	)

PLASMA_dTile_to_Lapack - Conversion from tile layout to LAPACK layout.

Parameters:

[in]	A	Descriptor of the PLASMA matrix in tile layout.
[in,out]	Af77	LAPACK matrix. If PLASMA_TRANSLATION_MODE is set to PLASMA_INPLACE, Af77 has to be A->mat, else if PLASMA_TRANSLATION_MODE is set to PLASMA_OUTOFPLACE, Af77 has to be allocated before.
[in]	LDA	The leading dimension of the matrix Af77.

Returns:

Return values:

PLASMA_SUCCESS successful exit

See also:: PLASMA_dTile_to_Lapack_Async; PLASMA_dLapack_to_Tile; PLASMA_cTile_to_Lapack; PLASMA_dTile_to_Lapack; PLASMA_sTile_to_Lapack

Definition at line 191 of file dtile.c.

References A, plasma_context_self(), plasma_desc_check(), plasma_dynamic_sync, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), plasma_pdtile_to_lapack(), plasma_sequence_create(), plasma_sequence_destroy(), plasma_static_call_5, PLASMA_SUCCESS, and plasma_sequence_t::status.

{
    /*
     * Converts the tile-layout matrix described by A into the LAPACK-layout
     * array Af77 (leading dimension LDA) by running plasma_pdtile_to_lapack,
     * and returns the status recorded in the sequence created for that work.
     *
     * Returns PLASMA_ERR_NOT_INITIALIZED when no PLASMA context is available,
     * PLASMA_ERR_ILLEGAL_VALUE when the descriptor fails plasma_desc_check(),
     * otherwise sequence->status.
     */
    /* Local copy of the caller's descriptor.
     * NOTE(review): A is dereferenced here before any validation, so a NULL
     * descriptor pointer is not caught by the checks below. */
    PLASMA_desc descA = *A;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    /* NOTE(review): unlike the other drivers in this file, the request is not
     * set to PLASMA_REQUEST_INITIALIZER - confirm this is intentional. */
    PLASMA_request request;
    int status;
    /* Fetch the PLASMA context; a NULL result is reported as "not initialized". */
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dTile_to_Lapack", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check descriptor for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dTile_to_Lapack", "invalid descriptor");
        return PLASMA_ERR_ILLEGAL_VALUE;
    }
    /* NOTE(review): the result of plasma_sequence_create() is not checked;
     * sequence is dereferenced unconditionally further down. */
    plasma_sequence_create(plasma, &sequence);
    /* Hand the conversion routine and its five (type, value) argument pairs
     * to the static-call macro. */
    plasma_static_call_5(
        plasma_pdtile_to_lapack,
        PLASMA_desc, descA,
        double*, Af77,
        int, LDA,
        PLASMA_sequence*, sequence,
        PLASMA_request*, &request);
    /* Presumably waits for outstanding dynamically scheduled work before the
     * status is read - TODO confirm against the macro definition. */
    plasma_dynamic_sync();
    /* Read the status before the sequence is destroyed. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dtrmm	(	PLASMA_enum	side,
		PLASMA_enum	uplo,
		PLASMA_enum	transA,
		PLASMA_enum	diag,
		int	N,
		int	NRHS,
		double	alpha,
		double *	A,
		int	LDA,
		double *	B,
		int	LDB
	)

PLASMA_dtrmm - Computes B = alpha*op( A )*B or B = alpha*B*op( A ).

Parameters:

[in]	side	Specifies whether A appears on the left or on the right of B: = PlasmaLeft: B = alpha*op( A )*B; = PlasmaRight: B = alpha*B*op( A ).
[in]	uplo	Specifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	transA	Specifies whether the matrix A is not transposed or transposed: = PlasmaNoTrans: A is not transposed; = PlasmaTrans: A is transposed. For real matrices the conjugate transpose is the same operation as the transpose.
[in]	diag	Specifies whether or not A is unit triangular: = PlasmaNonUnit: A is non unit; = PlasmaUnit: A is unit.
[in]	N	The order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]	alpha	alpha specifies the scalar alpha.
[in]	A	The triangular matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of the array A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of the array A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If diag = PlasmaUnit, the diagonal elements of A are also not referenced and are assumed to be 1.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in,out]	B	On entry, the N-by-NRHS matrix B. On exit, if return value = 0, B is overwritten by the product alpha*op( A )*B or alpha*B*op( A ).
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_dtrmm_Tile; PLASMA_dtrmm_Tile_Async; PLASMA_ctrmm; PLASMA_dtrmm; PLASMA_strmm

Definition at line 88 of file dtrmm.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, PLASMA_dtrmm_Tile_Async(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLeft, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaRight, PlasmaTrans, PlasmaUnit, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the triangular matrix product
     * B = alpha*op( A )*B (side == PlasmaLeft) or B = alpha*B*op( A )
     * (side == PlasmaRight).
     *
     * B is N-by-NRHS.  A is square of order NA, where NA = N for PlasmaLeft
     * and NA = NRHS for PlasmaRight.
     *
     * Returns PLASMA_SUCCESS on success, PLASMA_ERR_NOT_INITIALIZED when no
     * PLASMA context exists, -i when the i-th argument is illegal, the
     * plasma_tune() status when tuning fails, otherwise the status recorded
     * in the sequence used for the computation.
     */
    int NB, NA;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dtrmm", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (side != PlasmaLeft && side != PlasmaRight) {
        plasma_error("PLASMA_dtrmm", "illegal value of side");
        return -1;
    }
    /* The order of A depends on which side it is applied from; it must be
     * known before LDA can be validated. */
    if (side == PlasmaLeft) {
        NA = N;
    } else {
        NA = NRHS;
    }
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_dtrmm", "illegal value of uplo");
        return -2;
    }
    if (transA != PlasmaNoTrans && transA != PlasmaTrans) {
        plasma_error("PLASMA_dtrmm", "illegal value of transA");
        return -3;
    }
    if (diag != PlasmaUnit && diag != PlasmaNonUnit) {
        plasma_error("PLASMA_dtrmm", "illegal value of diag");
        return -4;
    }
    if (N < 0) {
        plasma_error("PLASMA_dtrmm", "illegal value of N");
        return -5;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_dtrmm", "illegal value of NRHS");
        return -6;
    }
    /* A is NA-by-NA, so its leading dimension is bounded by NA (not N). */
    if (LDA < max(1, NA)) {
        plasma_error("PLASMA_dtrmm", "illegal value of LDA");
        return -8;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_dtrmm", "illegal value of LDB");
        return -10;
    }
    /* Quick return */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_DPOSV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dtrmm", "plasma_tune() failed");
        return status;
    }
    /* Tile size selected by the tuning step */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /* Convert A (NA-by-NA) and B (N-by-NRHS) to tile layout, either into
     * freshly allocated storage (out-of-place) or in place. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, NA,   0, 0, NA, NA,   plasma_desc_mat_free(&(descA)) );
        plasma_dooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N,  NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, NA,   0, 0, NA, NA  );
        plasma_diplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N,  NRHS);
    }
    /* Call the tile interface */
    PLASMA_dtrmm_Tile_Async(
        side, uplo, transA, diag, alpha, &descA, &descB, sequence, &request);
    /* Convert back to LAPACK layout.  Out-of-place only B (the output) is
     * copied back and the temporaries are freed; in place both A and B are
     * converted back. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, NA   );
        plasma_diptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    /* Read the status before the sequence is destroyed. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dtrsm	(	PLASMA_enum	side,
		PLASMA_enum	uplo,
		PLASMA_enum	transA,
		PLASMA_enum	diag,
		int	N,
		int	NRHS,
		double	alpha,
		double *	A,
		int	LDA,
		double *	B,
		int	LDB
	)

PLASMA_dtrsm - Computes triangular solve A*X = B or X*A = B.

Parameters:

[in]	side	Specifies whether A appears on the left or on the right of X: = PlasmaLeft: A*X = B; = PlasmaRight: X*A = B.
[in]	uplo	Specifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	transA	Specifies whether the matrix A is not transposed or transposed: = PlasmaNoTrans: A is not transposed; = PlasmaTrans: A is transposed. For real matrices the conjugate transpose is the same operation as the transpose.
[in]	diag	Specifies whether or not A is unit triangular: = PlasmaNonUnit: A is non unit; = PlasmaUnit: A is unit.
[in]	N	The order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]	alpha	alpha specifies the scalar alpha.
[in]	A	The triangular matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of the array A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of the array A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If diag = PlasmaUnit, the diagonal elements of A are also not referenced and are assumed to be 1.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in,out]	B	On entry, the N-by-NRHS right hand side matrix B. On exit, if return value = 0, the N-by-NRHS solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_dtrsm_Tile; PLASMA_dtrsm_Tile_Async; PLASMA_ctrsm; PLASMA_dtrsm; PLASMA_strsm

Definition at line 88 of file dtrsm.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, PLASMA_dtrsm_Tile_Async(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLeft, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaRight, PlasmaTrans, PlasmaUnit, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for the triangular solve A*X = B
     * (side == PlasmaLeft) or X*A = B (side == PlasmaRight); B is
     * overwritten by the result.
     *
     * B is N-by-NRHS.  A is square of order NA, where NA = N for PlasmaLeft
     * and NA = NRHS for PlasmaRight.
     *
     * Returns PLASMA_SUCCESS on success, PLASMA_ERR_NOT_INITIALIZED when no
     * PLASMA context exists, -i when the i-th argument is illegal, the
     * plasma_tune() status when tuning fails, otherwise the status recorded
     * in the sequence used for the computation.
     */
    int NB, NA;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dtrsm", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (side != PlasmaLeft && side != PlasmaRight) {
        plasma_error("PLASMA_dtrsm", "illegal value of side");
        return -1;
    }
    /* The order of A depends on which side it is applied from; it must be
     * known before LDA can be validated. */
    if (side == PlasmaLeft) {
        NA = N;
    } else {
        NA = NRHS;
    }
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_dtrsm", "illegal value of uplo");
        return -2;
    }
    if (transA != PlasmaNoTrans && transA != PlasmaTrans) {
        plasma_error("PLASMA_dtrsm", "illegal value of transA");
        return -3;
    }
    if (diag != PlasmaUnit && diag != PlasmaNonUnit) {
        plasma_error("PLASMA_dtrsm", "illegal value of diag");
        return -4;
    }
    if (N < 0) {
        plasma_error("PLASMA_dtrsm", "illegal value of N");
        return -5;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_dtrsm", "illegal value of NRHS");
        return -6;
    }
    /* A is NA-by-NA, so its leading dimension is bounded by NA (not N). */
    if (LDA < max(1, NA)) {
        plasma_error("PLASMA_dtrsm", "illegal value of LDA");
        return -8;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_dtrsm", "illegal value of LDB");
        return -10;
    }
    /* Quick return */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_DPOSV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dtrsm", "plasma_tune() failed");
        return status;
    }
    /* Tile size selected by the tuning step */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /* Convert A (NA-by-NA) and B (N-by-NRHS) to tile layout, either into
     * freshly allocated storage (out-of-place) or in place. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, NA,   0, 0, NA, NA,   plasma_desc_mat_free(&(descA)) );
        plasma_dooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N,  NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, NA,   0, 0, NA, NA  );
        plasma_diplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N,  NRHS);
    }
    /* Call the tile interface */
    PLASMA_dtrsm_Tile_Async(
        side, uplo, transA, diag, alpha, &descA, &descB, sequence, &request);
    /* Convert back to LAPACK layout.  Out-of-place only B (the output) is
     * copied back and the temporaries are freed; in place both A and B are
     * converted back. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, NA   );
        plasma_diptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    /* Read the status before the sequence is destroyed. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dtrsmpl	(	int	N,
		int	NRHS,
		double *	A,
		int	LDA,
		double *	L,
		int *	IPIV,
		double *	B,
		int	LDB
	)

PLASMA_dtrsmpl - Performs the forward substitution step of solving a system of linear equations after the tile LU factorization of the matrix.

Parameters:

[in]	N	The order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]	A	The tile factor L from the factorization, computed by PLASMA_dgetrf_incpiv.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in]	L	Auxiliary factorization data, related to the tile L factor, computed by PLASMA_dgetrf_incpiv.
[in]	IPIV	The pivot indices from PLASMA_dgetrf_incpiv (not equivalent to LAPACK).
[in,out]	B	On entry, the N-by-NRHS right hand side matrix B. On exit, if return value = 0, the N-by-NRHS solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_dtrsmpl_Tile; PLASMA_dtrsmpl_Tile_Async; PLASMA_ctrsmpl; PLASMA_dtrsmpl; PLASMA_strsmpl; PLASMA_dgetrf_incpiv

Definition at line 67 of file dtrsmpl.c.

References L, plasma_desc_t::mat, max, min, plasma_context_self(), plasma_desc_init(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, PLASMA_dtrsmpl_Tile_Async(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DGESV, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaRealDouble, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for PLASMA_dtrsmpl_Tile_Async: validates the
     * arguments, wraps L in a descriptor, converts A and B to tile layout,
     * runs the tile routine and converts the result back.
     *
     * Returns PLASMA_SUCCESS on the quick-return path, PLASMA_ERR_NOT_INITIALIZED
     * when no PLASMA context exists, -i when the i-th argument is illegal,
     * the plasma_tune() status when tuning fails, otherwise the status
     * recorded in the sequence used for the computation.
     */
    int NB, IB, IBNB, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB, descL;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dtrsmpl", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    /* Error codes are the negated 1-based positions of N, NRHS, LDA and LDB
     * in the argument list. */
    if (N < 0) {
        plasma_error("PLASMA_dtrsmpl", "illegal value of N");
        return -1;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_dtrsmpl", "illegal value of NRHS");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_dtrsmpl", "illegal value of LDA");
        return -4;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_dtrsmpl", "illegal value of LDB");
        return -8;
    }
    /* Quick return */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_DGESV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dtrsmpl", "plasma_tune() failed");
        return status;
    }
    /* Set Mt, NT & NTRHS */
    NB    = PLASMA_NB;
    IB    = PLASMA_IB;
    IBNB  = IB*NB;
    /* Number of NB-sized blocks needed to cover N (ceiling division). */
    NT    = (N%NB==0) ? (N/NB) : (N/NB+1);
    plasma_sequence_create(plasma, &sequence);
    /* Build a descriptor around the caller-supplied L buffer; no storage is
     * allocated here, the descriptor just points at L.
     * NOTE(review): presumably IB-by-NB tiles of IBNB elements covering an
     * (NT*IB)-by-(NT*NB) matrix - confirm against plasma_desc_init(). */
    descL = plasma_desc_init(
        PlasmaRealDouble,
        IB, NB, IBNB,
        NT*IB, NT*NB, 0, 0, NT*IB, NT*NB);
    descL.mat = L;
    /* Convert A (N-by-N) and B (N-by-NRHS) to tile layout.  In the
     * out-of-place case the last macro argument is the cleanup to run; it
     * frees descA (and descB for the second call). */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_dooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        plasma_diplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
    }
    /* Call the tile interface */
    PLASMA_dtrsmpl_Tile_Async(&descA, &descL, IPIV, &descB, sequence, &request);
    /* Convert back to LAPACK layout.  Out-of-place only B is copied back and
     * both temporaries are freed; in place both A and B are converted back. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_diptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    /* Read the status before the sequence is destroyed. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dtrsmrv	(	PLASMA_enum	side,
		PLASMA_enum	uplo,
		PLASMA_enum	transA,
		PLASMA_enum	diag,
		int	N,
		int	NRHS,
		double	alpha,
		double *	A,
		int	LDA,
		double *	B,
		int	LDB
	)

PLASMA_dtrsmrv - Computes triangular solve A*X = B or X*A = B.

Parameters:

[in]	side	Specifies whether A appears on the left or on the right of X: = PlasmaLeft: A*X = B; = PlasmaRight: X*A = B.
[in]	uplo	Specifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	transA	Specifies whether the matrix A is not transposed or transposed: = PlasmaNoTrans: A is not transposed; = PlasmaTrans: A is transposed. For real matrices the conjugate transpose is the same operation as the transpose.
[in]	diag	Specifies whether or not A is unit triangular: = PlasmaNonUnit: A is non unit; = PlasmaUnit: A is unit.
[in]	N	The order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]	alpha	alpha specifies the scalar alpha.
[in]	A	The triangular matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of the array A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of the array A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If diag = PlasmaUnit, the diagonal elements of A are also not referenced and are assumed to be 1.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in,out]	B	On entry, the N-by-NRHS right hand side matrix B. On exit, if return value = 0, the N-by-NRHS solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_dtrsmrv_Tile; PLASMA_dtrsmrv_Tile_Async; PLASMA_ctrsmrv; PLASMA_dtrsmrv; PLASMA_strsmrv

Definition at line 88 of file dtrsmrv.c.

References max, min, plasma_context_self(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, PLASMA_dtrsmrv_Tile_Async(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLeft, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaRight, PlasmaTrans, PlasmaUnit, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for PLASMA_dtrsmrv_Tile_Async, a triangular solve
     * A*X = B (side == PlasmaLeft) or X*A = B (side == PlasmaRight); B is
     * overwritten by the result.
     *
     * B is N-by-NRHS.  A is square of order NA, where NA = N for PlasmaLeft
     * and NA = NRHS for PlasmaRight.
     *
     * Returns PLASMA_SUCCESS on success, PLASMA_ERR_NOT_INITIALIZED when no
     * PLASMA context exists, -i when the i-th argument is illegal, the
     * plasma_tune() status when tuning fails, otherwise the status recorded
     * in the sequence used for the computation.
     */
    int NB, NA;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dtrsmrv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (side != PlasmaLeft && side != PlasmaRight) {
        plasma_error("PLASMA_dtrsmrv", "illegal value of side");
        return -1;
    }
    /* The order of A depends on which side it is applied from; it must be
     * known before LDA can be validated. */
    if (side == PlasmaLeft) {
        NA = N;
    } else {
        NA = NRHS;
    }
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_dtrsmrv", "illegal value of uplo");
        return -2;
    }
    if (transA != PlasmaNoTrans && transA != PlasmaTrans) {
        plasma_error("PLASMA_dtrsmrv", "illegal value of transA");
        return -3;
    }
    if (diag != PlasmaUnit && diag != PlasmaNonUnit) {
        plasma_error("PLASMA_dtrsmrv", "illegal value of diag");
        return -4;
    }
    if (N < 0) {
        plasma_error("PLASMA_dtrsmrv", "illegal value of N");
        return -5;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_dtrsmrv", "illegal value of NRHS");
        return -6;
    }
    /* A is NA-by-NA, so its leading dimension is bounded by NA (not N). */
    if (LDA < max(1, NA)) {
        plasma_error("PLASMA_dtrsmrv", "illegal value of LDA");
        return -8;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_dtrsmrv", "illegal value of LDB");
        return -10;
    }
    /* Quick return */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_DPOSV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dtrsmrv", "plasma_tune() failed");
        return status;
    }
    /* Tile size selected by the tuning step */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /* Convert A (NA-by-NA) and B (N-by-NRHS) to tile layout, either into
     * freshly allocated storage (out-of-place) or in place. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, NA,   0, 0, NA, NA,   plasma_desc_mat_free(&(descA)) );
        plasma_dooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N,  NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, NA,   0, 0, NA, NA  );
        plasma_diplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N,  NRHS);
    }
    /* Call the tile interface */
    PLASMA_dtrsmrv_Tile_Async(
        side, uplo, transA, diag, alpha, &descA, &descB, sequence, &request);
    /* Convert back to LAPACK layout.  Out-of-place only B (the output) is
     * copied back and the temporaries are freed; in place both A and B are
     * converted back. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, NA   );
        plasma_diptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    /* Read the status before the sequence is destroyed. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

int PLASMA_dtrtri	(	PLASMA_enum	uplo,
		PLASMA_enum	diag,
		int	N,
		double *	A,
		int	LDA
	)

PLASMA_dtrtri - Computes the inverse of a real upper or lower triangular matrix A.

Parameters:

[in]	uplo	= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	diag	= PlasmaNonUnit: A is non-unit triangular; = PlasmaUnit: A is unit triangular.
[in]	N	The order of the matrix A. N >= 0.
[in,out]	A	On entry, the triangular matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of the array A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of the array A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If DIAG = 'U', the diagonal elements of A are also not referenced and are assumed to be 1. On exit, the (triangular) inverse of the original matrix, in the same storage format.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, A(i,i) is exactly zero. The triangular matrix is singular and its inverse can not be computed.

See also:: PLASMA_dtrtri_Tile; PLASMA_dtrtri_Tile_Async; PLASMA_ctrtri; PLASMA_dtrtri; PLASMA_strtri; PLASMA_dpotri

Definition at line 70 of file dtrtri.c.

References max, plasma_context_self(), plasma_desc_mat_free(), plasma_diplap2tile, plasma_diptile2lap, plasma_dooplap2tile, plasma_dooptile2lap, PLASMA_dtrtri_Tile_Async(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_DPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLower, PlasmaNonUnit, PlasmaUnit, PlasmaUpper, and plasma_sequence_t::status.

{
    /*
     * LAPACK-layout driver for PLASMA_dtrtri_Tile_Async: validates the
     * arguments, converts A to tile layout, runs the tile routine and
     * converts A back.
     *
     * Returns PLASMA_SUCCESS on the quick-return path, PLASMA_ERR_NOT_INITIALIZED
     * when no PLASMA context exists, -i when the i-th argument is illegal,
     * the plasma_tune() status when tuning fails, otherwise the status
     * recorded in the sequence used for the computation.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dtrtri", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    /* Error codes are the negated 1-based positions of uplo, diag, N and LDA
     * in the argument list. */
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_dtrtri", "illegal value of uplo");
        return -1;
    }
    if (diag != PlasmaUnit && diag != PlasmaNonUnit) {
        plasma_error("PLASMA_dtrtri", "illegal value of diag");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_dtrtri", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_dtrtri", "illegal value of LDA");
        return -5;
    }
    /* Quick return */
    /* N < 0 was rejected above, so this test is equivalent to N == 0. */
    if (max(N, 0) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB depending on M, N & NRHS; Set NBNB */
    /* There is no right-hand side here, so 0 is passed as the last size. */
    status = plasma_tune(PLASMA_FUNC_DPOSV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dtrtri", "plasma_tune() failed");
        return status;
    }
    /* Set NT */
    NB   = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    /* Convert A (N-by-N) to tile layout.  In the out-of-place case the last
     * macro argument is the cleanup to run; it frees descA. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_diplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, N, N);
    }
    /* Call the tile interface */
    PLASMA_dtrtri_Tile_Async(uplo, diag, &descA, sequence, &request);
    /* Convert the result back to LAPACK layout; the out-of-place path also
     * frees the temporary tile storage. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    /* Read the status before the sequence is destroyed. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Functions

Detailed Description

Function Documentation

trans = PlasmaNoTrans and M >= N: find the least squares solution of an overdetermined

trans = PlasmaNoTrans and M < N: find the minimum norm solution of an underdetermined

trans = PlasmaNoTrans and M >= N: find the least squares solution of an overdetermined

trans = PlasmaNoTrans and M < N: find the minimum norm solution of an underdetermined