Functions
int	PLASMA_cgebrd (PLASMA_enum jobu, PLASMA_enum jobvt, int M, int N, PLASMA_Complex32_t A, int LDA, float D, float E, PLASMA_Complex32_t U, int LDU, PLASMA_Complex32_t VT, int LDVT, PLASMA_desc descT)
int	PLASMA_cgelqf (int M, int N, PLASMA_Complex32_t A, int LDA, PLASMA_Complex32_t T)
int	PLASMA_cgelqs (int M, int N, int NRHS, PLASMA_Complex32_t A, int LDA, PLASMA_Complex32_t T, PLASMA_Complex32_t *B, int LDB)
int	PLASMA_cgels (PLASMA_enum trans, int M, int N, int NRHS, PLASMA_Complex32_t A, int LDA, PLASMA_Complex32_t T, PLASMA_Complex32_t *B, int LDB)
int	PLASMA_cgemm (PLASMA_enum transA, PLASMA_enum transB, int M, int N, int K, PLASMA_Complex32_t alpha, PLASMA_Complex32_t A, int LDA, PLASMA_Complex32_t B, int LDB, PLASMA_Complex32_t beta, PLASMA_Complex32_t *C, int LDC)
int	PLASMA_cgeqrf (int M, int N, PLASMA_Complex32_t A, int LDA, PLASMA_Complex32_t T)
int	PLASMA_cgeqrs (int M, int N, int NRHS, PLASMA_Complex32_t A, int LDA, PLASMA_Complex32_t T, PLASMA_Complex32_t *B, int LDB)
int	PLASMA_cgesv (int N, int NRHS, PLASMA_Complex32_t A, int LDA, int IPIV, PLASMA_Complex32_t *B, int LDB)
int	PLASMA_cgesv_incpiv (int N, int NRHS, PLASMA_Complex32_t A, int LDA, PLASMA_Complex32_t L, int IPIV, PLASMA_Complex32_t B, int LDB)
int	PLASMA_cgesvd (PLASMA_enum jobu, PLASMA_enum jobvt, int M, int N, PLASMA_Complex32_t A, int LDA, float S, PLASMA_Complex32_t U, int LDU, PLASMA_Complex32_t VT, int LDVT, PLASMA_desc *descT)
int	PLASMA_cgetrf (int M, int N, PLASMA_Complex32_t A, int LDA, int IPIV)
int	PLASMA_cgetrf_incpiv (int M, int N, PLASMA_Complex32_t A, int LDA, PLASMA_Complex32_t L, int *IPIV)
int	PLASMA_cgetri (int N, PLASMA_Complex32_t A, int LDA, int IPIV)
int	PLASMA_cgetrs (PLASMA_enum trans, int N, int NRHS, PLASMA_Complex32_t A, int LDA, int IPIV, PLASMA_Complex32_t *B, int LDB)
int	PLASMA_cgetrs_incpiv (PLASMA_enum trans, int N, int NRHS, PLASMA_Complex32_t A, int LDA, PLASMA_Complex32_t L, int IPIV, PLASMA_Complex32_t B, int LDB)
int	PLASMA_cheev (PLASMA_enum jobz, PLASMA_enum uplo, int N, PLASMA_Complex32_t A, int LDA, float W, PLASMA_desc descT, PLASMA_Complex32_t Q, int LDQ)
int	PLASMA_chegst (PLASMA_enum itype, PLASMA_enum uplo, int N, PLASMA_Complex32_t A, int LDA, PLASMA_Complex32_t B, int LDB)
int	PLASMA_chegv (PLASMA_enum itype, PLASMA_enum jobz, PLASMA_enum uplo, int N, PLASMA_Complex32_t A, int LDA, PLASMA_Complex32_t B, int LDB, float W, PLASMA_desc descT, PLASMA_Complex32_t *Q, int LDQ)
int	PLASMA_chemm (PLASMA_enum side, PLASMA_enum uplo, int M, int N, PLASMA_Complex32_t alpha, PLASMA_Complex32_t A, int LDA, PLASMA_Complex32_t B, int LDB, PLASMA_Complex32_t beta, PLASMA_Complex32_t *C, int LDC)
int	PLASMA_cher2k (PLASMA_enum uplo, PLASMA_enum trans, int N, int K, PLASMA_Complex32_t alpha, PLASMA_Complex32_t A, int LDA, PLASMA_Complex32_t B, int LDB, float beta, PLASMA_Complex32_t *C, int LDC)
int	PLASMA_cherk (PLASMA_enum uplo, PLASMA_enum trans, int N, int K, float alpha, PLASMA_Complex32_t A, int LDA, float beta, PLASMA_Complex32_t C, int LDC)
int	PLASMA_chetrd (PLASMA_enum jobz, PLASMA_enum uplo, int N, PLASMA_Complex32_t A, int LDA, float D, float E, PLASMA_desc descT, PLASMA_Complex32_t *Q, int LDQ)
int	PLASMA_clacpy (PLASMA_enum uplo, int M, int N, PLASMA_Complex32_t A, int LDA, PLASMA_Complex32_t B, int LDB)
float	PLASMA_clange (PLASMA_enum norm, int M, int N, PLASMA_Complex32_t A, int LDA, float work)
float	PLASMA_clanhe (PLASMA_enum norm, PLASMA_enum uplo, int N, PLASMA_Complex32_t A, int LDA, float work)
float	PLASMA_clansy (PLASMA_enum norm, PLASMA_enum uplo, int N, PLASMA_Complex32_t A, int LDA, float work)
int	PLASMA_claset (PLASMA_enum uplo, int M, int N, PLASMA_Complex32_t alpha, PLASMA_Complex32_t beta, PLASMA_Complex32_t *A, int LDA)
int	PLASMA_claswp (int N, PLASMA_Complex32_t A, int LDA, int K1, int K2, int IPIV, int INCX)
int	PLASMA_claswpc (int N, PLASMA_Complex32_t A, int LDA, int K1, int K2, int IPIV, int INCX)
int	PLASMA_clauum (PLASMA_enum uplo, int N, PLASMA_Complex32_t *A, int LDA)
int	PLASMA_cplghe (float bump, int N, PLASMA_Complex32_t *A, int LDA, unsigned long long int seed)
int	PLASMA_cplgsy (PLASMA_Complex32_t bump, int N, PLASMA_Complex32_t *A, int LDA, unsigned long long int seed)
int	PLASMA_cplrnt (int M, int N, PLASMA_Complex32_t *A, int LDA, unsigned long long int seed)
int	PLASMA_cposv (PLASMA_enum uplo, int N, int NRHS, PLASMA_Complex32_t A, int LDA, PLASMA_Complex32_t B, int LDB)
int	PLASMA_cpotrf (PLASMA_enum uplo, int N, PLASMA_Complex32_t *A, int LDA)
int	PLASMA_cpotri (PLASMA_enum uplo, int N, PLASMA_Complex32_t *A, int LDA)
int	PLASMA_cpotrs (PLASMA_enum uplo, int N, int NRHS, PLASMA_Complex32_t A, int LDA, PLASMA_Complex32_t B, int LDB)
int	PLASMA_csymm (PLASMA_enum side, PLASMA_enum uplo, int M, int N, PLASMA_Complex32_t alpha, PLASMA_Complex32_t A, int LDA, PLASMA_Complex32_t B, int LDB, PLASMA_Complex32_t beta, PLASMA_Complex32_t *C, int LDC)
int	PLASMA_csyr2k (PLASMA_enum uplo, PLASMA_enum trans, int N, int K, PLASMA_Complex32_t alpha, PLASMA_Complex32_t A, int LDA, PLASMA_Complex32_t B, int LDB, PLASMA_Complex32_t beta, PLASMA_Complex32_t *C, int LDC)
int	PLASMA_csyrk (PLASMA_enum uplo, PLASMA_enum trans, int N, int K, PLASMA_Complex32_t alpha, PLASMA_Complex32_t A, int LDA, PLASMA_Complex32_t beta, PLASMA_Complex32_t C, int LDC)
int	PLASMA_ctrmm (PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum transA, PLASMA_enum diag, int N, int NRHS, PLASMA_Complex32_t alpha, PLASMA_Complex32_t A, int LDA, PLASMA_Complex32_t B, int LDB)
int	PLASMA_ctrsm (PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum transA, PLASMA_enum diag, int N, int NRHS, PLASMA_Complex32_t alpha, PLASMA_Complex32_t A, int LDA, PLASMA_Complex32_t B, int LDB)
int	PLASMA_ctrsmpl (int N, int NRHS, PLASMA_Complex32_t A, int LDA, PLASMA_Complex32_t L, int IPIV, PLASMA_Complex32_t B, int LDB)
int	PLASMA_ctrsmrv (PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum transA, PLASMA_enum diag, int N, int NRHS, PLASMA_Complex32_t alpha, PLASMA_Complex32_t A, int LDA, PLASMA_Complex32_t B, int LDB)
int	PLASMA_ctrtri (PLASMA_enum uplo, PLASMA_enum diag, int N, PLASMA_Complex32_t *A, int LDA)
int	PLASMA_cunglq (int M, int N, int K, PLASMA_Complex32_t A, int LDA, PLASMA_Complex32_t T, PLASMA_Complex32_t *Q, int LDQ)
int	PLASMA_cungqr (int M, int N, int K, PLASMA_Complex32_t A, int LDA, PLASMA_Complex32_t T, PLASMA_Complex32_t *Q, int LDQ)
int	PLASMA_cunmlq (PLASMA_enum side, PLASMA_enum trans, int M, int N, int K, PLASMA_Complex32_t A, int LDA, PLASMA_Complex32_t T, PLASMA_Complex32_t *B, int LDB)
int	PLASMA_cunmqr (PLASMA_enum side, PLASMA_enum trans, int M, int N, int K, PLASMA_Complex32_t A, int LDA, PLASMA_Complex32_t T, PLASMA_Complex32_t *B, int LDB)
int	PLASMA_cLapack_to_Tile (PLASMA_Complex32_t Af77, int LDA, PLASMA_desc A)
int	PLASMA_cTile_to_Lapack (PLASMA_desc A, PLASMA_Complex32_t Af77, int LDA)

Detailed Description

This is the group of single complex functions using the simple user interface.

Function Documentation

int PLASMA_cgebrd	(	PLASMA_enum	jobu,
		PLASMA_enum	jobvt,
		int	M,
		int	N,
		PLASMA_Complex32_t *	A,
		int	LDA,
		float *	D,
		float *	E,
		PLASMA_Complex32_t *	U,
		int	LDU,
		PLASMA_Complex32_t *	VT,
		int	LDVT,
		PLASMA_desc *	descT
	)

PLASMA_cgebrd - computes the singular value decomposition (SVD) of a complex M-by-N matrix A, optionally computing the left and/or right singular vectors. The SVD is written

 A = U * SIGMA * transpose(V)

where SIGMA is an M-by-N matrix which is zero except for its min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA are the singular values of A; they are real and non-negative, and are returned in descending order. The first min(m,n) columns of U and V are the left and right singular vectors of A.

Note that the routine returns V**T, not V. Not LAPACK Compliant for now! Note: Only PlasmaNoVec supported!

Parameters:

[in]	jobu	Specifies options for computing all or part of the matrix U. Intended usage: = PlasmaVec: all M columns of U are returned in array U; = PlasmaNoVec: no columns of U (no left singular vectors) are computed. Note: Only PlasmaNoVec supported!
[in]	jobvt	Specifies options for computing all or part of the matrix V**H. Intended usage: = PlasmaVec: all M columns of U are returned in array U; = PlasmaNoVec: no columns of U (no left singular vectors) are computed. Note: Only PlasmaNoVec supported!
[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in,out]	A	On entry, the M-by-N matrix A. On exit, if JOBU = 'O', A is overwritten with the first min(m,n) columns of U (the left singular vectors, stored columnwise); if JOBVT = 'O', A is overwritten with the first min(m,n) rows of V**H (the right singular vectors, stored rowwise); if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A are destroyed.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[out]	S	The real singular values of A, sorted so that S(i) >= S(i+1).
[out]	U	(LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. If JOBU = 'A', U contains the M-by-M unitary matrix U; if JOBU = 'S', U contains the first min(m,n) columns of U (the left singular vectors, stored columnwise); if JOBU = 'N' or 'O', U is not referenced.
[in]	LDU	The leading dimension of the array U. LDU >= 1; if JOBU = 'S' or 'A', LDU >= M.
[out]	VT	If JOBVT = 'A', VT contains the N-by-N unitary matrix VH; if JOBVT = 'S', VT contains the first min(m,n) rows of VH (the right singular vectors, stored rowwise); if JOBVT = 'N' or 'O', VT is not referenced.
[in]	LDVT	The leading dimension of the array VT. LDVT >= 1; if JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
[in,out]	descT	On entry, descriptor as return by PLASMA_Alloc_Workspace_cgesvd On exit, contains auxiliary factorization data.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_cgebrd_Tile; PLASMA_cgebrd_Tile_Async; PLASMA_cgebrd; PLASMA_dgebrd; PLASMA_sgebrd

Definition at line 122 of file cgebrd.c.

References plasma_desc_t::m, max, min, plasma_desc_t::n, PLASMA_cgebrd_Tile_Async(), plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, plasma_desc_check(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CGEBRD, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaNoVec, PlasmaVec, and plasma_sequence_t::status.

{
    int NB, IB, MT, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descU, descVT;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cgebrd", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    
    /* Tune NB & IB depending on M & N; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CGEBRD, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cgebrd", "plasma_tune() failed");
        return status;
    }
    /* Set MT, NT */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    MT = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);
    /* Check input arguments */
    if (jobu != PlasmaNoVec  && jobu !=PlasmaVec) {
        plasma_error("PLASMA_cgebrd", "illegal value of jobu");
        return -1;
    }
    if (jobvt != PlasmaNoVec && jobvt != PlasmaVec) {
        plasma_error("PLASMA_cgebrd", "illegal value of jobvt");
        return -2;
    }
    if (M < 0) {
        plasma_error("PLASMA_cgebrd", "illegal value of M");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_cgebrd", "illegal value of N");
        return -4;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_cgebrd", "illegal value of LDA");
        return -6;
    }
    if (LDU < 1) {
        plasma_error("PLASMA_cgebrd", "illegal value of LDU");
        return -9;
    }
    if (LDVT < 1) {
        plasma_error("PLASMA_cgebrd", "illegal value of LDVT");
        return -11;
    }
    if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) || 
         ( descT->m != MT*IB ) || (descT->n != NT*NB) ) {
        plasma_error("PLASMA_cgebrd", "invalid T descriptor");
        return -12;
    }
    /* Quick return */
    if (min(M, N) == 0) {
        return PLASMA_SUCCESS;
    }
    if (jobu == PlasmaVec) {
        plasma_error("PLASMA_cgebrd", "computing the singular vectors is not supported in this version");
        return -1;
    }
    if (jobvt == PlasmaVec) {
        plasma_error("PLASMA_cgebrd", "computing the singular vectors is not supported in this version");
        return -2;
    }
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA,   A, NB, NB,  LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
        if (jobu == PlasmaVec){
            plasma_cooplap2tile( descU,   U, NB, NB,  LDU, M, 0, 0, M, M, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descU)));
        }
        if (jobvt == PlasmaVec){
            plasma_cooplap2tile( descVT, VT, NB, NB, LDVT, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descU)); plasma_desc_mat_free(&(descVT)));
        }
    } else {
        plasma_ciplap2tile( descA,   A, NB, NB,  LDA, N, 0, 0, M, N);
        if (jobu == PlasmaVec){
            plasma_ciplap2tile( descU,   U, NB, NB,  LDU, M, 0, 0, M, M);
        }
        if (jobvt == PlasmaVec){
            plasma_ciplap2tile( descVT, VT, NB, NB, LDVT, N, 0, 0, N, N);
        }
    }
    /* Call the tile interface */
    PLASMA_cgebrd_Tile_Async(jobu, jobvt, &descA, D, E, &descU, &descVT, descT, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descA,   A, NB, NB,  LDA, N );
        if (jobu == PlasmaVec){
            plasma_cooptile2lap( descU,   U, NB, NB,  LDU, M );
        }
        if (jobvt == PlasmaVec){
            plasma_cooptile2lap( descVT, VT, NB, NB, LDVT, N );
        }
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        if (jobu == PlasmaVec){
            plasma_desc_mat_free(&descU);
        }
        if (jobvt == PlasmaVec){
            plasma_desc_mat_free(&descVT);
        }
    } else {
        plasma_ciptile2lap( descA,   A, NB, NB,  LDA, N );
        if (jobu == PlasmaVec){
            plasma_ciptile2lap( descU,   U, NB, NB,  LDU, M );
        }
        if (jobvt == PlasmaVec){
            plasma_ciptile2lap( descVT, VT, NB, NB, LDVT, N );
        }
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_cgelqf	(	int	M,
		int	N,
		PLASMA_Complex32_t *	A,
		int	LDA,
		PLASMA_Complex32_t *	T
	)

PLASMA_cgelqf - Computes the tile LQ factorization of a complex M-by-N matrix A: A = L * Q.

Parameters:

[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in,out]	A	On entry, the M-by-N matrix A. On exit, the elements on and below the diagonal of the array contain the m-by-min(M,N) lower trapezoidal matrix L (L is lower triangular if M <= N); the elements above the diagonal represent the unitary matrix Q as a product of elementary reflectors, stored by tiles.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[out]	T	On exit, auxiliary factorization data, required by PLASMA_cgelqs to solve the system of equations.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_cgelqf_Tile; PLASMA_cgelqf_Tile_Async; PLASMA_cgelqf; PLASMA_dgelqf; PLASMA_sgelqf; PLASMA_cgelqs

Definition at line 62 of file cgelqf.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, PLASMA_cgelqf_Tile_Async(), plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_CGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaComplexFloat, plasma_sequence_t::status, and T.

{
    int NB, IB, IBNB, MT, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descT;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cgelqf", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (M < 0) {
        plasma_error("PLASMA_cgelqf", "illegal value of M");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_cgelqf", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_cgelqf", "illegal value of LDA");
        return -4;
    }
    /* Quick return */
    if (min(M, N) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_CGELS, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cgelqf", "plasma_tune() failed");
        return status;
    }
    /* Set MT & NT */
    NB   = PLASMA_NB;
    IB   = PLASMA_IB;
    IBNB = IB*NB;
    MT   = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT   = (N%NB==0) ? (N/NB) : (N/NB+1);
    plasma_sequence_create(plasma, &sequence);
     if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
        descT = plasma_desc_init(
            PlasmaComplexFloat,
            IB, NB, IBNB,
            MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);
    }
    else {
        /* Double the size of T to accomodate the tree reduction phase */
        descT = plasma_desc_init(
            PlasmaComplexFloat,
            IB, NB, IBNB,
            MT*IB, 2*NT*NB, 0, 0, MT*IB, 2*NT*NB);
    }
    descT.mat = T;
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N);
    }
    /* Call the tile interface */
    PLASMA_cgelqf_Tile_Async(&descA, &descT, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_cgelqs	(	int	M,
		int	N,
		int	NRHS,
		PLASMA_Complex32_t *	A,
		int	LDA,
		PLASMA_Complex32_t *	T,
		PLASMA_Complex32_t *	B,
		int	LDB
	)

PLASMA_cgelqs - Compute a minimum-norm solution min || A*X - B || using the LQ factorization A = L*Q computed by PLASMA_cgelqf.

Parameters:

[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= M >= 0.
[in]	NRHS	The number of columns of B. NRHS >= 0.
[in]	A	Details of the LQ factorization of the original matrix A as returned by PLASMA_cgelqf.
[in]	LDA	The leading dimension of the array A. LDA >= M.
[in]	T	Auxiliary factorization data, computed by PLASMA_cgelqf.
[in,out]	B	On entry, the M-by-NRHS right hand side matrix B. On exit, the N-by-NRHS solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= N.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_cgelqs_Tile; PLASMA_cgelqs_Tile_Async; PLASMA_cgelqs; PLASMA_dgelqs; PLASMA_sgelqs; PLASMA_cgelqf

Definition at line 67 of file cgelqs.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, PLASMA_cgelqs_Tile_Async(), plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_CGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaComplexFloat, plasma_sequence_t::status, and T.

{
    int NB, IB, IBNB, MT, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB, descT;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cgelqs", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (M < 0) {
        plasma_error("PLASMA_cgelqs", "illegal value of M");
        return -1;
    }
    if (N < 0 || M > N) {
        plasma_error("PLASMA_cgelqs", "illegal value of N");
        return -2;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_cgelqs", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_cgelqs", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, max(1, N))) {
        plasma_error("PLASMA_cgelqs", "illegal value of LDB");
        return -8;
    }
    /* Quick return */
    if (min(M, min(N, NRHS)) == 0) {
        return PLASMA_SUCCESS;
    }
    /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_CGELS, M, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cgelqs", "plasma_tune() failed");
        return status;
    }
    /* Set MT, NT & NTRHS */
    NB    = PLASMA_NB;
    IB    = PLASMA_IB;
    IBNB  = IB*NB;
    MT    = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT    = (N%NB==0) ? (N/NB) : (N/NB+1);
    plasma_sequence_create(plasma, &sequence);
    if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
        descT = plasma_desc_init(
            PlasmaComplexFloat,
            IB, NB, IBNB,
            MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);
    }
    else {
        /* Double the size of T to accomodate the tree reduction phase */
        descT = plasma_desc_init(
            PlasmaComplexFloat,
            IB, NB, IBNB,
            MT*IB, 2*NT*NB, 0, 0, MT*IB, 2*NT*NB);
    }
    descT.mat = T;
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, M, N   , plasma_desc_mat_free(&(descA)) );
        plasma_cooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, N,    0, 0, M, N   );
        plasma_ciplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
    }
    /* Call the tile interface */
    PLASMA_cgelqs_Tile_Async(&descA, &descT, &descB, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_cooptile2lap( descB, B, NB, NB, LDB, NRHS );
    plasma_dynamic_sync();
    plasma_desc_mat_free(&descA);
    plasma_desc_mat_free(&descB);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_ciptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_cgels	(	PLASMA_enum	trans,
		int	M,
		int	N,
		int	NRHS,
		PLASMA_Complex32_t *	A,
		int	LDA,
		PLASMA_Complex32_t *	T,
		PLASMA_Complex32_t *	B,
		int	LDB
	)

PLASMA_cgels - solves overdetermined or underdetermined linear systems involving an M-by-N matrix A using the QR or the LQ factorization of A. It is assumed that A has full rank. The following options are provided:

trans = PlasmaNoTrans and M >= N: find the least squares solution of an overdetermined

system, i.e., solve the least squares problem: minimize || B - A*X ||.

trans = PlasmaNoTrans and M < N: find the minimum norm solution of an underdetermined

system A * X = B.

Several right hand side vectors B and solution vectors X can be handled in a single call; they are stored as the columns of the M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix X.

Parameters:

[in]	trans	Intended usage: = PlasmaNoTrans: the linear system involves A; = PlasmaConjTrans: the linear system involves A**H. Currently only PlasmaNoTrans is supported.
[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrices B and X. NRHS >= 0.
[in,out]	A	On entry, the M-by-N matrix A. On exit, if M >= N, A is overwritten by details of its QR factorization as returned by PLASMA_cgeqrf; if M < N, A is overwritten by details of its LQ factorization as returned by PLASMA_cgelqf.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[out]	T	On exit, auxiliary factorization data.
[in,out]	B	On entry, the M-by-NRHS matrix B of right hand side vectors, stored columnwise; On exit, if return value = 0, B is overwritten by the solution vectors, stored columnwise: if M >= N, rows 1 to N of B contain the least squares solution vectors; the residual sum of squares for the solution in each column is given by the sum of squares of the modulus of elements N+1 to M in that column; if M < N, rows 1 to N of B contain the minimum norm solution vectors;
[in]	LDB	The leading dimension of the array B. LDB >= MAX(1,M,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_cgels_Tile; PLASMA_cgels_Tile_Async; PLASMA_cgels; PLASMA_dgels; PLASMA_sgels

Definition at line 94 of file cgels.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, PLASMA_cgels_Tile_Async(), plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_NOT_SUPPORTED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_CGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaComplexFloat, PlasmaNoTrans, plasma_sequence_t::status, and T.

{
    int i, j;
    int NB, IB, IBNB, MT, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB, descT;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cgels", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (trans != PlasmaNoTrans) {
        plasma_error("PLASMA_cgels", "only PlasmaNoTrans supported");
        return PLASMA_ERR_NOT_SUPPORTED;
    }
    if (M < 0) {
        plasma_error("PLASMA_cgels", "illegal value of M");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_cgels", "illegal value of N");
        return -3;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_cgels", "illegal value of NRHS");
        return -4;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_cgels", "illegal value of LDA");
        return -6;
    }
    if (LDB < max(1, max(M, N))) {
        plasma_error("PLASMA_cgels", "illegal value of LDB");
        return -9;
    }
    /* Quick return */
    if (min(M, min(N, NRHS)) == 0) {
        for (i = 0; i < max(M, N); i++)
            for (j = 0; j < NRHS; j++)
                B[j*LDB+i] = 0.0;
        return PLASMA_SUCCESS;
    }
    /* Tune NB & IB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CGELS, M, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cgels", "plasma_tune() failed");
        return status;
    }
    /* Set MT, NT & NTRHS */
    NB    = PLASMA_NB;
    IB    = PLASMA_IB;
    IBNB  = IB*NB;
    NT    = (N%NB==0) ? (N/NB) : (N/NB+1);
    MT    = (M%NB==0) ? (M/NB) : (M/NB+1);
    plasma_sequence_create(plasma, &sequence);
    if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
        descT = plasma_desc_init(
            PlasmaComplexFloat,
            IB, NB, IBNB,
            MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);
    }
    else {
        /* Double the size of T to accomodate the tree reduction phase */
        descT = plasma_desc_init(
            PlasmaComplexFloat,
            IB, NB, IBNB,
            MT*IB, 2*NT*NB, 0, 0, MT*IB, 2*NT*NB);
    }
    descT.mat = T;
    if ( M >= N ) {
        if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
            plasma_cooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, M, N   , plasma_desc_mat_free(&(descA)) );
            plasma_cooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, M, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
        } else {
            plasma_ciplap2tile( descA, A, NB, NB, LDA, N,    0, 0, M, N   );
            plasma_ciplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, M, NRHS);
        }
    } else {
        if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
            plasma_cooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, M, N   , plasma_desc_mat_free(&(descA)) );
            plasma_cooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
        } else {
            plasma_ciplap2tile( descA, A, NB, NB, LDA, N,    0, 0, M, N   );
            plasma_ciplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
        }
    }
    /* Call the tile interface */
    PLASMA_cgels_Tile_Async(PlasmaNoTrans, &descA, &descT, &descB, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_cooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_ciptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_cgemm	(	PLASMA_enum	transA,
		PLASMA_enum	transB,
		int	M,
		int	N,
		int	K,
		PLASMA_Complex32_t	alpha,
		PLASMA_Complex32_t *	A,
		int	LDA,
		PLASMA_Complex32_t *	B,
		int	LDB,
		PLASMA_Complex32_t	beta,
		PLASMA_Complex32_t *	C,
		int	LDC
	)

PLASMA_cgemm - Performs one of the matrix-matrix operations

$C = \alpha [op( A )\times op( B )] + \beta C$

,

where op( X ) is one of

op( X ) = X or op( X ) = X' or op( X ) = conjfg( X' )

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters:

[in]	transA	Specifies whether the matrix A is transposed, not transposed or conjfugate transposed: = PlasmaNoTrans: A is not transposed; = PlasmaTrans: A is transposed; = PlasmaConjTrans: A is conjfugate transposed.
[in]	transB	Specifies whether the matrix B is transposed, not transposed or conjfugate transposed: = PlasmaNoTrans: B is not transposed; = PlasmaTrans: B is transposed; = PlasmaConjTrans: B is conjfugate transposed.
[in]	M	M specifies the number of rows of the matrix op( A ) and of the matrix C. M >= 0.
[in]	N	N specifies the number of columns of the matrix op( B ) and of the matrix C. N >= 0.
[in]	K	K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K >= 0.
[in]	alpha	alpha specifies the scalar alpha
[in]	A	A is a LDA-by-ka matrix, where ka is K when transA = PlasmaNoTrans, and is M otherwise.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[in]	B	B is a LDB-by-kb matrix, where kb is N when transB = PlasmaNoTrans, and is K otherwise.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).
[in]	beta	beta specifies the scalar beta
[in,out]	C	C is a LDC-by-N matrix. On exit, the array is overwritten by the M by N matrix ( alphaop( A )op( B ) + beta*C )
[in]	LDC	The leading dimension of the array C. LDC >= max(1,M).

Returns:

Return values:

PLASMA_SUCCESS successful exit

See also:: PLASMA_cgemm_Tile; PLASMA_cgemm; PLASMA_dgemm; PLASMA_sgemm

Definition at line 96 of file cgemm.c.

References max, PLASMA_cgemm_Tile_Async(), plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CGEMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaConjTrans, PlasmaNoTrans, PlasmaTrans, and plasma_sequence_t::status.

{
    int NB;
    int Am, An, Bm, Bn;
    int status;
    PLASMA_desc descA, descB, descC;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cgemm", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if ((transA != PlasmaNoTrans) && (transA != PlasmaTrans) && (transA != PlasmaConjTrans)) {
        plasma_error("PLASMA_cgemm", "illegal value of transA");
        return -1;
    }
    if ((transB != PlasmaNoTrans) && (transB != PlasmaTrans) && (transB != PlasmaConjTrans)) {
        plasma_error("PLASMA_cgemm", "illegal value of transB");
        return -2;
    }
    if ( transA == PlasmaNoTrans ) { 
        Am = M; An = K;
    } else {
        Am = K; An = M;
    }
    if ( transB == PlasmaNoTrans ) { 
        Bm = K; Bn = N;
    } else {
        Bm = N; Bn = K;
    }
    if (M < 0) {
        plasma_error("PLASMA_cgemm", "illegal value of M");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_cgemm", "illegal value of N");
        return -4;
    }
    if (K < 0) {
        plasma_error("PLASMA_cgemm", "illegal value of N");
        return -5;
    }
    if (LDA < max(1, Am)) {
        plasma_error("PLASMA_cgemm", "illegal value of LDA");
        return -8;
    }
    if (LDB < max(1, Bm)) {
        plasma_error("PLASMA_cgemm", "illegal value of LDB");
        return -10;
    }
    if (LDC < max(1, M)) {
        plasma_error("PLASMA_cgemm", "illegal value of LDC");
        return -13;
    }
    /* Quick return */
    if (M == 0 || N == 0 ||
        ((alpha == (PLASMA_Complex32_t)0.0 || K == 0) && beta == (PLASMA_Complex32_t)1.0))
        return PLASMA_SUCCESS;
    /* Tune NB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_CGEMM, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cgemm", "plasma_tune() failed");
        return status;
    }
    /* Set MT & NT & KT */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)) );
        plasma_cooplap2tile( descB, B, NB, NB, LDB, Bn, 0, 0, Bm, Bn, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
        plasma_cooplap2tile( descC, C, NB, NB, LDC, N,  0, 0, M,  N,  plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descC)));
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An );
        plasma_ciplap2tile( descB, B, NB, NB, LDB, Bn, 0, 0, Bm, Bn );
        plasma_ciplap2tile( descC, C, NB, NB, LDC, N,  0, 0, M,  N  );
    }
    /* Call the tile interface */
    PLASMA_cgemm_Tile_Async(
        transA, transB, alpha, &descA, &descB, beta, &descC, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
        plasma_desc_mat_free(&descC);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, An );
        plasma_ciptile2lap( descB, B, NB, NB, LDB, Bn );
        plasma_ciptile2lap( descC, C, NB, NB, LDC, N  );
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_cgeqrf	(	int	M,
		int	N,
		PLASMA_Complex32_t *	A,
		int	LDA,
		PLASMA_Complex32_t *	T
	)

PLASMA_cgeqrf - Computes the tile QR factorization of a complex M-by-N matrix A: A = Q * R.

Parameters:

[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in,out]	A	On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if M >= N); the elements below the diagonal represent the unitary matrix Q as a product of elementary reflectors stored by tiles.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[out]	T	On exit, auxiliary factorization data, required by PLASMA_cgeqrs to solve the system of equations.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_cgeqrf_Tile; PLASMA_cgeqrf_Tile_Async; PLASMA_cgeqrf; PLASMA_dgeqrf; PLASMA_sgeqrf; PLASMA_cgeqrs

Definition at line 61 of file cgeqrf.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, PLASMA_cgeqrf_Tile_Async(), plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_CGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaComplexFloat, plasma_sequence_t::status, and T.

{
    int NB, IB, IBNB, MT, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descT;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cgeqrf", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (M < 0) {
        plasma_error("PLASMA_cgeqrf", "illegal value of M");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_cgeqrf", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_cgeqrf", "illegal value of LDA");
        return -4;
    }
    /* Quick return */
    if (min(M, N) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_CGELS, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cgeqrf", "plasma_tune() failed");
        return status;
    }
    /* Set MT & NT */
    NB   = PLASMA_NB;
    IB   = PLASMA_IB;
    IBNB = IB*NB;
    MT   = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT   = (N%NB==0) ? (N/NB) : (N/NB+1);
    plasma_sequence_create(plasma, &sequence);
 
     if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
        descT = plasma_desc_init(
            PlasmaComplexFloat,
            IB, NB, IBNB,
            MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);
    }
    else {
        /* Double the size of T to accomodate the tree reduction phase */
        descT = plasma_desc_init(
            PlasmaComplexFloat,
            IB, NB, IBNB,
            MT*IB, 2*NT*NB, 0, 0, MT*IB, 2*NT*NB);
    }
    descT.mat = T;
    
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N);
    }
    /* Call the tile interface */
    PLASMA_cgeqrf_Tile_Async(&descA, &descT, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_cgeqrs	(	int	M,
		int	N,
		int	NRHS,
		PLASMA_Complex32_t *	A,
		int	LDA,
		PLASMA_Complex32_t *	T,
		PLASMA_Complex32_t *	B,
		int	LDB
	)

PLASMA_cgeqrs - Compute a minimum-norm solution min || A*X - B || using the RQ factorization A = R*Q computed by PLASMA_cgeqrf.

Parameters:

[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= M >= 0.
[in]	NRHS	The number of columns of B. NRHS >= 0.
[in,out]	A	Details of the QR factorization of the original matrix A as returned by PLASMA_cgeqrf.
[in]	LDA	The leading dimension of the array A. LDA >= M.
[in]	T	Auxiliary factorization data, computed by PLASMA_cgeqrf.
[in,out]	B	On entry, the m-by-nrhs right hand side matrix B. On exit, the n-by-nrhs solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_cgeqrs_Tile; PLASMA_cgeqrs_Tile_Async; PLASMA_cgeqrs; PLASMA_dgeqrs; PLASMA_sgeqrs; PLASMA_cgeqrf

Definition at line 67 of file cgeqrs.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, PLASMA_cgeqrs_Tile_Async(), plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_CGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaComplexFloat, plasma_sequence_t::status, and T.

{
    int NB, IB, IBNB, MT, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB, descT;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cgeqrs", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (M < 0) {
        plasma_error("PLASMA_cgeqrs", "illegal value of M");
        return -1;
    }
    if (N < 0 || N > M) {
        plasma_error("PLASMA_cgeqrs", "illegal value of N");
        return -2;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_cgeqrs", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_cgeqrs", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, max(1, M))) {
        plasma_error("PLASMA_cgeqrs", "illegal value of LDB");
        return -8;
    }
    /* Quick return */
    if (min(M, min(N, NRHS)) == 0) {
        return PLASMA_SUCCESS;
    }
    /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_CGELS, M, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cgeqrs", "plasma_tune() failed");
        return status;
    }
    /* Set MT, NT & NTRHS */
    NB   = PLASMA_NB;
    IB   = PLASMA_IB;
    IBNB = IB*NB;
    MT   = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT   = (N%NB==0) ? (N/NB) : (N/NB+1);
    plasma_sequence_create(plasma, &sequence);
    if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
        descT = plasma_desc_init(
            PlasmaComplexFloat,
            IB, NB, IBNB,
            MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);
    }
    else {
        /* Double the size of T to accomodate the tree reduction phase */
        descT = plasma_desc_init(
            PlasmaComplexFloat,
            IB, NB, IBNB,
            MT*IB, 2*NT*NB, 0, 0, MT*IB, 2*NT*NB);
    }
    descT.mat = T;
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, M, N   , plasma_desc_mat_free(&(descA)) );
        plasma_cooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, M, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, N,    0, 0, M, N   );
        plasma_ciplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, M, NRHS);
    }
    /* Call the tile interface */
    PLASMA_cgeqrs_Tile_Async(&descA, &descT, &descB, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_cooptile2lap( descB, B, NB, NB, LDB, NRHS );
    plasma_dynamic_sync();
    plasma_desc_mat_free(&descA);
    plasma_desc_mat_free(&descB);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_ciptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_cgesv	(	int	N,
		int	NRHS,
		PLASMA_Complex32_t *	A,
		int	LDA,
		int *	IPIV,
		PLASMA_Complex32_t *	B,
		int	LDB
	)

PLASMA_cgesv - Computes the solution to a system of linear equations A * X = B, where A is an N-by-N matrix and X and B are N-by-NRHS matrices. The tile LU decomposition with partial tile pivoting and row interchanges is used to factor A. The factored form of A is then used to solve the system of equations A * X = B.

Parameters:

[in]	N	The number of linear equations, i.e., the order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in,out]	A	On entry, the N-by-N coefficient matrix A. On exit, the tile L and U factors from the factorization.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[out]	IPIV	On exit, the pivot indices that define the permutations.
[in,out]	B	On entry, the N-by-NRHS matrix of right hand side matrix B. On exit, if return value = 0, the N-by-NRHS solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, so the solution could not be computed.

See also:: PLASMA_cgesv_Tile; PLASMA_cgesv_Tile_Async; PLASMA_cgesv; PLASMA_dgesv; PLASMA_sgesv

Definition at line 70 of file cgesv.c.

References max, min, PLASMA_cgesv_Tile_Async(), plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), PLASMA_FUNC_CGESV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), and plasma_sequence_t::status.

{
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_error("PLASMA_cgesv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (N < 0) {
        plasma_error("PLASMA_cgesv", "illegal value of N");
        return -1;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_cgesv", "illegal value of NRHS");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_cgesv", "illegal value of LDA");
        return -4;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_cgesv", "illegal value of LDB");
        return -8;
    }
    /* Quick return */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CGESV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cgesv", "plasma_tune() failed");
        return status;
    }
    /* Set NT & NTRHS */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_cooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        plasma_ciplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
    }
    /* Call the tile interface */
    PLASMA_cgesv_Tile_Async(&descA, IPIV, &descB, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_cooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_ciptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_cgesv_incpiv	(	int	N,
		int	NRHS,
		PLASMA_Complex32_t *	A,
		int	LDA,
		PLASMA_Complex32_t *	L,
		int *	IPIV,
		PLASMA_Complex32_t *	B,
		int	LDB
	)

PLASMA_cgesv_incpiv - Computes the solution to a system of linear equations A * X = B, where A is an N-by-N matrix and X and B are N-by-NRHS matrices. The tile LU decomposition with partial tile pivoting and row interchanges is used to factor A. The factored form of A is then used to solve the system of equations A * X = B.

Parameters:

[in]	N	The number of linear equations, i.e., the order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in,out]	A	On entry, the N-by-N coefficient matrix A. On exit, the tile L and U factors from the factorization (not equivalent to LAPACK).
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[out]	L	On exit, auxiliary factorization data, related to the tile L factor, necessary to solve the system of equations.
[out]	IPIV	On exit, the pivot indices that define the permutations (not equivalent to LAPACK).
[in,out]	B	On entry, the N-by-NRHS matrix of right hand side matrix B. On exit, if return value = 0, the N-by-NRHS solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, so the solution could not be computed.

See also:: PLASMA_cgesv_incpiv_Tile; PLASMA_cgesv_incpiv_Tile_Async; PLASMA_cgesv_incpiv; PLASMA_dgesv_incpiv; PLASMA_sgesv_incpiv

Definition at line 73 of file cgesv_incpiv.c.

References L, plasma_desc_t::mat, max, min, PLASMA_cgesv_incpiv_Tile_Async(), plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), PLASMA_FUNC_CGESV, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaComplexFloat, and plasma_sequence_t::status.

{
    int NB, IB, IBNB, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB, descL;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_error("PLASMA_cgesv_incpiv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (N < 0) {
        plasma_error("PLASMA_cgesv_incpiv", "illegal value of N");
        return -1;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_cgesv_incpiv", "illegal value of NRHS");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_cgesv_incpiv", "illegal value of LDA");
        return -4;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_cgesv_incpiv", "illegal value of LDB");
        return -8;
    }
    /* Quick return */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CGESV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cgesv_incpiv", "plasma_tune() failed");
        return status;
    }
    /* Set NT & NTRHS */
    NB    = PLASMA_NB;
    IB    = PLASMA_IB;
    IBNB  = IB*NB;
    NT    = (N%NB==0) ? (N/NB) : (N/NB+1);
    plasma_sequence_create(plasma, &sequence);
    descL = plasma_desc_init(
        PlasmaComplexFloat,
        IB, NB, IBNB,
        NT*IB, NT*NB, 0, 0, NT*IB, NT*NB);
    descL.mat = L;
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_cooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        plasma_ciplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
    }
    /* Call the tile interface */
    PLASMA_cgesv_incpiv_Tile_Async(&descA, &descL, IPIV, &descB, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_cooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_ciptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_cgesvd	(	PLASMA_enum	jobu,
		PLASMA_enum	jobvt,
		int	M,
		int	N,
		PLASMA_Complex32_t *	A,
		int	LDA,
		float *	S,
		PLASMA_Complex32_t *	U,
		int	LDU,
		PLASMA_Complex32_t *	VT,
		int	LDVT,
		PLASMA_desc *	descT
	)

PLASMA_cgesvd - computes the singular value decomposition (SVD) of a complex M-by-N matrix A, optionally computing the left and/or right singular vectors. The SVD is written

 A = U * SIGMA * transpose(V)

where SIGMA is an M-by-N matrix which is zero except for its min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA are the singular values of A; they are real and non-negative, and are returned in descending order. The first min(m,n) columns of U and V are the left and right singular vectors of A.

Note that the routine returns V**T, not V. Not LAPACK Compliant for now! Note: Only PlasmaNoVec supported!

Parameters:

[in]	jobu	Specifies options for computing all or part of the matrix U. Intended usage: = PlasmaVec: all M columns of U are returned in array U; = PlasmaNoVec: no columns of U (no left singular vectors) are computed. Note: Only PlasmaNoVec supported!
[in]	jobvt	Specifies options for computing all or part of the matrix V**H. Intended usage: = PlasmaVec: all M columns of U are returned in array U; = PlasmaNoVec: no columns of U (no left singular vectors) are computed. Note: Only PlasmaNoVec supported!
[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in,out]	A	On entry, the M-by-N matrix A. On exit, if JOBU = 'O', A is overwritten with the first min(m,n) columns of U (the left singular vectors, stored columnwise); if JOBVT = 'O', A is overwritten with the first min(m,n) rows of V**H (the right singular vectors, stored rowwise); if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A are destroyed.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[out]	S	The real singular values of A, sorted so that S(i) >= S(i+1).
[out]	U	(LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. If JOBU = 'A', U contains the M-by-M unitary matrix U; if JOBU = 'S', U contains the first min(m,n) columns of U (the left singular vectors, stored columnwise); if JOBU = 'N' or 'O', U is not referenced.
[in]	LDU	The leading dimension of the array U. LDU >= 1; if JOBU = 'S' or 'A', LDU >= M.
[out]	VT	If JOBVT = 'A', VT contains the N-by-N unitary matrix VH; if JOBVT = 'S', VT contains the first min(m,n) rows of VH (the right singular vectors, stored rowwise); if JOBVT = 'N' or 'O', VT is not referenced.
[in]	LDVT	The leading dimension of the array VT. LDVT >= 1; if JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
[in,out]	descT	On entry, descriptor as return by PLASMA_Alloc_Workspace_cgesvd On exit, contains auxiliary factorization data.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_cgesvd_Tile; PLASMA_cgesvd_Tile_Async; PLASMA_cgesvd; PLASMA_dgesvd; PLASMA_sgesvd

Definition at line 123 of file cgesvd.c.

References plasma_desc_t::m, max, min, plasma_desc_t::n, PLASMA_cgesvd_Tile_Async(), plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, plasma_desc_check(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CGESVD, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaNoVec, PlasmaVec, and plasma_sequence_t::status.

{
    int NB, IB, MT, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descU, descVT;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cgesvd", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    
    /* Tune NB & IB depending on M & N; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CGESVD, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cgesvd", "plasma_tune() failed");
        return status;
    }
    /* Set MT, NT */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    MT = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);
    /* Check input arguments */
    if (jobu != PlasmaNoVec  && jobu !=PlasmaVec) {
        plasma_error("PLASMA_cgesvd", "illegal value of jobu");
        return -1;
    }
    if (jobvt != PlasmaNoVec && jobvt != PlasmaVec) {
        plasma_error("PLASMA_cgesvd", "illegal value of jobvt");
        return -2;
    }
    if (M < 0) {
        plasma_error("PLASMA_cgesvd", "illegal value of M");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_cgesvd", "illegal value of N");
        return -4;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_cgesvd", "illegal value of LDA");
        return -6;
    }
    if (LDU < 1) {
        plasma_error("PLASMA_cgesvd", "illegal value of LDU");
        return -9;
    }
    if (LDVT < 1) {
        plasma_error("PLASMA_cgesvd", "illegal value of LDVT");
        return -11;
    }
    if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) || 
         ( descT->m != MT*IB ) || (descT->n != NT*NB) ) {
        plasma_error("PLASMA_cgesvd", "invalid T descriptor");
        return -12;
    }
    /* Quick return */
    if (min(M, N) == 0) {
        return PLASMA_SUCCESS;
    }
    if (jobu == PlasmaVec) {
        plasma_error("PLASMA_cgesvd", "computing the singular vectors is not supported in this version");
        return -1;
    }
    if (jobvt == PlasmaVec) {
        plasma_error("PLASMA_cgesvd", "computing the singular vectors is not supported in this version");
        return -2;
    }
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA,   A, NB, NB,  LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
        if (jobu == PlasmaVec){
            plasma_cooplap2tile( descU,   U, NB, NB,  LDU, M, 0, 0, M, M, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descU)));
        }
        if (jobvt == PlasmaVec){
            plasma_cooplap2tile( descVT, VT, NB, NB, LDVT, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descU)); plasma_desc_mat_free(&(descVT)));
        }
    } else {
        plasma_ciplap2tile( descA,   A, NB, NB,  LDA, N, 0, 0, M, N);
        if (jobu == PlasmaVec){
            plasma_ciplap2tile( descU,   U, NB, NB,  LDU, M, 0, 0, M, M);
        }
        if (jobvt == PlasmaVec){
            plasma_ciplap2tile( descVT, VT, NB, NB, LDVT, N, 0, 0, N, N);
        }
    }
    /* Call the tile interface */
    PLASMA_cgesvd_Tile_Async(jobu, jobvt, &descA, S, &descU, &descVT, descT, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descA,   A, NB, NB,  LDA, N );
        if (jobu == PlasmaVec){
            plasma_cooptile2lap( descU,   U, NB, NB,  LDU, M );
        }
        if (jobvt == PlasmaVec){
            plasma_cooptile2lap( descVT, VT, NB, NB, LDVT, N );
        }
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        if (jobu == PlasmaVec){
            plasma_desc_mat_free(&descU);
        }
        if (jobvt == PlasmaVec){
            plasma_desc_mat_free(&descVT);
        }
    } else {
        plasma_ciptile2lap( descA,   A, NB, NB,  LDA, N );
        if (jobu == PlasmaVec){
            plasma_ciptile2lap( descU,   U, NB, NB,  LDU, M );
        }
        if (jobvt == PlasmaVec){
            plasma_ciptile2lap( descVT, VT, NB, NB, LDVT, N );
        }
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_cgetrf	(	int	M,
		int	N,
		PLASMA_Complex32_t *	A,
		int	LDA,
		int *	IPIV
	)

PLASMA_cgetrf - Computes an LU factorization of a general M-by-N matrix A using the tile LU algorithm with partial tile pivoting with row interchanges.

Parameters:

[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in,out]	A	On entry, the M-by-N matrix to be factored. On exit, the tile factors L and U from the factorization.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[out]	IPIV	The pivot indices that define the permutations.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.

See also:: PLASMA_cgetrf_Tile; PLASMA_cgetrf_Tile_Async; PLASMA_cgetrf; PLASMA_dgetrf; PLASMA_sgetrf

Definition at line 62 of file cgetrf.c.

References A, plasma_desc_t::mat, max, plasma_desc_t::mb, min, plasma_desc_t::mt, plasma_context_self(), plasma_desc_init(), plasma_dynamic_call_4, plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CGESV, PLASMA_NB, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, plasma_tune(), PlasmaComplexFloat, and plasma_sequence_t::status.

{
    int NB, NBNB, minMN;
    int status;
    PLASMA_desc descA ;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cgetrf", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (M < 0) {
        plasma_error("PLASMA_cgetrf", "illegal value of M");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_cgetrf", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_cgetrf", "illegal value of LDA");
        return -4;
    }
    /* Quick return */
    if (min(M, N) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_CGESV, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cgetrf", "plasma_tune() failed");
        return status;
    }
    /* Set NT & NTRHS */
    NB   = PLASMA_NB;
    NBNB = NB*NB;
    plasma_sequence_create(plasma, &sequence);
    descA = plasma_desc_init(
        PlasmaComplexFloat,
        NB, NB, NBNB,
        LDA, N, 0, 0, M, N);
    descA.mat = A;
    minMN = min(M, N);
    memset(IPIV, 0, minMN*sizeof(int));
    /* Call the tile interface */
    plasma_dynamic_call_4(plasma_pcgetrf_reclap,
        PLASMA_desc, descA,
        int*, IPIV,
        PLASMA_sequence*, sequence,
        PLASMA_request*, &request);
    plasma_dynamic_sync();
    /*
     * Generate the correct IPIV (Has to be moved in a task)
     */
    { 
        int i, inc, tmp, j;
        for(i=1; i<descA.mt; i++) {
            inc = i*descA.mb;
            tmp = min( minMN - inc, descA.mb);
            if ( tmp < 1 )
              break;
            
            for (j=0; j<tmp; j++)
                IPIV[inc+j] = IPIV[inc+j] + inc;
        }
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_cgetrf_incpiv	(	int	M,
		int	N,
		PLASMA_Complex32_t *	A,
		int	LDA,
		PLASMA_Complex32_t *	L,
		int *	IPIV
	)

PLASMA_cgetrf_incpiv - Computes an LU factorization of a general M-by-N matrix A using the tile LU algorithm with partial tile pivoting with row interchanges.

Parameters:

[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in,out]	A	On entry, the M-by-N matrix to be factored. On exit, the tile factors L and U from the factorization.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[out]	L	On exit, auxiliary factorization data, related to the tile L factor, required by PLASMA_cgetrs_incpiv to solve the system of equations.
[out]	IPIV	The pivot indices that define the permutations (not equivalent to LAPACK).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.

See also:: PLASMA_cgetrf_incpiv_Tile; PLASMA_cgetrf_incpiv_Tile_Async; PLASMA_cgetrf_incpiv; PLASMA_dgetrf_incpiv; PLASMA_sgetrf_incpiv; PLASMA_cgetrs_incpiv

Definition at line 65 of file cgetrf_incpiv.c.

References L, plasma_desc_t::mat, max, min, PLASMA_cgetrf_incpiv_Tile_Async(), plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CGESV, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaComplexFloat, and plasma_sequence_t::status.

{
    int NB, IB, IBNB, MT, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descL;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cgetrf_incpiv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (M < 0) {
        plasma_error("PLASMA_cgetrf_incpiv", "illegal value of M");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_cgetrf_incpiv", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_cgetrf_incpiv", "illegal value of LDA");
        return -4;
    }
    /* Quick return */
    if (min(M, N) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_CGESV, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cgetrf_incpiv", "plasma_tune() failed");
        return status;
    }
    /* Set NT & NTRHS */
    NB   = PLASMA_NB;
    IB   = PLASMA_IB;
    IBNB = IB*NB;
    MT   = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT   = (N%NB==0) ? (N/NB) : (N/NB+1);
    plasma_sequence_create(plasma, &sequence);
    descL = plasma_desc_init(
        PlasmaComplexFloat,
        IB, NB, IBNB,
        MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);
    descL.mat = L;
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N);
    }
    /* Call the tile interface */
    PLASMA_cgetrf_incpiv_Tile_Async(&descA, &descL, IPIV, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_cgetri	(	int	N,
		PLASMA_Complex32_t *	A,
		int	LDA,
		int *	IPIV
	)

PLASMA_cgetri - Computes the inverse of a matrix using the LU factorization computed by PLASMA_cgetrf. This method inverts U and then computes inv(A) by solving the system inv(A)*L = inv(U) for inv(A).

Parameters:

[in]	N	The order of the matrix A. N >= 0.
[in,out]	A	On entry, the triangular factor L or U from the factorization A = PLU as computed by PLASMA_cgetrf. On exit, if return value = 0, the inverse of the original matrix A.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in]	IPIV	The pivot indices that define the permutations as returned by PLASMA_cgetrf.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, the (i,i) element of the factor U is exactly zero; The matrix is singular and its inverse could not be computed.

See also:: PLASMA_cgetri_Tile; PLASMA_cgetri_Tile_Async; PLASMA_cgetri; PLASMA_dgetri; PLASMA_sgetri; PLASMA_cgetrf

Definition at line 63 of file cgetri.c.

References max, PLASMA_Alloc_Workspace_cgetri_Tile_Async(), PLASMA_cgetri_Tile_Async(), plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CGESV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), and plasma_sequence_t::status.

{
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    PLASMA_desc descW;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cgetri", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (N < 0) {
        plasma_error("PLASMA_cgetri", "illegal value of N");
        return -1;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_cgetri", "illegal value of LDA");
        return -3;
    }
    /* Quick return */
    if (max(N, 0) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CGESV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cgetri", "plasma_tune() failed");
        return status;
    }
    /* Set NT */
    NB   = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ciplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, N, N);
    }
    /* Allocate workspace */
    PLASMA_Alloc_Workspace_cgetri_Tile_Async(&descA, &descW);
    /* Call the tile interface */
    PLASMA_cgetri_Tile_Async(&descA, IPIV, &descW, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    plasma_desc_mat_free(&(descW));
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_cgetrs	(	PLASMA_enum	trans,
		int	N,
		int	NRHS,
		PLASMA_Complex32_t *	A,
		int	LDA,
		int *	IPIV,
		PLASMA_Complex32_t *	B,
		int	LDB
	)

PLASMA_cgetrs - Solves a system of linear equations A * X = B, with a general N-by-N matrix A using the tile LU factorization computed by PLASMA_cgetrf.

Parameters:

[in]	trans	Intended to specify the the form of the system of equations: = PlasmaNoTrans: A * X = B (No transpose) = PlasmaTrans: A*T X = B (Transpose) = PlasmaConjTrans: A*H X = B (Conjugate transpose)
[in]	N	The order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]	A	The tile factors L and U from the factorization, computed by PLASMA_cgetrf.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in]	IPIV	The pivot indices from PLASMA_cgetrf.
[in,out]	B	On entry, the N-by-NRHS matrix of right hand side matrix B. On exit, the solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS successful exit

Returns:: <0 if -i, the i-th argument had an illegal value

See also:: PLASMA_cgetrs_Tile; PLASMA_cgetrs_Tile_Async; PLASMA_cgetrs; PLASMA_dgetrs; PLASMA_sgetrs; PLASMA_cgetrf

Definition at line 72 of file cgetrs.c.

References max, min, PLASMA_cgetrs_Tile_Async(), plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CGESV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaConjTrans, PlasmaNoTrans, PlasmaTrans, and plasma_sequence_t::status.

{
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cgetrs", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if ( (trans != PlasmaNoTrans) && 
         (trans != PlasmaTrans)   &&
         (trans != PlasmaConjTrans)) {
        plasma_error("PLASMA_cgetrs", "illegal value of trans");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_cgetrs", "illegal value of N");
        return -2;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_cgetrs", "illegal value of NRHS");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_cgetrs", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_cgetrs", "illegal value of LDB");
        return -8;
    }
    /* Quick return */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_CGESV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cgetrs", "plasma_tune() failed");
        return status;
    }
    /* Set NT & NTRHS */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_cooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        plasma_ciplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
    }
    /* Call the tile interface */
    PLASMA_cgetrs_Tile_Async(trans, &descA, IPIV, &descB, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_ciptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

int PLASMA_cgetrs_incpiv	(	PLASMA_enum	trans,
		int	N,
		int	NRHS,
		PLASMA_Complex32_t *	A,
		int	LDA,
		PLASMA_Complex32_t *	L,
		int *	IPIV,
		PLASMA_Complex32_t *	B,
		int	LDB
	)

PLASMA_cgetrs_incpiv - Solves a system of linear equations A * X = B, with a general N-by-N matrix A using the tile LU factorization computed by PLASMA_cgetrf_incpiv.

Parameters:

[in]	trans	Intended to specify the the form of the system of equations: = PlasmaNoTrans: A * X = B (No transpose) = PlasmaTrans: A*T X = B (Transpose) = PlasmaConjTrans: A*H X = B (Conjugate transpose) Currently only PlasmaNoTrans is supported.
[in]	N	The order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]	A	The tile factors L and U from the factorization, computed by PLASMA_cgetrf_incpiv.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in]	L	Auxiliary factorization data, related to the tile L factor, computed by PLASMA_cgetrf_incpiv.
[in]	IPIV	The pivot indices from PLASMA_cgetrf_incpiv (not equivalent to LAPACK).
[in,out]	B	On entry, the N-by-NRHS matrix of right hand side matrix B. On exit, the solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS successful exit

Returns:: <0 if -i, the i-th argument had an illegal value

See also:: PLASMA_cgetrs_incpiv_Tile; PLASMA_cgetrs_incpiv_Tile_Async; PLASMA_cgetrs_incpiv; PLASMA_dgetrs_incpiv; PLASMA_sgetrs_incpiv; PLASMA_cgetrf_incpiv

Definition at line 75 of file cgetrs_incpiv.c.

References L, plasma_desc_t::mat, max, min, PLASMA_cgetrs_incpiv_Tile_Async(), plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_NOT_SUPPORTED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CGESV, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaComplexFloat, PlasmaNoTrans, and plasma_sequence_t::status.

{
    int NB, IB, IBNB, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB, descL;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cgetrs_incpiv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (trans != PlasmaNoTrans) {
        plasma_error("PLASMA_cgetrs_incpiv", "only PlasmaNoTrans supported");
        return PLASMA_ERR_NOT_SUPPORTED;
    }
    if (N < 0) {
        plasma_error("PLASMA_cgetrs_incpiv", "illegal value of N");
        return -2;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_cgetrs_incpiv", "illegal value of NRHS");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_cgetrs_incpiv", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_cgetrs_incpiv", "illegal value of LDB");
        return -9;
    }
    /* Quick return */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_CGESV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cgetrs_incpiv", "plasma_tune() failed");
        return status;
    }
    /* Set NT & NTRHS */
    NB    = PLASMA_NB;
    IB    = PLASMA_IB;
    IBNB  = IB*NB;
    NT    = (N%NB==0) ? (N/NB) : (N/NB+1);
    plasma_sequence_create(plasma, &sequence);
    descL = plasma_desc_init(
        PlasmaComplexFloat,
        IB, NB, IBNB,
        NT*IB, NT*NB, 0, 0, NT*IB, NT*NB);
    descL.mat = L;
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_cooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        plasma_ciplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
    }
    /* Call the tile interface */
    PLASMA_cgetrs_incpiv_Tile_Async(&descA, &descL, IPIV, &descB, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_ciptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_cheev	(	PLASMA_enum	jobz,
		PLASMA_enum	uplo,
		int	N,
		PLASMA_Complex32_t *	A,
		int	LDA,
		float *	W,
		PLASMA_desc *	descT,
		PLASMA_Complex32_t *	Q,
		int	LDQ
	)

PLASMA_cheev - Computes all eigenvalues and, optionally, eigenvectors of a complex Hermitian matrix A. The matrix A is preliminary reduced to tridiagonal form using a two-stage approach: First stage: reduction to band tridiagonal form; Second stage: reduction from band to tridiagonal form. Note: Only PlasmaNoVec supported!

Parameters:

[in]	jobz	Intended usage: = PlasmaNoVec: computes eigenvalues only; = PlasmaVec: computes eigenvalues and eigenvectors. Note: Only PlasmaNoVec supported!
[in]	uplo	Specifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The order of the matrix A. N >= 0.
[in,out]	A	On entry, the symmetric (or Hermitian) matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, the lower triangle (if uplo = PlasmaLower) or the upper triangle (if uplo = PlasmaUpper) of A, including the diagonal, is destroyed.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[out]	W	On exit, if info = 0, the eigenvalues.
[in,out]	descT	On entry, descriptor as return by PLASMA_Alloc_Workspace_cheev On exit, contains auxiliary factorization data.
[out]	Q	On exit, if jobz = PlasmaVec and info = 0, the eigenvectors.
[in]	LDQ	The leading dimension of the array Q. LDQ >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if INFO = i, the algorithm failed to converge; i off-diagonal elements of an intermediate tridiagonal form did not converge to zero.

See also:: PLASMA_cheev_Tile; PLASMA_cheev_Tile_Async; PLASMA_cheev; PLASMA_dsyev; PLASMA_ssyev

Definition at line 96 of file cheev.c.

References plasma_desc_t::m, plasma_desc_t::mat, max, plasma_desc_t::n, plasma_cdesc_alloc, PLASMA_cheev_Tile_Async(), plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, plasma_desc_check(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), PLASMA_FUNC_CHEEV, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaComplexFloat, PlasmaLower, PlasmaNoVec, PlasmaUpper, PlasmaVec, Q, and plasma_sequence_t::status.

{
    int NB, IB, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descQ;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_error("PLASMA_cheev", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Tune NB & IB depending on N; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CHEEV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cheev", "plasma_tune() failed");
        return status;
    }
    /* Set NT */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);
    /* Check input arguments */
    if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
        plasma_error("PLASMA_cheev", "illegal value of jobz");
        return -1;
    }
    if (uplo != PlasmaLower && uplo != PlasmaUpper) {
        plasma_error("PLASMA_cheev", "illegal value of uplo");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_cheev", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_cheev", "illegal value of LDA");
        return -5;
    }
    if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) || 
         ( descT->m != NT*IB ) || (descT->n != NT*NB) ) {
        plasma_error("PLASMA_cheev", "invalid T descriptor");
        return -7;
    }
    if (LDQ < max(1, N)) {
        plasma_error("PLASMA_cheev", "illegal value of LDQ");
        return -9;
    }
    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;
    if (jobz == PlasmaVec) {
        plasma_error("PLASMA_cheev", "computing the eigenvectors is not supported in this version");
        return -1;
    }
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
        if (jobz == PlasmaVec) {
            /* No need for conversion, it's just output */
            plasma_cdesc_alloc( descQ, NB, NB, LDQ, N, 0, 0, N, N, 
                                plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descQ)) );
        }
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
        if (jobz == PlasmaVec) {
            /* No need for conversion, it's just output */
            descQ = plasma_desc_init(
                PlasmaComplexFloat, NB, NB, NB*NB,
                LDQ, N, 0, 0, N, N);
            descQ.mat = Q;
        }
    }
    /* Call the tile interface */
    PLASMA_cheev_Tile_Async(jobz, uplo, &descA, W, descT, &descQ, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descA, A, NB, NB, LDA, N );
        if (jobz == PlasmaVec) {
           plasma_cooptile2lap( descQ, Q, NB, NB, LDQ, N );
        }
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        if (jobz == PlasmaVec)
           plasma_desc_mat_free(&descQ);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N );
        if (jobz == PlasmaVec)
           plasma_ciptile2lap( descQ, Q, NB, NB, LDQ, N );
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_chegst	(	PLASMA_enum	itype,
		PLASMA_enum	uplo,
		int	N,
		PLASMA_Complex32_t *	A,
		int	LDA,
		PLASMA_Complex32_t *	B,
		int	LDB
	)

PLASMA_chegst - reduces a complex Hermitian-definite generalized eigenproblem to standard form. If PlasmaItype == 1, the problem is A*x = lambda*B*x, and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) If PlasmaItype == 2 or 3, the problem is A*B*x = lambda*x or B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L. B must have been previously factorized as U**H*U or L*L**H by PLASMA_CPOTRF.

Parameters:

[in]	PlasmaItype	Intended usage: = 1: Ax=(lambda)Bx = 2: ABx=(lambda)x = 3: BAx=(lambda)x
[in]	uplo	Specifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The order of the matrices A and B. N >= 0.
[in,out]	A	On entry, the symmetric (or Hermitian) matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if return value == 0, the transformed matrix, stored in the same format as A.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in,out]	B	On entry, the triangular factor from the Cholesky factorization of B, as returned by PLASMA_CPOTRF.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_chegst_Tile; PLASMA_chegst_Tile_Async; PLASMA_chegst; PLASMA_dsygst; PLASMA_ssygst

Definition at line 85 of file chegst.c.

References max, PLASMA_chegst_Tile_Async(), plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CHEGST, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLower, PlasmaUpper, and plasma_sequence_t::status.

{
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_chegst", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (itype != 1 && itype != 2 && itype != 3) {
        plasma_error("PLASMA_chegst", "Illegal value of itype");
        return -1;
    }
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_chegst", "Illegal value of uplo");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_chegst", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_chegst", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_chegst", "illegal value of LDB");
        return -7;
    }
    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_CHEGST, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_chegst", "plasma_tune() failed");
        return status;
    }
    /* Set NT */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
        plasma_cooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, N, N, plasma_desc_mat_free(&(descB)) );
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N);
        plasma_ciplap2tile( descB, B, NB, NB, LDB, N, 0, 0, N, N);
    }
    /* Call the tile interface */
    PLASMA_chegst_Tile_Async(itype, uplo, &descA, &descB, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_cooptile2lap( descB, B, NB, NB, LDB, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N );
        plasma_ciptile2lap( descB, B, NB, NB, LDB, N );
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_chegv	(	PLASMA_enum	itype,
		PLASMA_enum	jobz,
		PLASMA_enum	uplo,
		int	N,
		PLASMA_Complex32_t *	A,
		int	LDA,
		PLASMA_Complex32_t *	B,
		int	LDB,
		float *	W,
		PLASMA_desc *	descT,
		PLASMA_Complex32_t *	Q,
		int	LDQ
	)

PLASMA_chegv - Computes all eigenvalues and, optionally, eigenvectors of a complex generalized Hermitian-definite eigenproblem of the form: A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and B are assumed to be Hermitian and B is also positive definite. Note: Only PlasmaNoVec supported!

Parameters:

[in]	PlasmaItype	Intended usage: = 1: Ax=(lambda)Bx = 2: ABx=(lambda)x = 3: BAx=(lambda)x
[in]	jobz	Intended usage: = PlasmaNoVec: computes eigenvalues only; = PlasmaVec: computes eigenvalues and eigenvectors. Note: Only PlasmaNoVec supported!
[in]	uplo	Specifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A and B are stored; = PlasmaLower: Lower triangle of A and B are stored.
[in]	N	The order of the matrix A. N >= 0.
[in,out]	A	On entry, the symmetric (or Hermitian) matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if jobz = PlasmaVec, then if return value = 0, A contains the matrix Z of eigenvectors. The eigenvectors are normalized as follows: if ITYPE = 1 or 2, Z*HBZ = I; if ITYPE = 3, ZHinv(B)*Z = I. If jobz = PlasmaNoVec, then on exit the lower triangle (if uplo = PlasmaLower) or the upper triangle (if uplo = PlasmaUpper) of A, including the diagonal, is destroyed.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in,out]	B	On entry, the symmetric (or Hermitian) positive definite matrix B. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of B contains the upper triangular part of the matrix B, and the strictly lower triangular part of B is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of B contains the lower triangular part of the matrix B, and the strictly upper triangular part of B is not referenced. On exit, if return value <= N, the part of B containing the matrix is overwritten by the triangular factor U or L from the Cholesky factorization B = U*HU or B = LL*H.
[in]	LDB	The leading dimension of the array B. LDA >= max(1,N).
[out]	W	On exit, if info = 0, the eigenvalues.
[in,out]	descT	On entry, descriptor as return by PLASMA_Alloc_Workspace_chegv On exit, contains auxiliary factorization data.
[out]	Q	On exit, if jobz = PlasmaVec and info = 0, the eigenvectors.
[in]	LDQ	The leading dimension of Q.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
<=N	if INFO = i, plasma_chegv failed to converge; i off-diagonal elements of an intermediate tridiagonal form did not converge to zero.
>N	if INFO = N + i, for 1 <= i <= N, then the leading minor of order i of B is not positive definite. The factorization of B could not be completed and no eigenvalues or eigenvectors were computed.

See also:: PLASMA_chegv_Tile; PLASMA_chegv_Tile_Async; PLASMA_chegv; PLASMA_dsygv; PLASMA_ssygv

Definition at line 128 of file chegv.c.

References plasma_desc_t::m, plasma_desc_t::mat, max, plasma_desc_t::n, plasma_cdesc_alloc, PLASMA_chegv_Tile_Async(), plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, plasma_desc_check(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), PLASMA_FUNC_CHEGV, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaComplexFloat, PlasmaLower, PlasmaNoVec, PlasmaUpper, PlasmaVec, Q, and plasma_sequence_t::status.

{
    int NB, IB, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB, descQ;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_error("PLASMA_chegv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Tune NB & IB depending on N; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CHEGV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_chegv", "plasma_tune() failed");
        return status;
    }
    /* Set NT */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);
    /* Check input arguments */
    if (itype != 1 && itype != 2 && itype != 3) {
        plasma_error("PLASMA_chegv", "Illegal value of itype");
        return -1;
    }
    if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
        plasma_error("PLASMA_chegv", "illegal value of jobz");
        return -2;
    }
    if (uplo != PlasmaLower && uplo!= PlasmaUpper) {
        plasma_error("PLASMA_chegv", "only PlasmaLower supported");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_chegv", "illegal value of N");
        return -4;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_chegv", "illegal value of LDA");
        return -6;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_chegv", "illegal value of LDB");
        return -8;
    }
    if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) || 
         ( descT->m != NT*IB ) || (descT->n != NT*NB) ) {
        plasma_error("PLASMA_chegv", "invalid T descriptor");
        return -10;
    }
    if (LDQ < max(1, N)) {
        plasma_error("PLASMA_chegv", "illegal value of LDQ");
        return -12;
    }
    
    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;
    if (jobz == PlasmaVec) {
        plasma_error("PLASMA_chegv", "computing the eigenvectors is not supported in this version");
        return -1;
    }
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, 
                             plasma_desc_mat_free(&(descA)) );
        plasma_cooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, N, N, 
                             plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)) );
        if (jobz == PlasmaVec) {
            /* No need for conversion, it's just output */
            plasma_cdesc_alloc( descQ, NB, NB, LDQ, N, 0, 0, N, N, 
                                plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descQ)) );
        }
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
        plasma_ciplap2tile( descB, B, NB, NB, LDB, N, 0, 0, N, N );
        if (jobz == PlasmaVec) {
            /* No need for conversion, it's just output */
            descQ = plasma_desc_init(
                PlasmaComplexFloat, NB, NB, NB*NB,
                LDQ, N, 0, 0, N, N);
            descQ.mat = Q;
        }
    }
    /* Call the tile interface */
    PLASMA_chegv_Tile_Async(itype, PlasmaNoVec, uplo, 
                            &descA, &descB, W, 
                            descT, &descQ, 
                            sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_cooptile2lap( descB, B, NB, NB, LDB, N );
        if (jobz == PlasmaVec) {
           plasma_cooptile2lap( descQ, Q, NB, NB, LDQ, N );
        }
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
        if (jobz == PlasmaVec)
           plasma_desc_mat_free(&descQ);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N );
        plasma_ciptile2lap( descB, B, NB, NB, LDB, N );
        if (jobz == PlasmaVec)
           plasma_ciptile2lap( descQ, Q, NB, NB, LDQ, N );
        plasma_dynamic_sync();
    }
    
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_chemm	(	PLASMA_enum	side,
		PLASMA_enum	uplo,
		int	M,
		int	N,
		PLASMA_Complex32_t	alpha,
		PLASMA_Complex32_t *	A,
		int	LDA,
		PLASMA_Complex32_t *	B,
		int	LDB,
		PLASMA_Complex32_t	beta,
		PLASMA_Complex32_t *	C,
		int	LDC
	)

PLASMA_chemm - Performs one of the matrix-matrix operations

$C = \alpha \times A \times B + \beta \times C$

or

$C = \alpha \times B \times A + \beta \times C$

where alpha and beta are scalars, A is an hermitian matrix and B and C are m by n matrices.

Parameters:

[in]	side	Specifies whether the hermitian matrix A appears on the left or right in the operation as follows: = PlasmaLeft: $C = \alpha \times A \times B + \beta \times C$ = PlasmaRight: $C = \alpha \times B \times A + \beta \times C$
[in]	uplo	Specifies whether the upper or lower triangular part of the hermitian matrix A is to be referenced as follows: = PlasmaLower: Only the lower triangular part of the hermitian matrix A is to be referenced. = PlasmaUpper: Only the upper triangular part of the hermitian matrix A is to be referenced.
[in]	M	Specifies the number of rows of the matrix C. M >= 0.
[in]	N	Specifies the number of columns of the matrix C. N >= 0.
[in]	alpha	Specifies the scalar alpha.
[in]	A	A is a LDA-by-ka matrix, where ka is M when side = PlasmaLeft, and is N otherwise. Only the uplo triangular part is referenced.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,ka).
[in]	B	B is a LDB-by-N matrix, where the leading M-by-N part of the array B must contain the matrix B.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,M).
[in]	beta	Specifies the scalar beta.
[in,out]	C	C is a LDC-by-N matrix. On exit, the array is overwritten by the M by N updated matrix.
[in]	LDC	The leading dimension of the array C. LDC >= max(1,M).

Returns:

Return values:

PLASMA_SUCCESS successful exit

See also:: PLASMA_chemm_Tile; PLASMA_chemm; PLASMA_dhemm; PLASMA_shemm

Definition at line 94 of file chemm.c.

References max, PLASMA_chemm_Tile_Async(), plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CHEMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLeft, PlasmaLower, PlasmaRight, PlasmaUpper, and plasma_sequence_t::status.

{
    int NB;
    int Am;
    int status;
    PLASMA_desc descA, descB, descC;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_chemm", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if ( (side != PlasmaLeft) && (side != PlasmaRight) ){
        plasma_error("PLASMA_chemm", "illegal value of side");
        return -1;
    }
    if ((uplo != PlasmaLower) && (uplo != PlasmaUpper)) {
        plasma_error("PLASMA_chemm", "illegal value of uplo");
        return -2;
    }
    Am = ( side == PlasmaLeft ) ? M : N;
    if (M < 0) {
        plasma_error("PLASMA_chemm", "illegal value of M");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_chemm", "illegal value of N");
        return -4;
    }
    if (LDA < max(1, Am)) {
        plasma_error("PLASMA_chemm", "illegal value of LDA");
        return -7;
    }
    if (LDB < max(1, M)) {
        plasma_error("PLASMA_chemm", "illegal value of LDB");
        return -9;
    }
    if (LDC < max(1, M)) {
        plasma_error("PLASMA_chemm", "illegal value of LDC");
        return -12;
    }
    /* Quick return */
    if (M == 0 || N == 0 ||
        ((alpha == (PLASMA_Complex32_t)0.0) && beta == (PLASMA_Complex32_t)1.0))
        return PLASMA_SUCCESS;
    /* Tune NB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_CHEMM, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_chemm", "plasma_tune() failed");
        return status;
    }
    /* Set MT & NT & KT */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, Am, 0, 0, Am, Am, 
                             plasma_desc_mat_free(&(descA)) );
        plasma_cooplap2tile( descB, B, NB, NB, LDB, N,  0, 0, M,  N,
                             plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
        plasma_cooplap2tile( descC, C, NB, NB, LDC, N,  0, 0, M,  N,
                             plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descC)));
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, Am, 0, 0, Am, Am );
        plasma_ciplap2tile( descB, B, NB, NB, LDB, N,  0, 0, M,  N  );
        plasma_ciplap2tile( descC, C, NB, NB, LDC, N,  0, 0, M,  N  );
    }
    /* Call the tile interface */
    PLASMA_chemm_Tile_Async(
        side, uplo, alpha, &descA, &descB, beta, &descC, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
        plasma_desc_mat_free(&descC);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, Am );
        plasma_ciptile2lap( descB, B, NB, NB, LDB, N );
        plasma_ciptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_cher2k	(	PLASMA_enum	uplo,
		PLASMA_enum	trans,
		int	N,
		int	K,
		PLASMA_Complex32_t	alpha,
		PLASMA_Complex32_t *	A,
		int	LDA,
		PLASMA_Complex32_t *	B,
		int	LDB,
		float	beta,
		PLASMA_Complex32_t *	C,
		int	LDC
	)

PLASMA_cher2k - Performs one of the hermitian rank 2k operations

$C = \alpha [ op( A ) \times conjfg( op( B )' )] + conjfg( \alpha ) [ op( B ) \times conjfg( op( A )' )] + \beta C$

, or

$C = \alpha [ conjfg( op( A )' ) \times op( B ) ] + conjfg( \alpha ) [ conjfg( op( B )' ) \times op( A ) ] + \beta C$

,

where op( X ) is one of

op( X ) = X or op( X ) = conjfg( X' )

where alpha and beta are real scalars, C is an n-by-n symmetric matrix and A and B are an n-by-k matrices the first case and k-by-n matrices in the second case.

Parameters:

[in]	uplo	= PlasmaUpper: Upper triangle of C is stored; = PlasmaLower: Lower triangle of C is stored.
[in]	trans	Specifies whether the matrix A is transposed or conjfugate transposed: = PlasmaNoTrans: $C = \alpha [ op( A ) \times conjfg( op( B )' )] + conjfg( \alpha ) [ op( B ) \times conjfg( op( A )' )] + \beta C$ = PlasmaConjTrans: $C = \alpha [ conjfg( op( A )' ) \times op( B ) ] + conjfg( \alpha ) [ conjfg( op( B )' ) \times op( A ) ] + \beta C$
[in]	N	N specifies the order of the matrix C. N must be at least zero.
[in]	K	K specifies the number of columns of the A and B matrices with trans = PlasmaNoTrans. K specifies the number of rows of the A and B matrices with trans = PlasmaTrans.
[in]	alpha	alpha specifies the scalar alpha.
[in]	A	A is a LDA-by-ka matrix, where ka is K when trans = PlasmaNoTrans, and is N otherwise.
[in]	LDA	The leading dimension of the array A. LDA must be at least max( 1, N ), otherwise LDA must be at least max( 1, K ).
[in]	B	B is a LDB-by-kb matrix, where kb is K when trans = PlasmaNoTrans, and is N otherwise.
[in]	LDB	The leading dimension of the array B. LDB must be at least max( 1, N ), otherwise LDB must be at least max( 1, K ).
[in]	beta	beta specifies the scalar beta.
[in,out]	C	C is a LDC-by-N matrix. On exit, the array uplo part of the matrix is overwritten by the uplo part of the updated matrix.
[in]	LDC	The leading dimension of the array C. LDC >= max( 1, N ).

Returns:

Return values:

PLASMA_SUCCESS successful exit

See also:: PLASMA_cher2k_Tile; PLASMA_cher2k; PLASMA_dher2k; PLASMA_sher2k

Definition at line 96 of file cher2k.c.

References max, PLASMA_cher2k_Tile_Async(), plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CHERK, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaConjTrans, PlasmaLower, PlasmaNoTrans, PlasmaUpper, and plasma_sequence_t::status.

{
    int NB;
    int Am, An;
    int status;
    PLASMA_desc descA, descB, descC;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cher2k", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if ((uplo != PlasmaUpper) && (uplo != PlasmaLower)) {
        plasma_error("PLASMA_cher2k", "illegal value of uplo");
        return -1;
    }
    if ((trans != PlasmaNoTrans) && (trans != PlasmaConjTrans)) {
        plasma_error("PLASMA_cher2k", "illegal value of trans");
        return -2;
    }
    if ( trans == PlasmaNoTrans ) { 
        Am = N; An = K;
    } else {
        Am = K; An = N;
    }
    if (N < 0) {
        plasma_error("PLASMA_cher2k", "illegal value of N");
        return -3;
    }
    if (K < 0) {
        plasma_error("PLASMA_cher2k", "illegal value of K");
        return -4;
    }
    if (LDA < max(1, Am)) {
        plasma_error("PLASMA_cher2k", "illegal value of LDA");
        return -7;
    }
    if (LDB < max(1, Am)) {
        plasma_error("PLASMA_cher2k", "illegal value of LDB");
        return -9;
    }
    if (LDC < max(1, N)) {
        plasma_error("PLASMA_cher2k", "illegal value of LDC");
        return -12;
    }
    /* Quick return */
    if (N == 0 ||
        ((alpha == (PLASMA_Complex32_t)0.0 || K == 0.0) && beta == (float)1.0))
        return PLASMA_SUCCESS;
    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CHERK, N, K, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cher2k", "plasma_tune() failed");
        return status;
    }
    /* Set MT & NT & KT */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)) );
        plasma_cooplap2tile( descB, B, NB, NB, LDB, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
        plasma_cooplap2tile( descC, C, NB, NB, LDC, N,  0, 0, N,  N,  plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descC)));
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An );
        plasma_ciplap2tile( descB, B, NB, NB, LDB, An, 0, 0, Am, An );
        plasma_ciplap2tile( descC, C, NB, NB, LDC, N,  0, 0, N,  N );
    }
    /* Call the tile interface */
    PLASMA_cher2k_Tile_Async(uplo, trans, alpha, &descA, &descB, beta, &descC, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
        plasma_desc_mat_free(&descC);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, An );
        plasma_ciptile2lap( descB, B, NB, NB, LDB, An );
        plasma_ciptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_cherk	(	PLASMA_enum	uplo,
		PLASMA_enum	trans,
		int	N,
		int	K,
		float	alpha,
		PLASMA_Complex32_t *	A,
		int	LDA,
		float	beta,
		PLASMA_Complex32_t *	C,
		int	LDC
	)

PLASMA_cherk - Performs one of the hermitian rank k operations

$C = \alpha [ op( A ) \times conjfg( op( A )' )] + \beta C$

,

where op( X ) is one of

op( X ) = X or op( X ) = conjfg( X' )

where alpha and beta are real scalars, C is an n-by-n hermitian matrix and A is an n-by-k matrix in the first case and a k-by-n matrix in the second case.

Parameters:

[in]	uplo	= PlasmaUpper: Upper triangle of C is stored; = PlasmaLower: Lower triangle of C is stored.
[in]	trans	Specifies whether the matrix A is transposed or conjfugate transposed: = PlasmaNoTrans: A is not transposed; = PlasmaConjTrans: A is conjfugate transposed.
[in]	N	N specifies the order of the matrix C. N must be at least zero.
[in]	K	K specifies the number of columns of the matrix op( A ).
[in]	alpha	alpha specifies the scalar alpha.
[in]	A	A is a LDA-by-ka matrix, where ka is K when trans = PlasmaNoTrans, and is N otherwise.
[in]	LDA	The leading dimension of the array A. LDA must be at least max( 1, N ), otherwise LDA must be at least max( 1, K ).
[in]	beta	beta specifies the scalar beta
[in,out]	C	C is a LDC-by-N matrix. On exit, the array uplo part of the matrix is overwritten by the uplo part of the updated matrix.
[in]	LDC	The leading dimension of the array C. LDC >= max( 1, N ).

Returns:

Return values:

PLASMA_SUCCESS successful exit

See also:: PLASMA_cherk_Tile; PLASMA_cherk; PLASMA_dherk; PLASMA_sherk

Definition at line 85 of file cherk.c.

References max, PLASMA_cherk_Tile_Async(), plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CHERK, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaConjTrans, PlasmaLower, PlasmaNoTrans, PlasmaUpper, and plasma_sequence_t::status.

{
    int NB;
    int Am, An;
    int status;
    PLASMA_desc descA, descC;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cherk", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if ((uplo != PlasmaUpper) && (uplo != PlasmaLower)) {
        plasma_error("PLASMA_cherk", "illegal value of uplo");
        return -1;
    }
    if ((trans != PlasmaNoTrans) && (trans != PlasmaConjTrans)) {
        plasma_error("PLASMA_cherk", "illegal value of trans");
        return -2;
    }
    if ( trans == PlasmaNoTrans ) { 
        Am = N; An = K;
    } else {
        Am = K; An = N;
    }
    if (N < 0) {
        plasma_error("PLASMA_cherk", "illegal value of N");
        return -3;
    }
    if (K < 0) {
        plasma_error("PLASMA_cherk", "illegal value of K");
        return -4;
    }
    if (LDA < max(1, Am)) {
        plasma_error("PLASMA_cherk", "illegal value of LDA");
        return -7;
    }
    if (LDC < max(1, N)) {
        plasma_error("PLASMA_cherk", "illegal value of LDC");
        return -10;
    }
    /* Quick return */
    if (N == 0 ||
        ((alpha == (float)0.0 || K == 0.0) && beta == (float)1.0))
        return PLASMA_SUCCESS;
    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CHERK, N, K, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cherk", "plasma_tune() failed");
        return status;
    }
    /* Set MT & NT & KT */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)) );
        plasma_cooplap2tile( descC, C, NB, NB, LDC, N,  0, 0, N,  N,  plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descC)));
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An );
        plasma_ciplap2tile( descC, C, NB, NB, LDC, N,  0, 0, N,  N );
    }
    /* Call the tile interface */
    PLASMA_cherk_Tile_Async(uplo, trans, alpha, &descA, beta, &descC, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descC);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, An );
        plasma_ciptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_chetrd	(	PLASMA_enum	jobz,
		PLASMA_enum	uplo,
		int	N,
		PLASMA_Complex32_t *	A,
		int	LDA,
		float *	D,
		float *	E,
		PLASMA_desc *	descT,
		PLASMA_Complex32_t *	Q,
		int	LDQ
	)

PLASMA_chetrd - reduces a complex Hermitian matrix A to real symmetric tridiagonal form S using a two-stage approach First stage: reduction to band tridiagonal form (unitary Q1); Second stage: reduction from band to tridiagonal form (unitary Q2). Let Q = Q1 * Q2 be the global unitary transformation; Q**H * A * Q = S. Not LAPACK compliant as A does not contain the T elements Note: Only PlasmaNoVec supported!

Parameters:

[in]	jobz	Intended usage: = PlasmaNoVec: computes eigenvalues only; = PlasmaVec: computes eigenvalues and eigenvectors. Note: Only PlasmaNoVec supported!
[in]	uplo	Specifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The order of the matrix A. N >= 0.
[in,out]	A	On entry, the symmetric (or Hermitian) matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, the lower triangle (if uplo = PlasmaLower) or the upper triangle (if uplo = PlasmaUpper) of A, including the diagonal, is destroyed.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[out]	D	On exit, the diagonal elements of the tridiagonal matrix: D(i) = A(i,i).
[out]	E	On exit, he off-diagonal elements of the tridiagonal matrix: E(i) = A(i,i+1) if uplo = PlasmaUpper, E(i) = A(i+1,i) if uplo = PlasmaLower.
[in,out]	descT	On entry, descriptor as return by PLASMA_Alloc_Workspace_cheev On exit, contains auxiliary factorization data.
[out]	Q	On exit, if jobz = PlasmaVec and info = 0, the eigenvectors.
[in]	LDQ	The leading dimension of the array Q. LDQ >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if INFO = i, the algorithm failed to converge; i off-diagonal elements of an intermediate tridiagonal form did not converge to zero.

See also:: PLASMA_chetrd_Tile; PLASMA_chetrd_Tile_Async; PLASMA_chetrd; PLASMA_dsytrd; PLASMA_ssytrd

Definition at line 100 of file chetrd.c.

References plasma_desc_t::m, max, plasma_desc_t::n, PLASMA_chetrd_Tile_Async(), plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, plasma_desc_check(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), PLASMA_FUNC_CHETRD, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLower, PlasmaNoVec, PlasmaUpper, PlasmaVec, and plasma_sequence_t::status.

{
    int NB, IB, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descQ;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_error("PLASMA_chetrd", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Tune NB & IB depending on N; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CHETRD, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_chetrd", "plasma_tune() failed");
        return status;
    }
    /* Set NT */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);
    /* Check input arguments */
    if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
        plasma_error("PLASMA_chetrd", "illegal value of jobz");
        return -1;
    }
    if (uplo != PlasmaLower && uplo != PlasmaUpper) {
        plasma_error("PLASMA_chetrd", "illegal value of uplo");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_chetrd", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_chetrd", "illegal value of LDA");
        return -5;
    }
    if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) || 
         ( descT->m != NT*IB ) || (descT->n != NT*NB) ) {
        plasma_error("PLASMA_chetrd", "invalid T descriptor");
        return -8;
    }
    if (LDQ < max(1, N)) {
        plasma_error("PLASMA_chetrd", "illegal value of LDQ");
        return -10;
    }
    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;
    if (jobz == PlasmaVec) {
        plasma_error("PLASMA_chetrd", "computing the eigenvectors is not supported in this version");
        return -1;
    }
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        if (jobz == PlasmaVec) {
           plasma_cooplap2tile( descQ, Q, NB, NB, LDQ, N,    0, 0, N, N   , plasma_desc_mat_free(&(descQ)) );
        }
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        if (jobz == PlasmaVec)
           plasma_ciplap2tile( descQ, Q, NB, NB, LDQ, N,    0, 0, N, N   );
    }
    /* Call the tile interface */
    PLASMA_chetrd_Tile_Async(jobz, uplo, &descA, D, E, descT, &descQ, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descA, A, NB, NB, LDA, N    );
        if (jobz == PlasmaVec) {
           plasma_cooptile2lap( descQ, Q, NB, NB, LDQ, N    );
        }
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        if (jobz == PlasmaVec)
           plasma_desc_mat_free(&descQ);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N    );
        if (jobz == PlasmaVec)
           plasma_ciptile2lap( descQ, Q, NB, NB, LDQ, N    );
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_clacpy	(	PLASMA_enum	uplo,
		int	M,
		int	N,
		PLASMA_Complex32_t *	A,
		int	LDA,
		PLASMA_Complex32_t *	B,
		int	LDB
	)

PLASMA_clacpy copies all or part of a two-dimensional matrix A to another matrix B

Parameters:

[in]	uplo	Specifies the part of the matrix A to be copied to B. = PlasmaUpperLower: All the matrix A = PlasmaUpper: Upper triangular part = PlasmaLower: Lower triangular part
[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in]	A	The M-by-N matrix A. If uplo = PlasmaUpper, only the upper trapezium is accessed; if UPLO = PlasmaLower, only the lower trapezium is accessed.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[out]	B	The M-by-N matrix B. On exit, B = A in the locations specified by UPLO.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,M).

See also:: PLASMA_clacpy_Tile; PLASMA_clacpy_Tile_Async; PLASMA_clacpy; PLASMA_dlacpy; PLASMA_slacpy

Definition at line 62 of file clacpy.c.

References max, min, plasma_ciplap2tile, plasma_ciptile2lap, PLASMA_clacpy_Tile_Async(), plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CGEMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLower, PlasmaUpper, and PlasmaUpperLower.

{
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_clacpy", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if ( (uplo != PlasmaUpperLower) && 
         (uplo != PlasmaUpper) &&
         (uplo != PlasmaLower) ) {
        plasma_error("PLASMA_clacpy", "illegal value of uplo");
        return -1;
    }
    if (M < 0) {
        plasma_error("PLASMA_clacpy", "illegal value of M");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_clacpy", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_clacpy", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, M)) {
        plasma_error("PLASMA_clacpy", "illegal value of LDB");
        return -7;
    }
    /* Quick return */
    if (min(N, M) == 0)
      return (float)0.0;
    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CGEMM, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_clacpy", "plasma_tune() failed");
        return status;
    }
    /* Set NT */
    NB   = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
        plasma_cooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)) );
    } else {
        plasma_ciplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, M, N);
        plasma_ciplap2tile(  descB, B, NB, NB, LDA, N, 0, 0, M, N);
    }
    /* Call the tile interface */
    PLASMA_clacpy_Tile_Async(uplo, &descA, &descB, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descB, B, NB, NB, LDB, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ciptile2lap( descB, B, NB, NB, LDB, N );
        plasma_dynamic_sync();
    }
    plasma_sequence_destroy(plasma, sequence);
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

float PLASMA_clange	(	PLASMA_enum	norm,
		int	M,
		int	N,
		PLASMA_Complex32_t *	A,
		int	LDA,
		float *	work
	)

PLASMA_clange returns the value

clange = ( max(abs(A(i,j))), NORM = PlasmaMaxNorm ( ( norm1(A), NORM = PlasmaOneNorm ( ( normI(A), NORM = PlasmaInfNorm ( ( normF(A), NORM = PlasmaFrobeniusNorm

where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of squares). Note that max(abs(A(i,j))) is not a consistent matrix norm.

Parameters:

[in]	norm	= PlasmaMaxNorm: Max norm = PlasmaOneNorm: One norm = PlasmaInfNorm: Infinity norm = PlasmaFrobeniusNorm: Frobenius norm
[in]	M	The number of rows of the matrix A. M >= 0. When M = 0, the returned value is set to zero.
[in]	N	The number of columns of the matrix A. N >= 0. When N = 0, the returned value is set to zero.
[in]	A	The M-by-N matrix A.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[in]	work	real array of dimension (MAX(1,LWORK)), where LWORK >= M when NORM = PlasmaInfNorm; otherwise, WORK is not referenced.

Returns:

Return values:

the	norm described above.

See also:: PLASMA_clange_Tile; PLASMA_clange_Tile_Async; PLASMA_clange; PLASMA_dlange; PLASMA_slange

Definition at line 78 of file clange.c.

References max, min, plasma_ciplap2tile, plasma_ciptile2lap, PLASMA_clange_Tile_Async(), plasma_context_self(), plasma_cooplap2tile, plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CGEMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaFrobeniusNorm, PlasmaInfNorm, PlasmaMaxNorm, and PlasmaOneNorm.

{
    int NB;
    int status;
    float value;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_clange", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if ( (norm != PlasmaMaxNorm) && (norm != PlasmaOneNorm)
        && (norm != PlasmaInfNorm) && (norm != PlasmaFrobeniusNorm) ) {
        plasma_error("PLASMA_clange", "illegal value of norm");
        return -1;
    }
    if (M < 0) {
        plasma_error("PLASMA_clange", "illegal value of M");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_clange", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_clange", "illegal value of LDA");
        return -5;
    }
    /* Quick return */
    if (min(N, M) == 0)
      return (float)0.0;
    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CGEMM, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_clange", "plasma_tune() failed");
        return status;
    }
    /* Set NT */
    NB   = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ciplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, M, N);
    }
    /* Call the tile interface */
    PLASMA_clange_Tile_Async(norm, &descA, work, &value, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    plasma_sequence_destroy(plasma, sequence);
    return value;
}

Here is the call graph for this function:

Here is the caller graph for this function:

float PLASMA_clanhe	(	PLASMA_enum	norm,
		PLASMA_enum	uplo,
		int	N,
		PLASMA_Complex32_t *	A,
		int	LDA,
		float *	work
	)

PLASMA_clanhe returns the value

clanhe = ( max(abs(A(i,j))), NORM = PlasmaMaxNorm ( ( norm1(A), NORM = PlasmaOneNorm ( ( normI(A), NORM = PlasmaInfNorm ( ( normF(A), NORM = PlasmaFrobeniusNorm

where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of squares). Note that max(abs(A(i,j))) is not a consistent matrix norm.

Parameters:

[in]	norm	= PlasmaMaxNorm: Max norm = PlasmaOneNorm: One norm = PlasmaInfNorm: Infinity norm = PlasmaFrobeniusNorm: Frobenius norm
[in]	uplo	= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The number of columns/rows of the matrix A. N >= 0. When N = 0, the returned value is set to zero.
[in]	A	The N-by-N matrix A.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in]	work	real array of dimension PLASMA_SIZE is PLASMA_STATIC_SCHEDULING is used, and NULL otherwise.

Returns:

Return values:

the	norm described above.

See also:: PLASMA_clanhe_Tile; PLASMA_clanhe_Tile_Async; PLASMA_clanhe; PLASMA_dlanhe; PLASMA_slanhe

Definition at line 77 of file clanhe.c.

References max, plasma_ciplap2tile, plasma_ciptile2lap, PLASMA_clanhe_Tile_Async(), plasma_context_self(), plasma_cooplap2tile, plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CGEMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaFrobeniusNorm, PlasmaInfNorm, PlasmaLower, PlasmaMaxNorm, PlasmaOneNorm, and PlasmaUpper.

{
    int NB;
    int status;
    float value;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_clanhe", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if ( (norm != PlasmaMaxNorm) && (norm != PlasmaOneNorm)
        && (norm != PlasmaInfNorm) && (norm != PlasmaFrobeniusNorm) ) {
        plasma_error("PLASMA_clanhe", "illegal value of norm");
        return -1;
    }
    if ( (uplo != PlasmaUpper) && (uplo != PlasmaLower) ) {
        plasma_error("PLASMA_clanhe", "illegal value of uplo");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_clanhe", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_clanhe", "illegal value of LDA");
        return -5;
    }
    /* Quick return */
    if ( N == 0)
      return (float)0.0;
    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CGEMM, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_clanhe", "plasma_tune() failed");
        return status;
    }
    /* Set NT */
    NB   = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ciplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, N, N);
    }
    /* Call the tile interface */
    PLASMA_clanhe_Tile_Async(norm, uplo, &descA, work, &value, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    plasma_sequence_destroy(plasma, sequence);
    return value;
}

Here is the call graph for this function:

Here is the caller graph for this function:

float PLASMA_clansy	(	PLASMA_enum	norm,
		PLASMA_enum	uplo,
		int	N,
		PLASMA_Complex32_t *	A,
		int	LDA,
		float *	work
	)

PLASMA_clansy returns the value

clansy = ( max(abs(A(i,j))), NORM = PlasmaMaxNorm ( ( norm1(A), NORM = PlasmaOneNorm ( ( normI(A), NORM = PlasmaInfNorm ( ( normF(A), NORM = PlasmaFrobeniusNorm

where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of squares). Note that max(abs(A(i,j))) is not a consistent matrix norm.

Parameters:

[in]	norm	= PlasmaMaxNorm: Max norm = PlasmaOneNorm: One norm = PlasmaInfNorm: Infinity norm = PlasmaFrobeniusNorm: Frobenius norm
[in]	uplo	= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The number of columns/rows of the matrix A. N >= 0. When N = 0, the returned value is set to zero.
[in]	A	The N-by-N matrix A.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in]	work	real array of dimension PLASMA_SIZE is PLASMA_STATIC_SCHEDULING is used, and NULL otherwise.

Returns:

Return values:

the	norm described above.

See also:: PLASMA_clansy_Tile; PLASMA_clansy_Tile_Async; PLASMA_clansy; PLASMA_dlansy; PLASMA_slansy

Definition at line 77 of file clansy.c.

References max, plasma_ciplap2tile, plasma_ciptile2lap, PLASMA_clansy_Tile_Async(), plasma_context_self(), plasma_cooplap2tile, plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CGEMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaFrobeniusNorm, PlasmaInfNorm, PlasmaLower, PlasmaMaxNorm, PlasmaOneNorm, and PlasmaUpper.

{
    int NB;
    int status;
    float value;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_clansy", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if ( (norm != PlasmaMaxNorm) && (norm != PlasmaOneNorm)
        && (norm != PlasmaInfNorm) && (norm != PlasmaFrobeniusNorm) ) {
        plasma_error("PLASMA_clansy", "illegal value of norm");
        return -1;
    }
    if ( (uplo != PlasmaUpper) && (uplo != PlasmaLower) ) {
        plasma_error("PLASMA_clansy", "illegal value of uplo");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_clansy", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_clansy", "illegal value of LDA");
        return -5;
    }
    /* Quick return */
    if ( N == 0)
      return (float)0.0;
    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CGEMM, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_clansy", "plasma_tune() failed");
        return status;
    }
    /* Set NT */
    NB   = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ciplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, N, N);
    }
    /* Call the tile interface */
    PLASMA_clansy_Tile_Async(norm, uplo, &descA, work, &value, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    plasma_sequence_destroy(plasma, sequence);
    return value;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_cLapack_to_Tile	(	PLASMA_Complex32_t *	Af77,
		int	LDA,
		PLASMA_desc *	A
	)

PLASMA_cLapack_to_Tile - Conversion from LAPACK layout to tile layout.

Parameters:

[in]	Af77	LAPACK matrix.
[in]	LDA	The leading dimension of the matrix Af77.
[in,out]	A	Descriptor of the PLASMA matrix in tile layout. If PLASMA_TRANSLATION_MODE is set to PLASMA_INPLACE, A->mat is not used and set to Af77 when returns, else if PLASMA_TRANSLATION_MODE is set to PLASMA_OUTOFPLACE, A->mat has to be allocated before.

Returns:

Return values:

PLASMA_SUCCESS successful exit

See also:: PLASMA_cLapack_to_Tile_Async; PLASMA_cTile_to_Lapack; PLASMA_cLapack_to_Tile; PLASMA_dLapack_to_Tile; PLASMA_sLapack_to_Tile

Definition at line 55 of file ctile.c.

References A, plasma_context_self(), plasma_desc_check(), plasma_dynamic_sync, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), plasma_parallel_call_5, plasma_pclapack_to_tile(), plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, and plasma_sequence_t::status.

{
    PLASMA_desc descA = *A;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request;
    int status;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cLapack_to_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check descriptor for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cLapack_to_Tile", "invalid descriptor");
        return PLASMA_ERR_ILLEGAL_VALUE;
    }
    plasma_sequence_create(plasma, &sequence);
    plasma_parallel_call_5(
        plasma_pclapack_to_tile,
        PLASMA_Complex32_t*, Af77,
        int, LDA,
        PLASMA_desc, descA,
        PLASMA_sequence*, sequence,
        PLASMA_request*, &request);
    plasma_dynamic_sync();
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_claset	(	PLASMA_enum	uplo,
		int	M,
		int	N,
		PLASMA_Complex32_t	alpha,
		PLASMA_Complex32_t	beta,
		PLASMA_Complex32_t *	A,
		int	LDA
	)

PLASMA_claset copies all or part of a two-dimensional matrix A to another matrix B

Parameters:

[in]	uplo	Specifies the part of the matrix A to be copied to B. = PlasmaUpperLower: All the matrix A = PlasmaUpper: Upper triangular part is set. The lower triangle is unchanged. = PlasmaLower: Lower triangular part is set. The upper triangle is unchange.
[in]	M	The number of rows of the matrix A. M >= 0.
[in]	N	The number of columns of the matrix A. N >= 0.
[in]	alpha	All the offdiagonal array elements are set to alpha.
[in]	beta	All the diagonal array elements are set to beta.
[in,out]	A	On entry, the m by n matrix A. On exit, A(i,j) = ALPHA, 1 <= i <= m, 1 <= j <= n, i.ne.j; A(i,i) = BETA , 1 <= i <= min(m,n)
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).

See also:: PLASMA_claset_Tile; PLASMA_claset_Tile_Async; PLASMA_claset; PLASMA_dlaset; PLASMA_slaset

Definition at line 63 of file claset.c.

References max, min, plasma_ciplap2tile, plasma_ciptile2lap, PLASMA_claset_Tile_Async(), plasma_context_self(), plasma_cooplap2tile, plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CGEMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLower, PlasmaUpper, and PlasmaUpperLower.

{
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_claset", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if ( (uplo != PlasmaUpperLower) && 
         (uplo != PlasmaUpper) &&
         (uplo != PlasmaLower) ) {
        plasma_error("PLASMA_claset", "illegal value of uplo");
        return -1;
    }
    if (M < 0) {
        plasma_error("PLASMA_claset", "illegal value of M");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_claset", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_claset", "illegal value of LDA");
        return -5;
    }
    /* Quick return */
    if (min(N, M) == 0)
      return (float)0.0;
    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CGEMM, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_claset", "plasma_tune() failed");
        return status;
    }
    /* Set NT */
    NB   = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ciplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, M, N);
    }
    /* Call the tile interface */
    PLASMA_claset_Tile_Async(uplo, alpha, beta, &descA, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    plasma_sequence_destroy(plasma, sequence);
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

int PLASMA_claswp	(	int	N,
		PLASMA_Complex32_t *	A,
		int	LDA,
		int	K1,
		int	K2,
		int *	IPIV,
		int	INCX
	)

PLASMA_claswp - performs a series of row interchanges on the matrix A. One row interchange is initiated for each of rows K1 through K2 of A.

Parameters:

[in]	N	The order of the matrix A. N >= 0.
[in]	A	The tile factors L and U from the factorization, computed by PLASMA_cgetrf.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in]	K1	The first element of IPIV for which a row interchange will be done.
[in]	K2	The last element of IPIV for which a row interchange will be done.
[in]	IPIV	The pivot indices from PLASMA_cgetrf.
[in]	INCX	The increment between successive values of IPIV. If IPIV is negative, the pivots are applied in reverse order.

Returns:

Return values:

PLASMA_SUCCESS successful exit

Returns:: <0 if -i, the i-th argument had an illegal value

See also:: PLASMA_claswp_Tile; PLASMA_claswp_Tile_Async; PLASMA_claswp; PLASMA_dlaswp; PLASMA_slaswp; PLASMA_cgetrf

Definition at line 66 of file claswp.c.

References max, plasma_ciplap2tile, plasma_ciptile2lap, PLASMA_claswp_Tile_Async(), plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CGESV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), and plasma_sequence_t::status.

{
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_claswp", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (N < 0) {
        plasma_error("PLASMA_claswp", "illegal value of N");
        return -1;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_claswp", "illegal value of LDA");
        return -3;
    }
    /* Quick return */
    if ( N == 0 )
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_CGESV, LDA, N, N);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_claswp", "plasma_tune() failed");
        return status;
    }
    /* Set NT & NTRHS */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K2, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K2, N);
    }
    /* Call the tile interface */
    PLASMA_claswp_Tile_Async(&descA, K1, K2, IPIV, INCX, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_claswpc	(	int	N,
		PLASMA_Complex32_t *	A,
		int	LDA,
		int	K1,
		int	K2,
		int *	IPIV,
		int	INCX
	)

PLASMA_claswpc - performs a series of row interchanges on the matrix A. One row interchange is initiated for each of rows K1 through K2 of A.

Parameters:

[in]	N	The order of the matrix A. N >= 0.
[in]	A	The tile factors L and U from the factorization, computed by PLASMA_cgetrf.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in]	K1	The first element of IPIV for which a row interchange will be done.
[in]	K2	The last element of IPIV for which a row interchange will be done.
[in]	IPIV	The pivot indices from PLASMA_cgetrf.
[in]	INCX	The increment between successive values of IPIV. If IPIV is negative, the pivots are applied in reverse order.

Returns:

Return values:

PLASMA_SUCCESS successful exit

Returns:: <0 if -i, the i-th argument had an illegal value

See also:: PLASMA_claswpc_Tile; PLASMA_claswpc_Tile_Async; PLASMA_claswpc; PLASMA_dlaswpc; PLASMA_slaswpc; PLASMA_cgetrf

Definition at line 66 of file claswpc.c.

References max, plasma_ciplap2tile, plasma_ciptile2lap, PLASMA_claswpc_Tile_Async(), plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CGESV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), and plasma_sequence_t::status.

{
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_claswpc", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (N < 0) {
        plasma_error("PLASMA_claswpc", "illegal value of N");
        return -1;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_claswpc", "illegal value of LDA");
        return -3;
    }
    /* Quick return */
    if ( N == 0 )
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_CGESV, LDA, N, N);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_claswpc", "plasma_tune() failed");
        return status;
    }
    /* Set NT & NTRHS */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K2, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K2, N);
    }
    /* Call the tile interface */
    PLASMA_claswpc_Tile_Async(&descA, K1, K2, IPIV, INCX, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

int PLASMA_clauum	(	PLASMA_enum	uplo,
		int	N,
		PLASMA_Complex32_t *	A,
		int	LDA
	)

PLASMA_clauum - Computes the product U * U' or L' * L, where the triangular factor U or L is stored in the upper or lower triangular part of the array A.

If UPLO = 'U' or 'u' then the upper triangle of the result is stored, overwriting the factor U in A. If UPLO = 'L' or 'l' then the lower triangle of the result is stored, overwriting the factor L in A.

Parameters:

[in]	uplo	= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The order of the triangular factor U or L. N >= 0.
[in,out]	A	On entry, the triangular factor U or L. On exit, if UPLO = 'U', the upper triangle of A is overwritten with the upper triangle of the product U * U'; if UPLO = 'L', the lower triangle of A is overwritten with the lower triangle of the product L' * L.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_clauum_Tile; PLASMA_clauum_Tile_Async; PLASMA_clauum; PLASMA_dlauum; PLASMA_slauum; PLASMA_cpotri

Definition at line 65 of file clauum.c.

References max, plasma_ciplap2tile, plasma_ciptile2lap, PLASMA_clauum_Tile_Async(), plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLower, PlasmaUpper, and plasma_sequence_t::status.

{
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_clauum", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_clauum", "illegal value of uplo");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_clauum", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_clauum", "illegal value of LDA");
        return -4;
    }
    /* Quick return */
    if (max(N, 0) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CPOSV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_clauum", "plasma_tune() failed");
        return status;
    }
    /* Set NT */
    NB   = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ciplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, N, N);
    }
    /* Call the tile interface */
    PLASMA_clauum_Tile_Async(uplo, &descA, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

int PLASMA_cplghe	(	float	bump,
		int	N,
		PLASMA_Complex32_t *	A,
		int	LDA,
		unsigned long long int	seed
	)

PLASMA_cplghe - Generate a random hermitian matrix by tiles.

Parameters:

[in]	bump	The value to add to the diagonal to be sure to have a positive definite matrix.
[in]	N	The order of the matrix A. N >= 0.
[out]	A	On exit, The random hermitian matrix A generated.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[in]	seed	The seed used in the random generation.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_cplghe_Tile; PLASMA_cplghe_Tile_Async; PLASMA_cplghe; PLASMA_dplghe; PLASMA_splghe; PLASMA_cplrnt; PLASMA_cplgsy

Definition at line 58 of file cplghe.c.

References A, plasma_desc_t::mat, max, plasma_ciptile2lap, plasma_context_self(), PLASMA_cplghe_Tile_Async(), plasma_desc_init(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CGEMM, PLASMA_NB, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, plasma_tune(), PlasmaComplexFloat, and plasma_sequence_t::status.

{
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cplghe", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (N < 0) {
        plasma_error("PLASMA_cplghe", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_cplghe", "illegal value of LDA");
        return -4;
    }
    /* Quick return */
    if (max(0, N) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CGEMM, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cplghe", "plasma_tune() failed");
        return status;
    }
    
    /* Set NT */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    
    descA = plasma_desc_init(
        PlasmaComplexFloat, NB, NB, NB*NB,
        LDA, N, 0, 0, N, N);
    descA.mat = A;
    /* Call the tile interface */
    PLASMA_cplghe_Tile_Async( bump, &descA, seed, sequence, &request );
    plasma_ciptile2lap( descA, A, NB, NB, LDA, N );
    plasma_dynamic_sync();
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_cplgsy	(	PLASMA_Complex32_t	bump,
		int	N,
		PLASMA_Complex32_t *	A,
		int	LDA,
		unsigned long long int	seed
	)

PLASMA_cplgsy - Generate a random hermitian matrix by tiles.

Parameters:

[in]	bump	The value to add to the diagonal to be sure to have a positive definite matrix.
[in]	N	The order of the matrix A. N >= 0.
[out]	A	On exit, The random hermitian matrix A generated.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[in]	seed	The seed used in the random generation.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_cplgsy_Tile; PLASMA_cplgsy_Tile_Async; PLASMA_cplgsy; PLASMA_dplgsy; PLASMA_splgsy; PLASMA_cplrnt; PLASMA_cplgsy

Definition at line 58 of file cplgsy.c.

References A, plasma_desc_t::mat, max, plasma_ciptile2lap, plasma_context_self(), PLASMA_cplgsy_Tile_Async(), plasma_desc_init(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CGEMM, PLASMA_NB, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, plasma_tune(), PlasmaComplexFloat, and plasma_sequence_t::status.

{
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cplgsy", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (N < 0) {
        plasma_error("PLASMA_cplgsy", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_cplgsy", "illegal value of LDA");
        return -4;
    }
    /* Quick return */
    if (max(0, N) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CGEMM, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cplgsy", "plasma_tune() failed");
        return status;
    }
    
    /* Set NT */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    
    descA = plasma_desc_init(
        PlasmaComplexFloat, NB, NB, NB*NB,
        LDA, N, 0, 0, N, N);
    descA.mat = A;
    /* Call the tile interface */
    PLASMA_cplgsy_Tile_Async( bump, &descA, seed, sequence, &request );
    plasma_ciptile2lap( descA, A, NB, NB, LDA, N );
    plasma_dynamic_sync();
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_cplrnt	(	int	M,
		int	N,
		PLASMA_Complex32_t *	A,
		int	LDA,
		unsigned long long int	seed
	)

PLASMA_cplrnt - Generate a random matrix by tiles.

Parameters:

[in]	M	The number of rows of A.
[in]	N	The order of the matrix A. N >= 0.
[out]	A	On exit, The random matrix A generated.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[in]	seed	The seed used in the random generation.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_cplrnt_Tile; PLASMA_cplrnt_Tile_Async; PLASMA_cplrnt; PLASMA_dplrnt; PLASMA_splrnt; PLASMA_cplghe; PLASMA_cplgsy

Definition at line 57 of file cplrnt.c.

References A, plasma_desc_t::mat, max, min, plasma_ciptile2lap, plasma_context_self(), PLASMA_cplrnt_Tile_Async(), plasma_desc_init(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CGEMM, PLASMA_NB, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, plasma_tune(), PlasmaComplexFloat, and plasma_sequence_t::status.

{
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cplrnt", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (M < 0) {
        plasma_error("PLASMA_cplrnt", "illegal value of M");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_cplrnt", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_cplrnt", "illegal value of LDA");
        return -4;
    }
    /* Quick return */
    if (min(M, N) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CGEMM, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cplrnt", "plasma_tune() failed");
        return status;
    }
    
    /* Set NT */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    descA = plasma_desc_init(
        PlasmaComplexFloat, NB, NB, NB*NB,
        LDA, N, 0, 0, M, N);
    descA.mat = A;
    /* Call the tile interface */
    PLASMA_cplrnt_Tile_Async( &descA, seed, sequence, &request );
    plasma_ciptile2lap( descA, A, NB, NB, LDA, N );
    plasma_dynamic_sync();
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_cposv	(	PLASMA_enum	uplo,
		int	N,
		int	NRHS,
		PLASMA_Complex32_t *	A,
		int	LDA,
		PLASMA_Complex32_t *	B,
		int	LDB
	)

PLASMA_cposv - Computes the solution to a system of linear equations A * X = B, where A is an N-by-N symmetric positive definite (or Hermitian positive definite in the complex case) matrix and X and B are N-by-NRHS matrices. The Cholesky decomposition is used to factor A as

$A = \{_{L\times L^H, if uplo = PlasmaLower}^{U^H\times U, if uplo = PlasmaUpper}$

where U is an upper triangular matrix and L is a lower triangular matrix. The factored form of A is then used to solve the system of equations A * X = B.

Parameters:

[in]	uplo	Specifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The number of linear equations, i.e., the order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in,out]	A	On entry, the symmetric positive definite (or Hermitian) matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if return value = 0, the factor U or L from the Cholesky factorization A = U*HU or A = LL*H.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in,out]	B	On entry, the N-by-NRHS right hand side matrix B. On exit, if return value = 0, the N-by-NRHS solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, the leading minor of order i of A is not positive definite, so the factorization could not be completed, and the solution has not been computed.

See also:: PLASMA_cposv_Tile; PLASMA_cposv_Tile_Async; PLASMA_cposv; PLASMA_dposv; PLASMA_sposv

Definition at line 82 of file cposv.c.

References max, min, plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, PLASMA_cposv_Tile_Async(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLower, PlasmaUpper, and plasma_sequence_t::status.

{
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cposv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_cposv", "illegal value of uplo");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_cposv", "illegal value of N");
        return -2;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_cposv", "illegal value of NRHS");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_cposv", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_cposv", "illegal value of LDB");
        return -7;
    }
    /* Quick return - currently NOT equivalent to LAPACK's
     * LAPACK does not have such check for DPOSV */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_CPOSV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cposv", "plasma_tune() failed");
        return status;
    }
    /* Set NT & NTRHS */
    NB    = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_cooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        plasma_ciplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
    }
    /* Call the tile interface */
    PLASMA_cposv_Tile_Async(uplo, &descA, &descB, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_cooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_ciptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_cpotrf	(	PLASMA_enum	uplo,
		int	N,
		PLASMA_Complex32_t *	A,
		int	LDA
	)

PLASMA_cpotrf - Computes the Cholesky factorization of a symmetric positive definite (or Hermitian positive definite in the complex case) matrix A. The factorization has the form

$A = \{_{L\times L^H, if uplo = PlasmaLower}^{U^H\times U, if uplo = PlasmaUpper}$

where U is an upper triangular matrix and L is a lower triangular matrix.

Parameters:

[in]	uplo	= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The order of the matrix A. N >= 0.
[in,out]	A	On entry, the symmetric positive definite (or Hermitian) matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if return value = 0, the factor U or L from the Cholesky factorization A = U*HU or A = LL*H.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, the leading minor of order i of A is not positive definite, so the factorization could not be completed, and the solution has not been computed.

See also:: PLASMA_cpotrf_Tile; PLASMA_cpotrf_Tile_Async; PLASMA_cpotrf; PLASMA_dpotrf; PLASMA_spotrf; PLASMA_cpotrs

Definition at line 70 of file cpotrf.c.

References max, plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, PLASMA_cpotrf_Tile_Async(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLower, PlasmaUpper, and plasma_sequence_t::status.

{
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cpotrf", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_cpotrf", "illegal value of uplo");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_cpotrf", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_cpotrf", "illegal value of LDA");
        return -4;
    }
    /* Quick return */
    if (max(N, 0) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CPOSV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cpotrf", "plasma_tune() failed");
        return status;
    }
    /* Set NT */
    NB   = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ciplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, N, N);
    }
    /* Call the tile interface */
    PLASMA_cpotrf_Tile_Async(uplo, &descA, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_cpotri	(	PLASMA_enum	uplo,
		int	N,
		PLASMA_Complex32_t *	A,
		int	LDA
	)

PLASMA_cpotri - Computes the inverse of a complex Hermitian positive definite matrix A using the Cholesky factorization A = U**H*U or A = L*L**H computed by PLASMA_cpotrf.

Parameters:

[in]	uplo	= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The order of the matrix A. N >= 0.
[in,out]	A	On entry, the triangular factor U or L from the Cholesky factorization A = U*HU or A = LL*H, as computed by PLASMA_cpotrf. On exit, the upper or lower triangle of the (Hermitian) inverse of A, overwriting the input factor U or L.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, the (i,i) element of the factor U or L is zero, and the inverse could not be computed.

See also:: PLASMA_cpotri_Tile; PLASMA_cpotri_Tile_Async; PLASMA_cpotri; PLASMA_dpotri; PLASMA_spotri; PLASMA_cpotrf

Definition at line 62 of file cpotri.c.

References max, plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, PLASMA_cpotri_Tile_Async(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLower, PlasmaUpper, and plasma_sequence_t::status.

{
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cpotri", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_cpotri", "illegal value of uplo");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_cpotri", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_cpotri", "illegal value of LDA");
        return -4;
    }
    /* Quick return */
    if (max(N, 0) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CPOSV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cpotri", "plasma_tune() failed");
        return status;
    }
    /* Set NT */
    NB   = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ciplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, N, N);
    }
    /* Call the tile interface */
    PLASMA_cpotri_Tile_Async(uplo, &descA, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_cpotrs	(	PLASMA_enum	uplo,
		int	N,
		int	NRHS,
		PLASMA_Complex32_t *	A,
		int	LDA,
		PLASMA_Complex32_t *	B,
		int	LDB
	)

PLASMA_cpotrs - Solves a system of linear equations A * X = B with a symmetric positive definite (or Hermitian positive definite in the complex case) matrix A using the Cholesky factorization A = U**H*U or A = L*L**H computed by PLASMA_cpotrf.

Parameters:

[in]	uplo	= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	N	The order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]	A	The triangular factor U or L from the Cholesky factorization A = U*HU or A = LL*H, computed by PLASMA_cpotrf.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in,out]	B	On entry, the N-by-NRHS right hand side matrix B. On exit, if return value = 0, the N-by-NRHS solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_cpotrs_Tile; PLASMA_cpotrs_Tile_Async; PLASMA_cpotrs; PLASMA_dpotrs; PLASMA_spotrs; PLASMA_cpotrf

Definition at line 67 of file cpotrs.c.

References max, min, plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, PLASMA_cpotrs_Tile_Async(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLower, PlasmaUpper, and plasma_sequence_t::status.

{
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cpotrs", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_cpotrs", "illegal value of uplo");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_cpotrs", "illegal value of N");
        return -2;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_cpotrs", "illegal value of NRHS");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_cpotrs", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_cpotrs", "illegal value of LDB");
        return -7;
    }
    /* Quick return */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CPOSV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cpotrs", "plasma_tune() failed");
        return status;
    }
    /* Set NT & NTRHS */
    NB    = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_cooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        plasma_ciplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
    }
    /* Call the tile interface */
    PLASMA_cpotrs_Tile_Async(uplo, &descA, &descB, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_ciptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_csymm	(	PLASMA_enum	side,
		PLASMA_enum	uplo,
		int	M,
		int	N,
		PLASMA_Complex32_t	alpha,
		PLASMA_Complex32_t *	A,
		int	LDA,
		PLASMA_Complex32_t *	B,
		int	LDB,
		PLASMA_Complex32_t	beta,
		PLASMA_Complex32_t *	C,
		int	LDC
	)

PLASMA_csymm - Performs one of the matrix-matrix operations

$C = \alpha \times A \times B + \beta \times C$

or

$C = \alpha \times B \times A + \beta \times C$

where alpha and beta are scalars, A is an symmetric matrix and B and C are m by n matrices.

Parameters:

[in]	side	Specifies whether the symmetric matrix A appears on the left or right in the operation as follows: = PlasmaLeft: $C = \alpha \times A \times B + \beta \times C$ = PlasmaRight: $C = \alpha \times B \times A + \beta \times C$
[in]	uplo	Specifies whether the upper or lower triangular part of the symmetric matrix A is to be referenced as follows: = PlasmaLower: Only the lower triangular part of the symmetric matrix A is to be referenced. = PlasmaUpper: Only the upper triangular part of the symmetric matrix A is to be referenced.
[in]	M	Specifies the number of rows of the matrix C. M >= 0.
[in]	N	Specifies the number of columns of the matrix C. N >= 0.
[in]	alpha	Specifies the scalar alpha.
[in]	A	A is a LDA-by-ka matrix, where ka is M when side = PlasmaLeft, and is N otherwise. Only the uplo triangular part is referenced.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,ka).
[in]	B	B is a LDB-by-N matrix, where the leading M-by-N part of the array B must contain the matrix B.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,M).
[in]	beta	Specifies the scalar beta.
[in,out]	C	C is a LDC-by-N matrix. On exit, the array is overwritten by the M by N updated matrix.
[in]	LDC	The leading dimension of the array C. LDC >= max(1,M).

Returns:

Return values:

PLASMA_SUCCESS successful exit

See also:: PLASMA_csymm_Tile; PLASMA_csymm; PLASMA_dsymm; PLASMA_ssymm

Definition at line 94 of file csymm.c.

References max, plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, PLASMA_csymm_Tile_Async(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CSYMM, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLeft, PlasmaLower, PlasmaRight, PlasmaUpper, and plasma_sequence_t::status.

{
    int NB;
    int Am;
    int status;
    PLASMA_desc descA, descB, descC;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_csymm", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if ( (side != PlasmaLeft) && (side != PlasmaRight) ){
        plasma_error("PLASMA_csymm", "illegal value of side");
        return -1;
    }
    if ((uplo != PlasmaLower) && (uplo != PlasmaUpper)) {
        plasma_error("PLASMA_csymm", "illegal value of uplo");
        return -2;
    }
    Am = ( side == PlasmaLeft ) ? M : N;
    if (M < 0) {
        plasma_error("PLASMA_csymm", "illegal value of M");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_csymm", "illegal value of N");
        return -4;
    }
    if (LDA < max(1, Am)) {
        plasma_error("PLASMA_csymm", "illegal value of LDA");
        return -7;
    }
    if (LDB < max(1, M)) {
        plasma_error("PLASMA_csymm", "illegal value of LDB");
        return -9;
    }
    if (LDC < max(1, M)) {
        plasma_error("PLASMA_csymm", "illegal value of LDC");
        return -12;
    }
    /* Quick return */
    if (M == 0 || N == 0 ||
        ((alpha == (PLASMA_Complex32_t)0.0) && beta == (PLASMA_Complex32_t)1.0))
        return PLASMA_SUCCESS;
    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CSYMM, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_csymm", "plasma_tune() failed");
        return status;
    }
    /* Set MT & NT & KT */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, Am, 0, 0, Am, Am, 
                             plasma_desc_mat_free(&(descA)) );
        plasma_cooplap2tile( descB, B, NB, NB, LDB, N,  0, 0, M,  N,
                             plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
        plasma_cooplap2tile( descC, C, NB, NB, LDC, N,  0, 0, M,  N,
                             plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descC)));
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, Am, 0, 0, Am, Am );
        plasma_ciplap2tile( descB, B, NB, NB, LDB, N,  0, 0, M,  N  );
        plasma_ciplap2tile( descC, C, NB, NB, LDC, N,  0, 0, M,  N  );
    }
    /* Call the tile interface */
    PLASMA_csymm_Tile_Async(
        side, uplo, alpha, &descA, &descB, beta, &descC, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
        plasma_desc_mat_free(&descC);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, Am );
        plasma_ciptile2lap( descB, B, NB, NB, LDB, N );
        plasma_ciptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_csyr2k	(	PLASMA_enum	uplo,
		PLASMA_enum	trans,
		int	N,
		int	K,
		PLASMA_Complex32_t	alpha,
		PLASMA_Complex32_t *	A,
		int	LDA,
		PLASMA_Complex32_t *	B,
		int	LDB,
		PLASMA_Complex32_t	beta,
		PLASMA_Complex32_t *	C,
		int	LDC
	)

PLASMA_csyr2k - Performs one of the symmetric rank 2k operations

$C = \alpha [ op( A ) \times conjfg( op( B )' )] + \alpha [ op( B ) \times conjfg( op( A )' )] + \beta C$

, or

$C = \alpha [ conjfg( op( A )' ) \times op( B ) ] + \alpha [ conjfg( op( B )' ) \times op( A ) ] + \beta C$

,

where op( X ) is one of

op( X ) = X or op( X ) = conjfg( X' )

where alpha and beta are real scalars, C is an n-by-n symmetric matrix and A and B are an n-by-k matrices the first case and k-by-n matrices in the second case.

Parameters:

[in]	uplo	= PlasmaUpper: Upper triangle of C is stored; = PlasmaLower: Lower triangle of C is stored.
[in]	trans	Specifies whether the matrix A is transposed or conjfugate transposed: = PlasmaNoTrans: $C = \alpha [ op( A ) \times conjfg( op( B )' )] + \alpha [ op( B ) \times conjfg( op( A )' )] + \beta C$ = PlasmaTrans: $C = \alpha [ conjfg( op( A )' ) \times op( B ) ] + \alpha [ conjfg( op( B )' ) \times op( A ) ] + \beta C$
[in]	N	N specifies the order of the matrix C. N must be at least zero.
[in]	K	K specifies the number of columns of the A and B matrices with trans = PlasmaNoTrans. K specifies the number of rows of the A and B matrices with trans = PlasmaTrans.
[in]	alpha	alpha specifies the scalar alpha.
[in]	A	A is a LDA-by-ka matrix, where ka is K when trans = PlasmaNoTrans, and is N otherwise.
[in]	LDA	The leading dimension of the array A. LDA must be at least max( 1, N ), otherwise LDA must be at least max( 1, K ).
[in]	B	B is a LDB-by-kb matrix, where kb is K when trans = PlasmaNoTrans, and is N otherwise.
[in]	LDB	The leading dimension of the array B. LDB must be at least max( 1, N ), otherwise LDB must be at least max( 1, K ).
[in]	beta	beta specifies the scalar beta.
[in,out]	C	C is a LDC-by-N matrix. On exit, the array uplo part of the matrix is overwritten by the uplo part of the updated matrix.
[in]	LDC	The leading dimension of the array C. LDC >= max( 1, N ).

Returns:

Return values:

PLASMA_SUCCESS successful exit

See also:: PLASMA_csyr2k_Tile; PLASMA_csyr2k; PLASMA_dsyr2k; PLASMA_ssyr2k

Definition at line 96 of file csyr2k.c.

References max, plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, PLASMA_csyr2k_Tile_Async(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CSYRK, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLower, PlasmaNoTrans, PlasmaTrans, PlasmaUpper, and plasma_sequence_t::status.

{
    int NB;
    int Am, An;
    int status;
    PLASMA_desc descA, descB, descC;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_csyr2k", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if ((uplo != PlasmaUpper) && (uplo != PlasmaLower)) {
        plasma_error("PLASMA_csyr2k", "illegal value of uplo");
        return -1;
    }
    if ((trans != PlasmaNoTrans) && (trans != PlasmaTrans)) {
        plasma_error("PLASMA_csyr2k", "illegal value of trans");
        return -2;
    }
    if ( trans == PlasmaNoTrans ) { 
        Am = N; An = K;
    } else {
        Am = K; An = N;
    }
    if (N < 0) {
        plasma_error("PLASMA_csyr2k", "illegal value of N");
        return -3;
    }
    if (K < 0) {
        plasma_error("PLASMA_csyr2k", "illegal value of K");
        return -4;
    }
    if (LDA < max(1, Am)) {
        plasma_error("PLASMA_csyr2k", "illegal value of LDA");
        return -7;
    }
    if (LDB < max(1, Am)) {
        plasma_error("PLASMA_csyr2k", "illegal value of LDB");
        return -9;
    }
    if (LDC < max(1, N)) {
        plasma_error("PLASMA_csyr2k", "illegal value of LDC");
        return -12;
    }
    /* Quick return */
    if (N == 0 ||
        ((alpha == (PLASMA_Complex32_t)0.0 || K == 0.0) && beta == (PLASMA_Complex32_t)1.0))
        return PLASMA_SUCCESS;
    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CSYRK, N, K, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_csyr2k", "plasma_tune() failed");
        return status;
    }
    /* Set MT & NT & KT */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)) );
        plasma_cooplap2tile( descB, B, NB, NB, LDB, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
        plasma_cooplap2tile( descC, C, NB, NB, LDC, N,  0, 0, N,  N,  plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descC)));
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An );
        plasma_ciplap2tile( descB, B, NB, NB, LDB, An, 0, 0, Am, An );
        plasma_ciplap2tile( descC, C, NB, NB, LDC, N,  0, 0, N,  N );
    }
    /* Call the tile interface */
    PLASMA_csyr2k_Tile_Async(uplo, trans, alpha, &descA, &descB, beta, &descC, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
        plasma_desc_mat_free(&descC);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, An );
        plasma_ciptile2lap( descB, B, NB, NB, LDB, An );
        plasma_ciptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_csyrk	(	PLASMA_enum	uplo,
		PLASMA_enum	trans,
		int	N,
		int	K,
		PLASMA_Complex32_t	alpha,
		PLASMA_Complex32_t *	A,
		int	LDA,
		PLASMA_Complex32_t	beta,
		PLASMA_Complex32_t *	C,
		int	LDC
	)

PLASMA_csyrk - Performs one of the hermitian rank k operations

$C = \alpha [ op( A ) \times conjfg( op( A )' )] + \beta C$

,

where op( X ) is one of

op( X ) = X or op( X ) = conjfg( X' )

where alpha and beta are real scalars, C is an n-by-n hermitian matrix and A is an n-by-k matrix in the first case and a k-by-n matrix in the second case.

Parameters:

[in]	uplo	= PlasmaUpper: Upper triangle of C is stored; = PlasmaLower: Lower triangle of C is stored.
[in]	trans	Specifies whether the matrix A is transposed or conjfugate transposed: = PlasmaNoTrans: A is not transposed; = PlasmaTrans : A is transposed.
[in]	N	N specifies the order of the matrix C. N must be at least zero.
[in]	K	K specifies the number of columns of the matrix op( A ).
[in]	alpha	alpha specifies the scalar alpha.
[in]	A	A is a LDA-by-ka matrix, where ka is K when trans = PlasmaNoTrans, and is N otherwise.
[in]	LDA	The leading dimension of the array A. LDA must be at least max( 1, N ), otherwise LDA must be at least max( 1, K ).
[in]	beta	beta specifies the scalar beta
[in,out]	C	C is a LDC-by-N matrix. On exit, the array uplo part of the matrix is overwritten by the uplo part of the updated matrix.
[in]	LDC	The leading dimension of the array C. LDC >= max( 1, N ).

Returns:

Return values:

PLASMA_SUCCESS successful exit

See also:: PLASMA_csyrk_Tile; PLASMA_csyrk; PLASMA_dsyrk; PLASMA_ssyrk

Definition at line 85 of file csyrk.c.

References max, plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, PLASMA_csyrk_Tile_Async(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CSYRK, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLower, PlasmaNoTrans, PlasmaTrans, PlasmaUpper, and plasma_sequence_t::status.

{
    int NB;
    int Am, An;
    int status;
    PLASMA_desc descA, descC;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_csyrk", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if ((uplo != PlasmaUpper) && (uplo != PlasmaLower)) {
        plasma_error("PLASMA_csyrk", "illegal value of uplo");
        return -1;
    }
    if ((trans != PlasmaNoTrans) && (trans != PlasmaTrans)) {
        plasma_error("PLASMA_csyrk", "illegal value of trans");
        return -2;
    }
    if ( trans == PlasmaNoTrans ) { 
        Am = N; An = K;
    } else {
        Am = K; An = N;
    }
    if (N < 0) {
        plasma_error("PLASMA_csyrk", "illegal value of N");
        return -3;
    }
    if (K < 0) {
        plasma_error("PLASMA_csyrk", "illegal value of K");
        return -4;
    }
    if (LDA < max(1, Am)) {
        plasma_error("PLASMA_csyrk", "illegal value of LDA");
        return -7;
    }
    if (LDC < max(1, N)) {
        plasma_error("PLASMA_csyrk", "illegal value of LDC");
        return -10;
    }
    /* Quick return */
    if (N == 0 ||
        ((alpha == (PLASMA_Complex32_t)0.0 || K == 0.0) && beta == (PLASMA_Complex32_t)1.0))
        return PLASMA_SUCCESS;
    /* Tune NB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_CSYRK, N, K, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_csyrk", "plasma_tune() failed");
        return status;
    }
    /* Set MT & NT & KT */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)) );
        plasma_cooplap2tile( descC, C, NB, NB, LDC, N,  0, 0, N,  N,  plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descC)));
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An );
        plasma_ciplap2tile( descC, C, NB, NB, LDC, N,  0, 0, N,  N );
    }
    /* Call the tile interface */
    PLASMA_csyrk_Tile_Async(uplo, trans, alpha, &descA, beta, &descC, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descC);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, An );
        plasma_ciptile2lap( descC, C, NB, NB, LDC, N );
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_cTile_to_Lapack	(	PLASMA_desc *	A,
		PLASMA_Complex32_t *	Af77,
		int	LDA
	)

PLASMA_Tile_to_Lapack - Conversion from tile layout to LAPACK layout.

Parameters:

[in]	A	Descriptor of the PLASMA matrix in tile layout.
[in,out]	Af77	LAPACK matrix. If PLASMA_TRANSLATION_MODE is set to PLASMA_INPLACE, Af77 has to be A->mat, else if PLASMA_TRANSLATION_MODE is set to PLASMA_OUTOFPLACE, Af77 has to be allocated before.
[in]	LDA	The leading dimension of the matrix Af77.

Returns:

Return values:

PLASMA_SUCCESS successful exit

See also:: PLASMA_cTile_to_Lapack_Async; PLASMA_cLapack_to_Tile; PLASMA_cTile_to_Lapack; PLASMA_dTile_to_Lapack; PLASMA_sTile_to_Lapack

Definition at line 191 of file ctile.c.

References A, plasma_context_self(), plasma_desc_check(), plasma_dynamic_sync, PLASMA_ERR_ILLEGAL_VALUE, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), plasma_pctile_to_lapack(), plasma_sequence_create(), plasma_sequence_destroy(), plasma_static_call_5, PLASMA_SUCCESS, and plasma_sequence_t::status.

{
    PLASMA_desc descA = *A;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request;
    int status;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cTile_to_Lapack", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check descriptor for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cTile_to_Lapack", "invalid descriptor");
        return PLASMA_ERR_ILLEGAL_VALUE;
    }
    plasma_sequence_create(plasma, &sequence);
    plasma_static_call_5(
        plasma_pctile_to_lapack,
        PLASMA_desc, descA,
        PLASMA_Complex32_t*, Af77,
        int, LDA,
        PLASMA_sequence*, sequence,
        PLASMA_request*, &request);
    plasma_dynamic_sync();
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_ctrmm	(	PLASMA_enum	side,
		PLASMA_enum	uplo,
		PLASMA_enum	transA,
		PLASMA_enum	diag,
		int	N,
		int	NRHS,
		PLASMA_Complex32_t	alpha,
		PLASMA_Complex32_t *	A,
		int	LDA,
		PLASMA_Complex32_t *	B,
		int	LDB
	)

PLASMA_ctrmm - Computes B = alpha*op( A )*B or B = alpha*B*op( A ).

Parameters:

[in]	side	Specifies whether A appears on the left or on the right of X: = PlasmaLeft: AX = B = PlasmaRight: XA = B
[in]	uplo	Specifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	transA	Specifies whether the matrix A is transposed, not transposed or conjfugate transposed: = PlasmaNoTrans: A is transposed; = PlasmaTrans: A is not transposed; = PlasmaConjTrans: A is conjfugate transposed.
[in]	diag	Specifies whether or not A is unit triangular: = PlasmaNonUnit: A is non unit; = PlasmaUnit: A us unit.
[in]	N	The order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]	alpha	alpha specifies the scalar alpha.
[in]	A	The triangular matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of the array A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of the array A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If diag = PlasmaUnit, the diagonal elements of A are also not referenced and are assumed to be 1.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in,out]	B	On entry, the N-by-NRHS right hand side matrix B. On exit, if return value = 0, the N-by-NRHS solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_ctrmm_Tile; PLASMA_ctrmm_Tile_Async; PLASMA_ctrmm; PLASMA_dtrmm; PLASMA_strmm

Definition at line 88 of file ctrmm.c.

References max, min, plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, PLASMA_ctrmm_Tile_Async(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaConjTrans, PlasmaLeft, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaRight, PlasmaTrans, PlasmaUnit, PlasmaUpper, and plasma_sequence_t::status.

{
    int NB, NA;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_ctrmm", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (side != PlasmaLeft && side != PlasmaRight) {
        plasma_error("PLASMA_ctrmm", "illegal value of side");
        return -1;
    }
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_ctrmm", "illegal value of uplo");
        return -2;
    }
    if (transA != PlasmaConjTrans && transA != PlasmaNoTrans && transA != PlasmaTrans ) {
        plasma_error("PLASMA_ctrmm", "illegal value of transA");
        return -3;
    }
    if (diag != PlasmaUnit && diag != PlasmaNonUnit) {
        plasma_error("PLASMA_ctrmm", "illegal value of diag");
        return -4;
    }
    if (N < 0) {
        plasma_error("PLASMA_ctrmm", "illegal value of N");
        return -5;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_ctrmm", "illegal value of NRHS");
        return -6;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_ctrmm", "illegal value of LDA");
        return -8;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_ctrmm", "illegal value of LDB");
        return -10;
    }
    /* Quick return */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CPOSV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_ctrmm", "plasma_tune() failed");
        return status;
    }
    /* Set NT & NTRHS */
    NB = PLASMA_NB;
    if (side == PlasmaLeft) {
      NA = N;
    } else {
      NA = NRHS;
    }
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, NA,   0, 0, NA, NA,   plasma_desc_mat_free(&(descA)) );
        plasma_cooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N,  NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, NA,   0, 0, NA, NA  );
        plasma_ciplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N,  NRHS);
    }
    /* Call the tile interface */
    PLASMA_ctrmm_Tile_Async(
        side, uplo, transA, diag, alpha, &descA, &descB, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, NA   );
        plasma_ciptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_ctrsm	(	PLASMA_enum	side,
		PLASMA_enum	uplo,
		PLASMA_enum	transA,
		PLASMA_enum	diag,
		int	N,
		int	NRHS,
		PLASMA_Complex32_t	alpha,
		PLASMA_Complex32_t *	A,
		int	LDA,
		PLASMA_Complex32_t *	B,
		int	LDB
	)

PLASMA_ctrsm - Computes triangular solve A*X = B or X*A = B.

Parameters:

[in]	side	Specifies whether A appears on the left or on the right of X: = PlasmaLeft: AX = B = PlasmaRight: XA = B
[in]	uplo	Specifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	transA	Specifies whether the matrix A is transposed, not transposed or conjfugate transposed: = PlasmaNoTrans: A is transposed; = PlasmaTrans: A is not transposed; = PlasmaConjTrans: A is conjfugate transposed.
[in]	diag	Specifies whether or not A is unit triangular: = PlasmaNonUnit: A is non unit; = PlasmaUnit: A us unit.
[in]	N	The order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]	alpha	alpha specifies the scalar alpha.
[in]	A	The triangular matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of the array A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of the array A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If diag = PlasmaUnit, the diagonal elements of A are also not referenced and are assumed to be 1.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in,out]	B	On entry, the N-by-NRHS right hand side matrix B. On exit, if return value = 0, the N-by-NRHS solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_ctrsm_Tile; PLASMA_ctrsm_Tile_Async; PLASMA_ctrsm; PLASMA_dtrsm; PLASMA_strsm

Definition at line 88 of file ctrsm.c.

References max, min, plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, PLASMA_ctrsm_Tile_Async(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaConjTrans, PlasmaLeft, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaRight, PlasmaTrans, PlasmaUnit, PlasmaUpper, and plasma_sequence_t::status.

{
    int NB, NA;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_ctrsm", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (side != PlasmaLeft && side != PlasmaRight) {
        plasma_error("PLASMA_ctrsm", "illegal value of side");
        return -1;
    }
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_ctrsm", "illegal value of uplo");
        return -2;
    }
    if (transA != PlasmaConjTrans && transA != PlasmaNoTrans && transA != PlasmaTrans ) {
        plasma_error("PLASMA_ctrsm", "illegal value of transA");
        return -3;
    }
    if (diag != PlasmaUnit && diag != PlasmaNonUnit) {
        plasma_error("PLASMA_ctrsm", "illegal value of diag");
        return -4;
    }
    if (N < 0) {
        plasma_error("PLASMA_ctrsm", "illegal value of N");
        return -5;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_ctrsm", "illegal value of NRHS");
        return -6;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_ctrsm", "illegal value of LDA");
        return -8;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_ctrsm", "illegal value of LDB");
        return -10;
    }
    /* Quick return */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CPOSV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_ctrsm", "plasma_tune() failed");
        return status;
    }
    /* Set NT & NTRHS */
    NB = PLASMA_NB;
    if (side == PlasmaLeft) {
      NA = N;
    } else {
      NA = NRHS;
    }
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, NA,   0, 0, NA, NA,   plasma_desc_mat_free(&(descA)) );
        plasma_cooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N,  NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, NA,   0, 0, NA, NA  );
        plasma_ciplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N,  NRHS);
    }
    /* Call the tile interface */
    PLASMA_ctrsm_Tile_Async(
        side, uplo, transA, diag, alpha, &descA, &descB, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, NA   );
        plasma_ciptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_ctrsmpl	(	int	N,
		int	NRHS,
		PLASMA_Complex32_t *	A,
		int	LDA,
		PLASMA_Complex32_t *	L,
		int *	IPIV,
		PLASMA_Complex32_t *	B,
		int	LDB
	)

PLASMA_ctrsmpl - Performs the forward substitution step of solving a system of linear equations after the tile LU factorization of the matrix.

Parameters:

[in]	N	The order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]	A	The tile factor L from the factorization, computed by PLASMA_cgetrf_incpiv.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in]	L	Auxiliary factorization data, related to the tile L factor, computed by PLASMA_cgetrf_incpiv.
[in]	IPIV	The pivot indices from PLASMA_cgetrf_incpiv (not equivalent to LAPACK).
[in,out]	B	On entry, the N-by-NRHS right hand side matrix B. On exit, if return value = 0, the N-by-NRHS solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_ctrsmpl_Tile; PLASMA_ctrsmpl_Tile_Async; PLASMA_ctrsmpl; PLASMA_dtrsmpl; PLASMA_strsmpl; PLASMA_cgetrf_incpiv

Definition at line 67 of file ctrsmpl.c.

References L, plasma_desc_t::mat, max, min, plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, PLASMA_ctrsmpl_Tile_Async(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CGESV, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaComplexFloat, and plasma_sequence_t::status.

{
    int NB, IB, IBNB, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB, descL;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_ctrsmpl", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (N < 0) {
        plasma_error("PLASMA_ctrsmpl", "illegal value of N");
        return -1;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_ctrsmpl", "illegal value of NRHS");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_ctrsmpl", "illegal value of LDA");
        return -4;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_ctrsmpl", "illegal value of LDB");
        return -8;
    }
    /* Quick return */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CGESV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_ctrsmpl", "plasma_tune() failed");
        return status;
    }
    /* Set Mt, NT & NTRHS */
    NB    = PLASMA_NB;
    IB    = PLASMA_IB;
    IBNB  = IB*NB;
    NT    = (N%NB==0) ? (N/NB) : (N/NB+1);
    plasma_sequence_create(plasma, &sequence);
    descL = plasma_desc_init(
        PlasmaComplexFloat,
        IB, NB, IBNB,
        NT*IB, NT*NB, 0, 0, NT*IB, NT*NB);
    descL.mat = L;
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_cooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   );
        plasma_ciplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
    }
    /* Call the tile interface */
    PLASMA_ctrsmpl_Tile_Async(&descA, &descL, IPIV, &descB, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N    );
        plasma_ciptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_ctrsmrv	(	PLASMA_enum	side,
		PLASMA_enum	uplo,
		PLASMA_enum	transA,
		PLASMA_enum	diag,
		int	N,
		int	NRHS,
		PLASMA_Complex32_t	alpha,
		PLASMA_Complex32_t *	A,
		int	LDA,
		PLASMA_Complex32_t *	B,
		int	LDB
	)

PLASMA_ctrsmrv - Computes triangular solve A*X = B or X*A = B.

Parameters:

[in]	side	Specifies whether A appears on the left or on the right of X: = PlasmaLeft: AX = B = PlasmaRight: XA = B
[in]	uplo	Specifies whether the matrix A is upper triangular or lower triangular: = PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	transA	Specifies whether the matrix A is transposed, not transposed or conjfugate transposed: = PlasmaNoTrans: A is transposed; = PlasmaTrans: A is not transposed; = PlasmaConjTrans: A is conjfugate transposed.
[in]	diag	Specifies whether or not A is unit triangular: = PlasmaNonUnit: A is non unit; = PlasmaUnit: A us unit.
[in]	N	The order of the matrix A. N >= 0.
[in]	NRHS	The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
[in]	alpha	alpha specifies the scalar alpha.
[in]	A	The triangular matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular part of the array A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N lower triangular part of the array A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If diag = PlasmaUnit, the diagonal elements of A are also not referenced and are assumed to be 1.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).
[in,out]	B	On entry, the N-by-NRHS right hand side matrix B. On exit, if return value = 0, the N-by-NRHS solution matrix X.
[in]	LDB	The leading dimension of the array B. LDB >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_ctrsmrv_Tile; PLASMA_ctrsmrv_Tile_Async; PLASMA_ctrsmrv; PLASMA_dtrsmrv; PLASMA_strsmrv

Definition at line 88 of file ctrsmrv.c.

References max, min, plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, PLASMA_ctrsmrv_Tile_Async(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaConjTrans, PlasmaLeft, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaRight, PlasmaTrans, PlasmaUnit, PlasmaUpper, and plasma_sequence_t::status.

{
    int NB, NA;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_ctrsmrv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (side != PlasmaLeft && side != PlasmaRight) {
        plasma_error("PLASMA_ctrsmrv", "illegal value of side");
        return -1;
    }
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_ctrsmrv", "illegal value of uplo");
        return -2;
    }
    if (transA != PlasmaConjTrans && transA != PlasmaNoTrans && transA != PlasmaTrans ) {
        plasma_error("PLASMA_ctrsmrv", "illegal value of transA");
        return -3;
    }
    if (diag != PlasmaUnit && diag != PlasmaNonUnit) {
        plasma_error("PLASMA_ctrsmrv", "illegal value of diag");
        return -4;
    }
    if (N < 0) {
        plasma_error("PLASMA_ctrsmrv", "illegal value of N");
        return -5;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_ctrsmrv", "illegal value of NRHS");
        return -6;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_ctrsmrv", "illegal value of LDA");
        return -8;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_ctrsmrv", "illegal value of LDB");
        return -10;
    }
    /* Quick return */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CPOSV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_ctrsmrv", "plasma_tune() failed");
        return status;
    }
    /* Set NT & NTRHS */
    NB = PLASMA_NB;
    if (side == PlasmaLeft) {
      NA = N;
    } else {
      NA = NRHS;
    }
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, NA,   0, 0, NA, NA,   plasma_desc_mat_free(&(descA)) );
        plasma_cooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N,  NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, NA,   0, 0, NA, NA  );
        plasma_ciplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N,  NRHS);
    }
    /* Call the tile interface */
    PLASMA_ctrsmrv_Tile_Async(
        side, uplo, transA, diag, alpha, &descA, &descB, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, NA   );
        plasma_ciptile2lap( descB, B, NB, NB, LDB, NRHS );
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

int PLASMA_ctrtri	(	PLASMA_enum	uplo,
		PLASMA_enum	diag,
		int	N,
		PLASMA_Complex32_t *	A,
		int	LDA
	)

PLASMA_ctrtri - Computes the inverse of a complex upper or lower triangular matrix A.

Parameters:

[in]	uplo	= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]	diag	= PlasmaNonUnit: A is non-unit triangular; = PlasmaUnit: A is unit triangular.
[in]	N	The order of the matrix A. N >= 0.
[in,out]	A	On entry, the triangular matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of the array A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of the array A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If DIAG = 'U', the diagonal elements of A are also not referenced and are assumed to be 1. On exit, the (triangular) inverse of the original matrix, in the same storage format.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value
>0	if i, A(i,i) is exactly zero. The triangular matrix is singular and its inverse can not be computed.

See also:: PLASMA_ctrtri_Tile; PLASMA_ctrtri_Tile_Async; PLASMA_ctrtri; PLASMA_dtrtri; PLASMA_strtri; PLASMA_cpotri

Definition at line 70 of file ctrtri.c.

References max, plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, PLASMA_ctrtri_Tile_Async(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FUNC_CPOSV, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaLower, PlasmaNonUnit, PlasmaUnit, PlasmaUpper, and plasma_sequence_t::status.

{
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_ctrtri", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_ctrtri", "illegal value of uplo");
        return -1;
    }
    if (diag != PlasmaUnit && diag != PlasmaNonUnit) {
        plasma_error("PLASMA_ctrtri", "illegal value of diag");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_ctrtri", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_ctrtri", "illegal value of LDA");
        return -5;
    }
    /* Quick return */
    if (max(N, 0) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CPOSV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_ctrtri", "plasma_tune() failed");
        return status;
    }
    /* Set NT */
    NB   = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ciplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, N, N);
    }
    /* Call the tile interface */
    PLASMA_ctrtri_Tile_Async(uplo, diag, &descA, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

int PLASMA_cunglq	(	int	M,
		int	N,
		int	K,
		PLASMA_Complex32_t *	A,
		int	LDA,
		PLASMA_Complex32_t *	T,
		PLASMA_Complex32_t *	Q,
		int	LDQ
	)

PLASMA_cunglq - Generates an M-by-N matrix Q with orthonormal rows, which is defined as the first M rows of a product of the elementary reflectors returned by PLASMA_cgelqf.

Parameters:

[in]	M	The number of rows of the matrix Q. M >= 0.
[in]	N	The number of columns of the matrix Q. N >= M.
[in]	K	The number of rows of elementary tile reflectors whose product defines the matrix Q. M >= K >= 0.
[in]	A	Details of the LQ factorization of the original matrix A as returned by PLASMA_cgelqf.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[in]	T	Auxiliary factorization data, computed by PLASMA_cgelqf.
[out]	Q	On exit, the M-by-N matrix Q.
[in]	LDQ	The leading dimension of the array Q. LDQ >= max(1,M).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
PLASMA_SUCCESS	<0 if -i, the i-th argument had an illegal value

See also:: PLASMA_cunglq_Tile; PLASMA_cunglq_Tile_Async; PLASMA_cunglq; PLASMA_dorglq; PLASMA_sorglq; PLASMA_cgelqf

Definition at line 68 of file cunglq.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, PLASMA_cunglq_Tile_Async(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_CGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaComplexFloat, plasma_sequence_t::status, and T.

{
    int NB, IB, IBNB, KT, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descQ, descT;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cunglq", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (M < 0) {
        plasma_error("PLASMA_cunglq", "illegal value of M");
        return -1;
    }
    if (N < M) {
        plasma_error("PLASMA_cunglq", "illegal value of N");
        return -2;
    }
    if (K < 0 || K > M) {
        plasma_error("PLASMA_cunglq", "illegal value of K");
        return -3;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_cunglq", "illegal value of LDA");
        return -5;
    }
    if (LDQ < max(1, M)) {
        plasma_error("PLASMA_cunglq", "illegal value of LDQ");
        return -8;
    }
    /* Quick return - currently NOT equivalent to LAPACK's:
     * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDQ ) */
    if (min(M, min(N, K)) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CGELS, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cunglq", "plasma_tune() failed");
        return status;
    }
    /* Set MT & NT */
    NB   = PLASMA_NB;
    IB   = PLASMA_IB;
    IBNB = IB*NB;
    NT   = (N%NB==0) ? (N/NB) : (N/NB+1);
    KT   = (K%NB==0) ? (K/NB) : (K/NB+1);
    plasma_sequence_create(plasma, &sequence);
    if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
        descT = plasma_desc_init(
            PlasmaComplexFloat,
            IB, NB, IBNB,
            KT*IB, NT*NB, 0, 0, KT*IB, NT*NB);
    }
    else {
        /* Double the size of T to accomodate the tree reduction phase */
        descT = plasma_desc_init(
            PlasmaComplexFloat,
            IB, NB, IBNB,
            KT*IB, 2*NT*NB, 0, 0, KT*IB, 2*NT*NB);
    }
    descT.mat = T;
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K, N, plasma_desc_mat_free(&(descA)) );
        plasma_cooplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descQ)));
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K, N);
        plasma_ciplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, M, N);
    }
    /* Call the tile interface */
    PLASMA_cunglq_Tile_Async(&descA, &descT, &descQ, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descQ, Q, NB, NB, LDQ, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descQ);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N );
        plasma_ciptile2lap( descQ, Q, NB, NB, LDQ, N );
        plasma_dynamic_sync();
    }
        
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_cungqr	(	int	M,
		int	N,
		int	K,
		PLASMA_Complex32_t *	A,
		int	LDA,
		PLASMA_Complex32_t *	T,
		PLASMA_Complex32_t *	Q,
		int	LDQ
	)

PLASMA_cungqr - Generates an M-by-N matrix Q with orthonormal columns, which is defined as the first N columns of a product of the elementary reflectors returned by PLASMA_cgeqrf.

Parameters:

[in]	M	The number of rows of the matrix Q. M >= 0.
[in]	N	The number of columns of the matrix Q. N >= M.
[in]	K	The number of columns of elementary tile reflectors whose product defines the matrix Q. M >= K >= 0.
[in]	A	Details of the QR factorization of the original matrix A as returned by PLASMA_cgeqrf.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,M).
[in]	T	Auxiliary factorization data, computed by PLASMA_cgeqrf.
[out]	Q	On exit, the M-by-N matrix Q.
[in]	LDQ	The leading dimension of the array Q. LDQ >= max(1,M).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_cungqr_Tile; PLASMA_cungqr_Tile_Async; PLASMA_cungqr; PLASMA_dorgqr; PLASMA_sorgqr; PLASMA_cgeqrf

Definition at line 68 of file cungqr.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, PLASMA_cungqr_Tile_Async(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_CGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaComplexFloat, plasma_sequence_t::status, and T.

{
    int NB, IB, IBNB, MT, KT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descQ, descT;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cungqr", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (M < 0) {
        plasma_error("PLASMA_cungqr", "illegal value of M");
        return -1;
    }
    if (N < 0 || N > M) {
        plasma_error("PLASMA_cungqr", "illegal value of N");
        return -2;
    }
    if (K < 0 || K > N) {
        plasma_error("PLASMA_cungqr", "illegal value of K");
        return -3;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_cungqr", "illegal value of LDA");
        return -5;
    }
    if (LDQ < max(1, M)) {
        plasma_error("PLASMA_cungqr", "illegal value of LDQ");
        return -8;
    }
    if (min(M, min(N, K)) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on M & N; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CGELS, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cungqr", "plasma_tune() failed");
        return status;
    }
    /* Set MT & KT */
    NB   = PLASMA_NB;
    IB   = PLASMA_IB;
    IBNB = IB*NB;
    MT   = (M%NB==0) ? (M/NB) : (M/NB+1);
    KT   = (K%NB==0) ? (K/NB) : (K/NB+1);
    plasma_sequence_create(plasma, &sequence);
    if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
        descT = plasma_desc_init(
            PlasmaComplexFloat,
            IB, NB, IBNB,
            MT*IB, KT*NB, 0, 0, MT*IB, KT*NB);
    }
    else {
        /* Double the size of T to accomodate the tree reduction phase */
        descT = plasma_desc_init(
            PlasmaComplexFloat,
            IB, NB, IBNB,
            MT*IB, 2*KT*NB, 0, 0, MT*IB, 2*KT*NB);
    }
    descT.mat = T;
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, K, plasma_desc_mat_free(&(descA)) );
        plasma_cooplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descQ)));
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, K);
        plasma_ciplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, M, N);
    }
    /* Call the tile interface */
    PLASMA_cungqr_Tile_Async(&descA, &descT, &descQ, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descQ, Q, NB, NB, LDQ, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descQ);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, K );
        plasma_ciptile2lap( descQ, Q, NB, NB, LDQ, N );
        plasma_dynamic_sync();
    }
    
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_cunmlq	(	PLASMA_enum	side,
		PLASMA_enum	trans,
		int	M,
		int	N,
		int	K,
		PLASMA_Complex32_t *	A,
		int	LDA,
		PLASMA_Complex32_t *	T,
		PLASMA_Complex32_t *	B,
		int	LDB
	)

PLASMA_cunmlq - overwrites the general M-by-N matrix C with Q*C, where Q is an orthogonal matrix (unitary in the complex case) defined as the product of elementary reflectors returned by PLASMA_cgelqf. Q is of order M.

Parameters:

[in]	side	Intended usage: = PlasmaLeft: apply Q or QH from the left; = PlasmaRight: apply Q or QH from the right. Currently only PlasmaLeft is supported.
[in]	trans	Intended usage: = PlasmaNoTrans: no transpose, apply Q; = PlasmaConjTrans: conjfugate transpose, apply Q**H. Currently only PlasmaConjTrans is supported.
[in]	M	The number of rows of the matrix C. M >= 0.
[in]	N	The number of columns of the matrix C. N >= 0.
[in]	K	The number of rows of elementary tile reflectors whose product defines the matrix Q. M >= K >= 0.
[in]	A	Details of the LQ factorization of the original matrix A as returned by PLASMA_cgelqf.
[in]	LDA	The leading dimension of the array A. LDA >= max(1,K).
[in]	T	Auxiliary factorization data, computed by PLASMA_cgelqf.
[in,out]	B	On entry, the M-by-N matrix B. On exit, B is overwritten by QB or QHB.
[in]	LDB	The leading dimension of the array C. LDC >= max(1,M).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_cunmlq_Tile; PLASMA_cunmlq_Tile_Async; PLASMA_cunmlq; PLASMA_dormlq; PLASMA_sormlq; PLASMA_cgelqf

Definition at line 83 of file cunmlq.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, PLASMA_cunmlq_Tile_Async(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_CGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaComplexFloat, PlasmaConjTrans, PlasmaLeft, PlasmaNoTrans, PlasmaRight, plasma_sequence_t::status, and T.

{
    int NB, IB, IBNB, KT, NT, An;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB, descT;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cunmlq", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (side == PlasmaLeft)
        An = M;
    else 
        An = N;
    /* Check input arguments */
    if ( (side != PlasmaLeft) && (side != PlasmaRight) ) {
        plasma_error("PLASMA_cunmlq", "illegal value of side");
        return -1;
    }
    if ( (trans != PlasmaConjTrans) && (trans != PlasmaNoTrans) ){
        plasma_error("PLASMA_cunmlq", "illegal value of trans");
        return -2;
    }
    if (M < 0) {
        plasma_error("PLASMA_cunmlq", "illegal value of M");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_cunmlq", "illegal value of N");
        return -4;
    }
    if ((K < 0) || (K > An)) {
        plasma_error("PLASMA_cunmlq", "illegal value of K");
        return -5;
    }
    if (LDA < max(1, K)) {
        plasma_error("PLASMA_cunmlq", "illegal value of LDA");
        return -7;
    }
    if (LDB < max(1, M)) {
        plasma_error("PLASMA_cunmlq", "illegal value of LDB");
        return -10;
    }
    /* Quick return - currently NOT equivalent to LAPACK's:
     * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) */
    if (min(M, min(N, K)) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CGELS, M, K, N);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cunmlq", "plasma_tune() failed");
        return status;
    }
    /* Set MT, NT & NTRHS */
    NB   = PLASMA_NB;
    IB   = PLASMA_IB;
    IBNB = IB*NB;
    KT   = ( K%NB==0) ? (K /NB) : (K /NB+1);
    NT   = (An%NB==0) ? (An/NB) : (An/NB+1);
    plasma_sequence_create(plasma, &sequence);
    if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
        descT = plasma_desc_init(
            PlasmaComplexFloat,
            IB, NB, IBNB,
            KT*IB, NT*NB, 0, 0, KT*IB, NT*NB);
    }
    else {
        /* Double the size of T to accomodate the tree reduction phase */
        descT = plasma_desc_init(
            PlasmaComplexFloat,
            IB, NB, IBNB,
            KT*IB, 2*NT*NB, 0, 0, KT*IB, 2*NT*NB);
    }
    descT.mat = T;
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, K, An, plasma_desc_mat_free(&(descA)) );
        plasma_cooplap2tile( descB, B, NB, NB, LDB, N,  0, 0, M, N,  plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, An, 0, 0, K, An);
        plasma_ciplap2tile( descB, B, NB, NB, LDB, N,  0, 0, M, N);
    }
    /* Call the tile interface */
    PLASMA_cunmlq_Tile_Async(
        side, trans, &descA, &descT, &descB, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descB, B, NB, NB, LDB, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, An );
        plasma_ciptile2lap( descB, B, NB, NB, LDB, N );
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_cunmqr	(	PLASMA_enum	side,
		PLASMA_enum	trans,
		int	M,
		int	N,
		int	K,
		PLASMA_Complex32_t *	A,
		int	LDA,
		PLASMA_Complex32_t *	T,
		PLASMA_Complex32_t *	B,
		int	LDB
	)

PLASMA_cunmqr - overwrites the general M-by-N matrix C with Q*C, where Q is an orthogonal matrix (unitary in the complex case) defined as the product of elementary reflectors returned by PLASMA_cgeqrf. Q is of order M.

Parameters:

[in]	side	Intended usage: = PlasmaLeft: apply Q or QH from the left; = PlasmaRight: apply Q or QH from the right. Currently only PlasmaLeft is supported.
[in]	trans	Intended usage: = PlasmaNoTrans: no transpose, apply Q; = PlasmaConjTrans: conjfugate transpose, apply Q**H. Currently only PlasmaConjTrans is supported.
[in]	M	The number of rows of the matrix C. M >= 0.
[in]	N	The number of columns of the matrix C. N >= 0.
[in]	K	The number of columns of elementary tile reflectors whose product defines the matrix Q. If side == PlasmaLeft, M >= K >= 0. If side == PlasmaRight, N >= K >= 0.
[in]	A	Details of the QR factorization of the original matrix A as returned by PLASMA_cgeqrf.
[in]	LDA	The leading dimension of the array A. If side == PlasmaLeft, LDA >= max(1,M). If side == PlasmaRight, LDA >= max(1,N).
[in]	T	Auxiliary factorization data, computed by PLASMA_cgeqrf.
[in,out]	B	On entry, the M-by-N matrix B. On exit, B is overwritten by QB or QHB.
[in]	LDB	The leading dimension of the array C. LDC >= max(1,M).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

See also:: PLASMA_cunmqr_Tile; PLASMA_cunmqr_Tile_Async; PLASMA_cunmqr; PLASMA_dormqr; PLASMA_sormqr; PLASMA_cgeqrf

Definition at line 85 of file cunmqr.c.

References plasma_context_struct::householder, plasma_desc_t::mat, max, min, plasma_ciplap2tile, plasma_ciptile2lap, plasma_context_self(), plasma_cooplap2tile, plasma_cooptile2lap, PLASMA_cunmqr_Tile_Async(), plasma_desc_init(), plasma_desc_mat_free(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_FLAT_HOUSEHOLDER, PLASMA_FUNC_CGELS, PLASMA_IB, PLASMA_NB, PLASMA_OUTOFPLACE, PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), PLASMA_SUCCESS, PLASMA_TRANSLATION, plasma_tune(), PlasmaComplexFloat, PlasmaConjTrans, PlasmaLeft, PlasmaNoTrans, PlasmaRight, plasma_sequence_t::status, and T.

{
    int NB, IB, IBNB, Am, MT, KT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB, descT;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cunmqr", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if ( side == PlasmaLeft ) {
        Am = M;
    } else {
        Am = N;
    }
    /* Check input arguments */
    if ((side != PlasmaLeft) && (side != PlasmaRight)) {
        plasma_error("PLASMA_cunmqr", "illegal value of side");
        return -1;
    }
    if ((trans != PlasmaConjTrans) && (trans != PlasmaNoTrans)){
        plasma_error("PLASMA_cunmqr", "illegal value of trans");
        return -2;
    }
    if (M < 0) {
        plasma_error("PLASMA_cunmqr", "illegal value of M");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_cunmqr", "illegal value of N");
        return -4;
    }
    if ( (K < 0) || (K > Am) ) {
        plasma_error("PLASMA_cunmqr", "illegal value of K");
        return -5;
    }
    if ( LDA < max(1, Am) ) {
        plasma_error("PLASMA_cunmqr", "illegal value of LDA");
        return -7;
    }
    if (LDB < max(1, M)) {
        plasma_error("PLASMA_cunmqr", "illegal value of LDB");
        return -10;
    }
    /* Quick return - currently NOT equivalent to LAPACK's:
     * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) */
    if (min(M, min(N, K)) == 0)
        return PLASMA_SUCCESS;
    /* Tune NB & IB depending on M, K & N; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_CGELS, M, K, N);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cunmqr", "plasma_tune() failed");
        return status;
    }
    /* Set MT, NT & NTRHS */
    NB   = PLASMA_NB;
    IB   = PLASMA_IB;
    IBNB = IB*NB;
    MT   = (Am%NB==0) ? (Am/NB) : (Am/NB+1);
    KT   = (K%NB==0)  ? (K /NB) : (K /NB+1);
    plasma_sequence_create(plasma, &sequence);
    if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
        descT = plasma_desc_init(
            PlasmaComplexFloat,
            IB, NB, IBNB,
            MT*IB, KT*NB, 0, 0, MT*IB, KT*NB);
    }
    else {
        /* Double the size of T to accomodate the tree reduction phase */
        descT = plasma_desc_init(
            PlasmaComplexFloat,
            IB, NB, IBNB,
            MT*IB, 2*KT*NB, 0, 0, MT*IB, 2*KT*NB);
    }
    descT.mat = T;
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, K, 0, 0, Am, K, plasma_desc_mat_free(&(descA)) );
        plasma_cooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M,  N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, K, 0, 0, Am, K);
        plasma_ciplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M,  N);
    }
    /* Call the tile interface */
    PLASMA_cunmqr_Tile_Async(
        side, trans, &descA, &descT, &descB, sequence, &request);
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooptile2lap( descB, B, NB, NB, LDB, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, K );
        plasma_ciptile2lap( descB, B, NB, NB, LDB, N );
        plasma_dynamic_sync();
    }
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

Functions

Detailed Description

Function Documentation

trans = PlasmaNoTrans and M >= N: find the least squares solution of an overdetermined

trans = PlasmaNoTrans and M < N: find the minimum norm solution of an underdetermined