/* ///////////////////////////// P /// L /// A /// S /// M /// A /////////////////////////////// */
/* ///                    PLASMA computational routine (version 2.0.0)                       ///
 * ///                    Release Date: July, 4th 2009                                       ///
 * ///                    PLASMA is a software package provided by Univ. of Tennessee,       ///
 * ///                    Univ. of California Berkeley and Univ. of Colorado Denver          /// */

/* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
// PLASMA_zposv - Computes the solution to a system of linear equations A * X = B,
// where A is an N-by-N symmetric positive definite (or Hermitian positive definite
// in the complex case) matrix and X and B are N-by-NRHS matrices.
// The Cholesky decomposition is used to factor A as
//
//   A = U**H * U, if uplo = PlasmaUpper, or
//   A = L * L**H, if uplo =  PlasmaLower,
//
// where U is an upper triangular matrix and  L is a lower triangular matrix.
// The factored form of A is then used to solve the system of equations A * X = B.

/* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
// uplo     PLASMA_enum (IN)
//          Specifies which triangle of the matrix A is stored:
//          = PlasmaUpper: Upper triangle of A is stored;
//          = PlasmaLower: Lower triangle of A is stored.
//
// N        int (IN)
//          The number of linear equations, i.e., the order of the matrix A. N >= 0.
//
// NRHS     int (IN)
//          The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
//
// A        PLASMA_Complex64_t* (INOUT)
//          On entry, the symmetric positive definite (or Hermitian) matrix A.
//          If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A
//          contains the upper triangular part of the matrix A, and the strictly lower triangular
//          part of A is not referenced.
//          If uplo = PlasmaLower, the leading N-by-N lower triangular part of A contains the
//          lower triangular part of the matrix A, and the strictly upper triangular part of A
//          is not referenced.
//          On exit, if return value = 0, the factor U or L from the Cholesky factorization
//          A = U**H*U or A = L*L**H.
//
// LDA      int (IN)
//          The leading dimension of the array A. LDA >= max(1,N).
//
// B        PLASMA_Complex64_t* (INOUT)
//          On entry, the N-by-NRHS right hand side matrix B.
//          On exit, if return value = 0, the N-by-NRHS solution matrix X.
//
// LDB      int (IN)
//          The leading dimension of the array B. LDB >= max(1,N).

/* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
//          = 0: successful exit
//          < 0: if -i, the i-th argument had an illegal value
//          > 0: if i, the leading minor of order i of A is not positive definite, so the
//               factorization could not be completed, and the solution has not been computed.

/* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
#include "common.h"

// Driver for the positive definite solve A * X = B.
// Steps: validate arguments, tune the tile size, copy A and B into freshly
// allocated block-layout buffers, run the tiled factorization, and - only when
// PLASMA_INFO reports success - run the two triangular solves and copy both
// matrices back into the caller's arrays.
//
// Returns PLASMA_ERR_NOT_INITIALIZED when no PLASMA context is available,
// -1/-2/-3/-5/-7 for an illegal uplo/N/NRHS/LDA/LDB, the status of
// plasma_tune() when tuning fails, PLASMA_ERR_OUT_OF_RESOURCES when a tile
// buffer cannot be allocated, and PLASMA_INFO otherwise.
int PLASMA_zposv(PLASMA_enum uplo, int N, int NRHS, PLASMA_Complex64_t *A, int LDA,
                 PLASMA_Complex64_t *B, int LDB)
{
    /* NOTE(review): PLASMA_NB, PLASMA_NBNBSIZE, PLASMA_INFO and the
     * plasma_parallel_call_* macros presumably resolve through a local named
     * `plasma` - do not rename it without checking common.h. */
    plasma_context_t *plasma = plasma_context_self();
    PLASMA_Complex64_t *tilesA;
    PLASMA_Complex64_t *tilesB;
    PLASMA_enum firstTrans;
    PLASMA_enum secondTrans;
    int tileDim;
    int tilesN;
    int tilesRhs;
    int tuneStatus;

    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zposv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Argument validation; the negative code is the position of the bad argument. */
    if (!(uplo == PlasmaUpper || uplo == PlasmaLower)) {
        plasma_error("PLASMA_zposv", "illegal value of uplo");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_zposv", "illegal value of N");
        return -2;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_zposv", "illegal value of NRHS");
        return -3;
    }
    /* Leading dimensions must be at least max(1, N). */
    if (LDA < 1 || LDA < N) {
        plasma_error("PLASMA_zposv", "illegal value of LDA");
        return -5;
    }
    if (LDB < 1 || LDB < N) {
        plasma_error("PLASMA_zposv", "illegal value of LDB");
        return -7;
    }

    /* Quick return when either dimension is zero (both are known to be
     * non-negative here). This is deliberately NOT what LAPACK does: its
     * DPOSV has no such shortcut. */
    if (N == 0 || NRHS == 0)
        return PLASMA_SUCCESS;

    /* Let the tuner choose NB (and NBNBSIZE) for this problem size. */
    tuneStatus = plasma_tune(PLASMA_FUNC_ZPOSV, N, N, NRHS);
    if (tuneStatus != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zposv", "plasma_tune() failed");
        return tuneStatus;
    }

    /* Tile counts: ceiling of N/NB and NRHS/NB. */
    tileDim  = PLASMA_NB;
    tilesN   = N / tileDim + (N % tileDim != 0);
    tilesRhs = NRHS / tileDim + (NRHS % tileDim != 0);

    /* Block-layout storage for A and B.
     * NOTE(review): the element counts are computed in int; for very large N
     * the product could overflow - confirm against the size parameter type of
     * plasma_shared_alloc(). */
    tilesA = (PLASMA_Complex64_t *)plasma_shared_alloc(plasma, tilesN*tilesN*PLASMA_NBNBSIZE, PlasmaComplexDouble);
    tilesB = (PLASMA_Complex64_t *)plasma_shared_alloc(plasma, tilesN*tilesRhs*PLASMA_NBNBSIZE, PlasmaComplexDouble);
    if (!tilesA || !tilesB) {
        plasma_error("PLASMA_zposv", "plasma_shared_alloc() failed");
        plasma_shared_free(plasma, tilesA);
        plasma_shared_free(plasma, tilesB);
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }

    PLASMA_desc descA = plasma_desc_init(
        tilesA, PlasmaComplexDouble,
        PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
        N, N, 0, 0, N, N);

    PLASMA_desc descB = plasma_desc_init(
        tilesB, PlasmaComplexDouble,
        PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
        N, NRHS, 0, 0, N, NRHS);

    /* Transposition modes of the two solves: U**H then U for the upper
     * factorization, L then L**H for the lower one. */
    if (uplo == PlasmaUpper) {
        firstTrans  = PlasmaConjTrans;
        secondTrans = PlasmaNoTrans;
    } else {
        firstTrans  = PlasmaNoTrans;
        secondTrans = PlasmaConjTrans;
    }

    /* Bring both user matrices into the tile buffers. */
    plasma_parallel_call_3(plasma_lapack_to_tile,
        PLASMA_Complex64_t*, A,
        int, LDA,
        PLASMA_desc, descA);

    plasma_parallel_call_3(plasma_lapack_to_tile,
        PLASMA_Complex64_t*, B,
        int, LDB,
        PLASMA_desc, descB);

    /* Factor A in place in tile layout. */
    plasma_parallel_call_2(plasma_pzpotrf,
        PLASMA_enum, uplo,
        PLASMA_desc, descA);

    /* On failure, skip the solves and the copy-back: A and B are left as the
     * caller passed them. */
    if (PLASMA_INFO != PLASMA_SUCCESS)
        goto release;

    plasma_parallel_call_7(plasma_pztrsm,
        PLASMA_enum, PlasmaLeft,
        PLASMA_enum, uplo,
        PLASMA_enum, firstTrans,
        PLASMA_enum, PlasmaNonUnit,
        PLASMA_Complex64_t, 1.0,
        PLASMA_desc, descA,
        PLASMA_desc, descB);

    plasma_parallel_call_7(plasma_pztrsm,
        PLASMA_enum, PlasmaLeft,
        PLASMA_enum, uplo,
        PLASMA_enum, secondTrans,
        PLASMA_enum, PlasmaNonUnit,
        PLASMA_Complex64_t, 1.0,
        PLASMA_desc, descA,
        PLASMA_desc, descB);

    /* Hand the factor and the solution back in the caller's layout. */
    plasma_parallel_call_3(plasma_tile_to_lapack,
        PLASMA_desc, descA,
        PLASMA_Complex64_t*, A,
        int, LDA);

    plasma_parallel_call_3(plasma_tile_to_lapack,
        PLASMA_desc, descB,
        PLASMA_Complex64_t*, B,
        int, LDB);

release:
    plasma_shared_free(plasma, tilesA);
    plasma_shared_free(plasma, tilesB);
    return PLASMA_INFO;
}
