/* ///////////////////////////// P /// L /// A /// S /// M /// A /////////////////////////////// */
/* ///                    PLASMA computational routines (version 2.1.0)                      ///
 * ///                    Author: Jakub Kurzak                                               ///
 * ///                    Release Date: November, 15th 2009                                  ///
 * ///                    PLASMA is a software package provided by Univ. of Tennessee,       ///
 * ///                    Univ. of California Berkeley and Univ. of Colorado Denver          /// */
/* ///////////////////////////////////////////////////////////////////////////////////////////// */
#include "common.h"

/* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
// PLASMA_cgetrs - Solves a system of linear equations A * X = B, with a general N-by-N matrix A
// using the tile LU factorization computed by PLASMA_cgetrf.

/* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
// trans    PLASMA_enum (IN)
//          Intended to specify the the form of the system of equations:
//          = PlasmaNoTrans:   A * X = B     (No transpose)
//          = PlasmaTrans:     A**T * X = B  (Transpose)
//          = PlasmaConjTrans: A**H * X = B  (Conjugate transpose)
//          Currently only PlasmaNoTrans is supported.
//
// N        int (IN)
//          The order of the matrix A.  N >= 0.
//
// NRHS     int (IN)
//          The number of right hand sides, i.e., the number of columns of the matrix B.
//          NRHS >= 0.
//
// A        PLASMA_Complex32_t* (IN)
//          The tile factors L and U from the factorization, computed by PLASMA_cgetrf.
//
// LDA      int (IN)
//          The leading dimension of the array A. LDA >= max(1,N).
//
// L        PLASMA_Complex32_t* (IN)
//          Auxiliary factorization data, related to the tile L factor, computed by PLASMA_cgetrf.
//
// IPIV     int* (IN)
//          The pivot indices from PLASMA_cgetrf (not equivalent to LAPACK).
//
// B        PLASMA_Complex32_t* (INOUT)
//          On entry, the N-by-NRHS matrix of right hand side matrix B.
//          On exit, the solution matrix X.
//
// LDB      int (IN)
//          The leading dimension of the array B. LDB >= max(1,N).

/* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
//          = 0: successful exit
//          < 0: if -i, the i-th argument had an illegal value

/* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
int PLASMA_cgetrs(PLASMA_enum trans, int N, int NRHS, PLASMA_Complex32_t *A, int LDA,
                  PLASMA_Complex32_t *L, int *IPIV, PLASMA_Complex32_t *B, int LDB)
{
    int NB, NT, NTRHS;
    int status;
    PLASMA_Complex32_t *Abdl;
    PLASMA_Complex32_t *Bbdl;
    PLASMA_Complex32_t *Lbdl;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cgetrs", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (trans != PlasmaNoTrans) {
        plasma_error("PLASMA_cgetrs", "only PlasmaNoTrans supported");
        return PLASMA_ERR_NOT_SUPPORTED;
    }
    if (N < 0) {
        plasma_error("PLASMA_cgetrs", "illegal value of N");
        return -2;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_cgetrs", "illegal value of NRHS");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_cgetrs", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_cgetrs", "illegal value of LDB");
        return -9;
    }
    /* Quick return */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;

    /* Tune NB & IB depending on N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_CGESV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cgetrs", "plasma_tune() failed");
        return status;
    }

    /* Set NT & NTRHS */
    NB = PLASMA_NB;
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);
    NTRHS = (NRHS%NB==0) ? (NRHS/NB) : (NRHS/NB+1);

    /* Allocate memory for matrices in block layout */
    Abdl = (PLASMA_Complex32_t *)plasma_shared_alloc(plasma, NT*NT*PLASMA_NBNBSIZE, PlasmaComplexFloat);
    Lbdl = (PLASMA_Complex32_t *)plasma_shared_alloc(plasma, NT*NT*PLASMA_IBNBSIZE, PlasmaComplexFloat);
    Bbdl = (PLASMA_Complex32_t *)plasma_shared_alloc(plasma, NT*NTRHS*PLASMA_NBNBSIZE, PlasmaComplexFloat);
    if (Abdl == NULL || Lbdl == NULL || Bbdl == NULL) {
        plasma_error("PLASMA_cgetrs", "plasma_shared_alloc() failed");
        plasma_shared_free(plasma, Abdl);
        plasma_shared_free(plasma, Lbdl);
        plasma_shared_free(plasma, Bbdl);
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }

    PLASMA_desc descA = plasma_desc_init(
        Abdl, PlasmaComplexFloat,
        PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
        N, N, 0, 0, N, N);

    PLASMA_desc descB = plasma_desc_init(
        Bbdl, PlasmaComplexFloat,
        PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
        N, NRHS, 0, 0, N, NRHS);

    PLASMA_desc descL = plasma_desc_init(
        Lbdl, PlasmaComplexFloat,
        PLASMA_IB, PLASMA_NB, PLASMA_IBNBSIZE,
        N, N, 0, 0, N, N);

    plasma_parallel_call_3(plasma_lapack_to_tile,
        PLASMA_Complex32_t*, A,
        int, LDA,
        PLASMA_desc, descA);

    plasma_parallel_call_3(plasma_lapack_to_tile,
        PLASMA_Complex32_t*, B,
        int, LDB,
        PLASMA_desc, descB);

    /* Receive L from the user */
    plasma_memcpy(Lbdl, L, NT*NT*PLASMA_IBNBSIZE, PlasmaComplexFloat);

    /* Call the native interface */
    status = PLASMA_cgetrs_Tile(&descA, &descL, IPIV, &descB);

    if (status == PLASMA_SUCCESS)
        plasma_parallel_call_3(plasma_tile_to_lapack,
            PLASMA_desc, descB,
            PLASMA_Complex32_t*, B,
            int, LDB);

    plasma_shared_free(plasma, Abdl);
    plasma_shared_free(plasma, Lbdl);
    plasma_shared_free(plasma, Bbdl);
    return status;
}

/* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
// PLASMA_cgetrs_Tile - Solves a system of linear equations A * X = B, with a general N-by-N
// matrix A using the tile LU factorization computed by PLASMA_cgetrf.
// All matrices are passed through descriptors. All dimensions are taken from the descriptors.

/* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
// A        PLASMA_Complex32_t* (IN)
//          The tile factors L and U from the factorization, computed by PLASMA_cgetrf.
//
// L        PLASMA_Complex32_t* (IN)
//          Auxiliary factorization data, related to the tile L factor, computed by PLASMA_cgetrf.
//
// IPIV     int* (IN)
//          The pivot indices from PLASMA_cgetrf (not equivalent to LAPACK).
//
// B        PLASMA_Complex32_t* (INOUT)
//          On entry, the N-by-NRHS matrix of right hand side matrix B.
//          On exit, the solution matrix X.

/* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
//          = 0: successful exit

/* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
int PLASMA_cgetrs_Tile(PLASMA_desc *A, PLASMA_desc *L, int *IPIV, PLASMA_desc *B)
{
    PLASMA_desc descA = *A;
    PLASMA_desc descL = *L;
    PLASMA_desc descB = *B;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cgetrs_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check descriptors for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cgetrs_Tile", "invalid first descriptor");
        return PLASMA_ERR_ILLEGAL_VALUE;
    }
    if (plasma_desc_check(&descL) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cgetrs_Tile", "invalid second descriptor");
        return PLASMA_ERR_ILLEGAL_VALUE;
    }
    if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cgetrs_Tile", "invalid third descriptor");
        return PLASMA_ERR_ILLEGAL_VALUE;
    }
    /* Check input arguments */
    if (descA.nb != descA.mb || descB.nb != descB.mb) {
        plasma_error("PLASMA_cgetrs_Tile", "only square tiles supported");
        return PLASMA_ERR_ILLEGAL_VALUE;
    }
    /* Quick return */
/*
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;
*/
    plasma_parallel_call_4(plasma_pctrsmpl,
        PLASMA_desc, descA,
        PLASMA_desc, descB,
        PLASMA_desc, descL,
        int*, IPIV);

    plasma_parallel_call_7(plasma_pctrsm,
        PLASMA_enum, PlasmaLeft,
        PLASMA_enum, PlasmaUpper,
        PLASMA_enum, PlasmaNoTrans,
        PLASMA_enum, PlasmaNonUnit,
        PLASMA_Complex32_t, 1.0,
        PLASMA_desc, descA,
        PLASMA_desc, descB);

    return PLASMA_SUCCESS;
}
