/* ///////////////////////////// P /// L /// A /// S /// M /// A /////////////////////////////// */
/* ///                    PLASMA computational routine (version 2.0.0)                       ///
 * ///                    Release Date: July, 4th 2009                                       ///
 * ///                    PLASMA is a software package provided by Univ. of Tennessee,       ///
 * ///                    Univ. of California Berkeley and Univ. of Colorado Denver          /// */

/* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
// PLASMA_zgels_Tile - solves overdetermined or underdetermined linear systems involving an M-by-N
// matrix A using the QR or the LQ factorization of A.  It is assumed that A has full rank.
// The following options are provided:
//
// 1. trans = PlasmaNoTrans and M >= N: find the least squares solution of an overdetermined
//    system, i.e., solve the least squares problem: minimize || B - A*X ||.
//
// 2. trans = PlasmaNoTrans and M < N:  find the minimum norm solution of an underdetermined
//    system A * X = B.
//
// Several right hand side vectors B and solution vectors X can be handled in a single call;
// they are stored as the columns of the M-by-NRHS right hand side matrix B and the N-by-NRHS
// solution matrix X.
// All matrices are passed through descriptors. All dimensions are taken from the descriptors.

/* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
// trans    PLASMA_enum (IN)
//          Intended usage:
//          = PlasmaNoTrans:   the linear system involves A;
//          = PlasmaConjTrans: the linear system involves A**H.
//          Currently only PlasmaNoTrans is supported.
//
// A        PLASMA_desc* (INOUT)
//          On entry, the M-by-N matrix A.
//          On exit,
//          if M >= N, A is overwritten by details of its QR factorization as returned by
//                     PLASMA_zgeqrf;
//          if M < N, A is overwritten by details of its LQ factorization as returned by
//                      PLASMA_zgelqf.
//
// T        PLASMA_desc* (OUT)
//          On exit, auxiliary factorization data.
//
// B        PLASMA_desc* (INOUT)
//          On entry, the M-by-NRHS matrix B of right hand side vectors, stored columnwise;
//          On exit, if return value = 0, B is overwritten by the solution vectors, stored
//          columnwise:
//          if M >= N, rows 1 to N of B contain the least squares solution vectors; the residual
//          sum of squares for the solution in each column is given by the sum of squares of the
//          modulus of elements N+1 to M in that column;
//          if M < N, rows 1 to N of B contain the minimum norm solution vectors;

/* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
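//          = 0: successful exit (PLASMA_SUCCESS)
//          = PLASMA_ERR_NOT_INITIALIZED: PLASMA has not been initialized
//          = PLASMA_ERR_ILLEGAL_VALUE:   one of the descriptors A, T or B is invalid
//          = PLASMA_ERR_NOT_SUPPORTED:   trans is not PlasmaNoTrans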

/* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
#include "common.h"

/*
 * PLASMA_zgels_Tile - descriptor (tile) interface to the full-rank least squares /
 * minimum norm solver.
 *
 * trans  Must be PlasmaNoTrans; any other value is rejected.
 * A      Descriptor of the M-by-N matrix; it is factored in place.
 * T      Descriptor of the auxiliary factorization data.
 * B      Descriptor of the right hand sides; overwritten by the solution.
 *
 * Returns
 *   PLASMA_SUCCESS              on success,
 *   PLASMA_ERR_NOT_INITIALIZED  if plasma_context_self() returns NULL,
 *   PLASMA_ERR_ILLEGAL_VALUE    if a descriptor pointer is NULL or a descriptor fails
 *                               plasma_desc_check(),
 *   PLASMA_ERR_NOT_SUPPORTED    if trans != PlasmaNoTrans.
 *
 * All dimensions are read from the descriptors (m/n of the active matrix).
 */
int PLASMA_zgels_Tile(PLASMA_enum trans, PLASMA_desc *A, PLASMA_desc *T, PLASMA_desc *B)
{
    PLASMA_desc descA;
    PLASMA_desc descT;
    PLASMA_desc descB;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zgels_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Reject NULL pointers before the descriptors are copied by value below */
    if (A == NULL || T == NULL || B == NULL) {
        plasma_error("PLASMA_zgels_Tile", "NULL descriptor");
        return PLASMA_ERR_ILLEGAL_VALUE;
    }
    /* Work on local copies: the parallel calls receive descriptors by value */
    descA = *A;
    descT = *T;
    descB = *B;

    /* Check descriptors for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgels_Tile", "invalid first descriptor");
        return PLASMA_ERR_ILLEGAL_VALUE;
    }
    if (plasma_desc_check(&descT) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgels_Tile", "invalid second descriptor");
        return PLASMA_ERR_ILLEGAL_VALUE;
    }
    if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgels_Tile", "invalid third descriptor");
        return PLASMA_ERR_ILLEGAL_VALUE;
    }
    /* Check input arguments */
    if (trans != PlasmaNoTrans) {
        plasma_error("PLASMA_zgels_Tile", "only PlasmaNoTrans supported");
        return PLASMA_ERR_NOT_SUPPORTED;
    }
    /* NOTE: no quick return for empty problems (min(M, N, NRHS) == 0) is performed here. */

    if (descA.m >= descA.n)
    {
        /* Overdetermined case (M >= N): QR-based least squares solve */
        plasma_parallel_call_2(plasma_pzgeqrf,
            PLASMA_desc, descA,
            PLASMA_desc, descT);

        /* Presumably applies Q**H from the factorization to B - confirm in plasma_pzunmqr */
        plasma_parallel_call_3(plasma_pzunmqr,
            PLASMA_desc, descA,
            PLASMA_desc, descB,
            PLASMA_desc, descT);

        /* Triangular solve with the leading N-by-N upper triangle of A on the first N rows of B */
        plasma_parallel_call_7(plasma_pztrsm,
            PLASMA_enum, PlasmaLeft,
            PLASMA_enum, PlasmaUpper,
            PLASMA_enum, PlasmaNoTrans,
            PLASMA_enum, PlasmaNonUnit,
            PLASMA_Complex64_t, 1.0,
            PLASMA_desc, plasma_desc_submatrix(descA, 0, 0, descA.n, descA.n),
            PLASMA_desc, plasma_desc_submatrix(descB, 0, 0, descA.n, descB.n));
    }
    else
    {
        /* Underdetermined case (M < N): LQ-based minimum norm solve.
         * First zero rows M..N-1 of B for every right hand side column.  The block is
         * addressed with the active dimensions (m/n), consistent with the branch test
         * above and with the other submatrices taken in this function. */
        plasma_parallel_call_1(plasma_tile_zero,
            PLASMA_desc, plasma_desc_submatrix(descB, descA.m, 0, descA.n-descA.m, descB.n));

        plasma_parallel_call_2(plasma_pzgelqf,
            PLASMA_desc, descA,
            PLASMA_desc, descT);

        /* Triangular solve with the leading M-by-M lower triangle of A on the first M rows of B */
        plasma_parallel_call_7(plasma_pztrsm,
            PLASMA_enum, PlasmaLeft,
            PLASMA_enum, PlasmaLower,
            PLASMA_enum, PlasmaNoTrans,
            PLASMA_enum, PlasmaNonUnit,
            PLASMA_Complex64_t, 1.0,
            PLASMA_desc, plasma_desc_submatrix(descA, 0, 0, descA.m, descA.m),
            PLASMA_desc, plasma_desc_submatrix(descB, 0, 0, descA.m, descB.n));

        /* Presumably applies Q**H from the LQ factorization to B - confirm in plasma_pzunmlq */
        plasma_parallel_call_3(plasma_pzunmlq,
            PLASMA_desc, descA,
            PLASMA_desc, descB,
            PLASMA_desc, descT);
    }
    return PLASMA_SUCCESS;
}
