/* ///////////////////////////// P /// L /// A /// S /// M /// A /////////////////////////////// */
/* ///                    PLASMA computational routine (version 2.0.0)                       ///
 * ///                    Release Date: July, 4th 2009                                       ///
 * ///                    PLASMA is a software package provided by Univ. of Tennessee,       ///
 * ///                    Univ. of California Berkeley and Univ. of Colorado Denver          /// */

/* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
// PLASMA_zunmlq - overwrites the general M-by-N matrix C with Q*C, where Q is an orthogonal
// matrix (unitary in the complex case) defined as the product of elementary reflectors returned
// by PLASMA_zgelqf. Q is of order M.

/* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
// side     PLASMA_enum (IN)
//          Intended usage:
//          = PlasmaLeft:  apply Q or Q**H from the left;
//          = PlasmaRight: apply Q or Q**H from the right.
//          Currently only PlasmaLeft is supported.
//
// trans    PLASMA_enum (IN)
//          Intended usage:
//          = PlasmaNoTrans:   no transpose, apply Q;
//          = PlasmaConjTrans: conjugate transpose, apply Q**H.
//          Currently only PlasmaConjTrans is supported.
//
// M        int (IN)
//          The number of rows of the matrix C. M >= 0.
//
// N        int (IN)
//          The number of columns of the matrix C. N >= 0.
//
// K        int (IN)
//          The number of rows of elementary tile reflectors whose product defines the matrix Q.
//          M >= K >= 0.
//
// A        PLASMA_Complex64_t* (IN)
//          Details of the LQ factorization of the original matrix A as returned by PLASMA_zgelqf.
//
// LDA      int (IN)
//          The leading dimension of the array A. LDA >= max(1,K).
//
// T        PLASMA_Complex64_t* (IN)
//          Auxiliary factorization data, computed by PLASMA_zgelqf.
//
// B        PLASMA_Complex64_t* (INOUT)
//          On entry, the M-by-N matrix B.
//          On exit, B is overwritten by Q*B or Q**H*B.
//
// LDB      int (IN)
//          The leading dimension of the array C. LDC >= max(1,M).

/* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
//          = 0: successful exit
//          < 0: if -i, the i-th argument had an illegal value

/* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
#include "common.h"

int PLASMA_zunmlq(PLASMA_enum side, PLASMA_enum trans, int M, int N, int K, PLASMA_Complex64_t *A,
                  int LDA, PLASMA_Complex64_t *T, PLASMA_Complex64_t *B, int LDB)
{
    int NB, MT, NT, KT;
    int status;
    PLASMA_Complex64_t *Abdl;
    PLASMA_Complex64_t *Bbdl;
    PLASMA_Complex64_t *Tbdl;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zunmlq", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (side != PlasmaLeft) {
        plasma_error("PLASMA_zunmlq", "only PlasmaLeft supported");
        return PLASMA_ERR_NOT_SUPPORTED;
    }
    if (trans != PlasmaConjTrans) {
        plasma_error("PLASMA_zunmlq", "only PlasmaConjTrans supported");
        return PLASMA_ERR_NOT_SUPPORTED;
    }
    if (M < 0) {
        plasma_error("PLASMA_zunmlq", "illegal value of M");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_zunmlq", "illegal value of N");
        return -4;
    }
    if (K < 0) {
        plasma_error("PLASMA_zunmlq", "illegal value of K");
        return -5;
    }
    if (LDA < max(1, K)) {
        plasma_error("PLASMA_zunmlq", "illegal value of LDA");
        return -7;
    }
    if (LDB < max(1, M)) {
        plasma_error("PLASMA_zunmlq", "illegal value of LDB");
        return -10;
    }
    /* Quick return - currently NOT equivalent to LAPACK's:
     * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) */
    if (min(M, min(N, K)) == 0)
        return PLASMA_SUCCESS;

    /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_ZGELS, M, K, N);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zunmlq", "plasma_tune() failed");
        return status;
    }

    /* Set MT, NT & NTRHS */
    NB = PLASMA_NB;
    MT = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);
    KT = (K%NB==0) ? (K/NB) : (K/NB+1);

    /* Allocate memory for matrices in block layout */
    Abdl = (PLASMA_Complex64_t *)plasma_shared_alloc(plasma, MT*KT*PLASMA_NBNBSIZE, PlasmaComplexDouble);
    Tbdl = (PLASMA_Complex64_t *)plasma_shared_alloc(plasma, MT*KT*PLASMA_IBNBSIZE, PlasmaComplexDouble);
    Bbdl = (PLASMA_Complex64_t *)plasma_shared_alloc(plasma, MT*NT*PLASMA_NBNBSIZE, PlasmaComplexDouble);
    if (Abdl == NULL || Tbdl == NULL || Bbdl == NULL) {
        plasma_error("PLASMA_zunmlq", "plasma_shared_alloc() failed");
        plasma_shared_free(plasma, Abdl);
        plasma_shared_free(plasma, Tbdl);
        plasma_shared_free(plasma, Bbdl);
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }

    PLASMA_desc descA = plasma_desc_init(
        Abdl, PlasmaComplexDouble,
        PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
        K, M, 0, 0, K, M);

    PLASMA_desc descB = plasma_desc_init(
        Bbdl, PlasmaComplexDouble,
        PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
        M, N, 0, 0, M, N);

    PLASMA_desc descT = plasma_desc_init(
        Tbdl, PlasmaComplexDouble,
        PLASMA_IB, PLASMA_NB, PLASMA_IBNBSIZE,
        K, M, 0, 0, K, M);

    plasma_parallel_call_3(plasma_lapack_to_tile,
        PLASMA_Complex64_t*, A,
        int, LDA,
        PLASMA_desc, descA);

    plasma_parallel_call_3(plasma_lapack_to_tile,
        PLASMA_Complex64_t*, B,
        int, LDB,
        PLASMA_desc, descB);

    /* Accept T from the user */
    plasma_memcpy(Tbdl, T, MT*KT*PLASMA_IBNBSIZE, PlasmaComplexDouble);

    plasma_parallel_call_3(plasma_pzunmlq,
        PLASMA_desc, descA,
        PLASMA_desc, descB,
        PLASMA_desc, descT);

    plasma_parallel_call_3(plasma_tile_to_lapack,
        PLASMA_desc, descB,
        PLASMA_Complex64_t*, B,
        int, LDB);

    plasma_shared_free(plasma, Abdl);
    plasma_shared_free(plasma, Tbdl);
    plasma_shared_free(plasma, Bbdl);
    return PLASMA_SUCCESS;
}
