/*////////////////////////////////////////////////////////////////////////////////////////
 *  -- PLASMA --
 *     University of Tennessee
 */
#include "common.h"
#include "auxiliary.h"
#include "allocate.h"
#include "bdl_convert.h"
#include "barrier.h"

#include <string.h>

/*////////////////////////////////////////////////////////////////////////////////////////
 *  Computes LU factorization
 */
int plasma_DGETRF(int M, int N, double *A, int LDA, double *L, int *IPIV)
{
    int NB, MT, NT;
    int status;
    double *Abdl;
    double *Lbdl;
    double *bdl_mem;
    PLASMA_long size_elems;

    /* Check if initialized */
    if (!plasma_cntrl.initialized) {
        plasma_warning("plasma_DGETRF", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments */
    if (M < 0) {
        plasma_error("plasma_DGETRF", "illegal value of M");
        return PLASMA_ERR_ILLEGAL_VALUE;
    }
    if (N < 0) {
        plasma_error("plasma_DGETRF", "illegal value of N");
        return PLASMA_ERR_ILLEGAL_VALUE;
    }
    if (LDA < max(1, M)) {
        plasma_error("plasma_DGETRF", "illegal value of LDA");
        return PLASMA_ERR_ILLEGAL_VALUE;
    }
    /* Quick return */
    if (min(M, N) == 0)
        return PLASMA_SUCCESS;

    /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_TUNE_DGESV, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("plasma_DGETRF", "plasma_tune() failed");
        return status;
    }

    /* Set NT & NTRHS */
    NB = plasma_cntrl.NB;
    MT = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);

    /* If progress table too small, reallocate */
    size_elems = MT*NT;
    if (plasma_cntrl.progress_size_elems < size_elems) {
        status = plasma_free_aux_progress();
        if (status != PLASMA_SUCCESS) {
            plasma_error("plasma_DGETRF", "plasma_free_aux_progress() failed");
        }
        status = plasma_alloc_aux_progress(size_elems);
        if (status != PLASMA_SUCCESS) {
            plasma_error("plasma_DGETRF", "plasma_alloc_aux_progress() failed");
            return status;
        }
    }

    /* Assign arrays to BDL storage */
    bdl_mem = plasma_aux.bdl_mem;
    Abdl = bdl_mem; bdl_mem += MT*NT*plasma_cntrl.NBNBSIZE;
    Lbdl = bdl_mem; bdl_mem += MT*NT*plasma_cntrl.IBNBSIZE;
    /* If BDL storage too small, reallocate & reassign */
    size_elems = bdl_mem - plasma_aux.bdl_mem;
    if (plasma_cntrl.bdl_size_elems < size_elems) {
        status = plasma_free_aux_bdl();
        if (status != PLASMA_SUCCESS) {
            plasma_error("plasma_DGETRF", "plasma_free_aux_bdl() failed");
            return status;
        }
        status = plasma_alloc_aux_bdl(size_elems, PLASMA_TRUE);
        if (status != PLASMA_SUCCESS) {
            plasma_error("plasma_DGETRF", "plasma_alloc_aux_bdl() failed");
            return status;
        }
        bdl_mem = plasma_aux.bdl_mem;
        Abdl = bdl_mem; bdl_mem += MT*NT*plasma_cntrl.NBNBSIZE;
        Lbdl = bdl_mem; bdl_mem += MT*NT*plasma_cntrl.IBNBSIZE;
    }

    /* Convert A from LAPACK to BDL */
    /* Set arguments */
    plasma_args.F77 = A;
    plasma_args.A = Abdl;
    plasma_args.M = M;
    plasma_args.N = N;
    plasma_args.LDA = LDA;
    plasma_args.NB = plasma_cntrl.NB;
    plasma_args.MT = MT;
    plasma_args.NT = NT;
    plasma_args.NBNBSIZE = plasma_cntrl.NBNBSIZE;
    /* Signal workers */
    pthread_mutex_lock(&plasma_cntrl.action_mutex);
    plasma_cntrl.action = PLASMA_ACT_F77_TO_BDL;
    pthread_mutex_unlock(&plasma_cntrl.action_mutex);
    pthread_cond_broadcast(&plasma_cntrl.action_condt);
    /* Call for master */
    plasma_barrier(0, plasma_cntrl.cores_num);
    plasma_cntrl.action = PLASMA_ACT_STAND_BY;
    plasma_lapack_to_bdl(plasma_args.F77, plasma_args.A, plasma_args.M, plasma_args.N,
                         plasma_args.LDA, plasma_args.NB, plasma_args.MT, plasma_args.NT,
                         plasma_args.NBNBSIZE, plasma_cntrl.cores_num, 0);
    plasma_barrier(0, plasma_cntrl.cores_num);

    /* Clear IPIV and Lbdl */
    memset(IPIV, 0, MT*NT*plasma_cntrl.NB*sizeof(int));
    memset(Lbdl, 0, MT*NT*plasma_cntrl.IBNBSIZE*sizeof(double));

    /* Use LU factorization */
    /* Call parallel DGETRF */
    /* Set arguments */
    plasma_args.M = M;
    plasma_args.N = N;
    plasma_args.A = Abdl;
    plasma_args.NB = plasma_cntrl.NB;
    plasma_args.NBNBSIZE = plasma_cntrl.NBNBSIZE;
    plasma_args.IBNBSIZE = plasma_cntrl.IBNBSIZE;
    plasma_args.IB = plasma_cntrl.IB;
    plasma_args.MT = MT;
    plasma_args.NT = NT;
    plasma_args.L = Lbdl;
    plasma_args.IPIV = IPIV;
    /* Clear progress table */
    plasma_clear_aux_progress(MT*NT, -1);
    /* Signal workers */
    pthread_mutex_lock(&plasma_cntrl.action_mutex);
    plasma_cntrl.action = PLASMA_ACT_DGETRF;
    pthread_mutex_unlock(&plasma_cntrl.action_mutex);
    pthread_cond_broadcast(&plasma_cntrl.action_condt);
    /* Call for master */
    plasma_barrier(0, plasma_cntrl.cores_num);
    plasma_cntrl.action = PLASMA_ACT_STAND_BY;
    plasma_pDGETRF(plasma_args.M, plasma_args.N, plasma_args.A, plasma_args.NB,
                   plasma_args.NBNBSIZE, plasma_args.IBNBSIZE, plasma_args.IB,
                   plasma_args.MT, plasma_args.NT, plasma_args.L, plasma_args.IPIV,
                   &plasma_args.INFO, plasma_cntrl.cores_num, 0);
    plasma_barrier(0, plasma_cntrl.cores_num);

    /* Return L to the user */
    memcpy(L, Lbdl, MT*NT*plasma_args.IBNBSIZE*sizeof(double));

    /* Convert A from BDL to LAPACK */
    /* Set arguments */
    plasma_args.A = Abdl;
    plasma_args.F77 = A;
    plasma_args.M = M;
    plasma_args.N = N;
    plasma_args.LDA = LDA;
    plasma_args.NB = plasma_cntrl.NB;
    plasma_args.MT = MT;
    plasma_args.NT = NT;
    plasma_args.NBNBSIZE = plasma_cntrl.NBNBSIZE;
    /* Signal workers */
    pthread_mutex_lock(&plasma_cntrl.action_mutex);
    plasma_cntrl.action = PLASMA_ACT_BDL_TO_F77;
    pthread_mutex_unlock(&plasma_cntrl.action_mutex);
    pthread_cond_broadcast(&plasma_cntrl.action_condt);
    /* Call for master */
    plasma_barrier(0, plasma_cntrl.cores_num);
    plasma_cntrl.action = PLASMA_ACT_STAND_BY;
    plasma_bdl_to_lapack(plasma_args.A, plasma_args.F77, plasma_args.M, plasma_args.N,
                         plasma_args.LDA, plasma_args.NB, plasma_args.MT, plasma_args.NT,
                         plasma_args.NBNBSIZE, plasma_cntrl.cores_num, 0);
    plasma_barrier(0, plasma_cntrl.cores_num);

    return plasma_args.INFO;
}
