/*////////////////////////////////////////////////////////////////////////////////////////
 *  -- PLASMA --
 *     University of Tennessee
 */
#include "common.h"
#include "core_blas.h"

/*////////////////////////////////////////////////////////////////////////////////////////
 *  Parallel tile LQ factorization
 */
/*
 * File-local tile accessors. They expand in the scope of the function using them and
 * pick up A, T, NBNBSIZE, IBNBSIZE, MT and plasma_aux from there.
 *   A(i,j)        - address of element NBNBSIZE*i + NBNBSIZE*MT*j of the array A
 *   T(i,j)        - address of element IBNBSIZE*i + IBNBSIZE*MT*j of the array T
 *   progress(i,j) - lvalue for entry i + MT*j of plasma_aux.progress
 */
#define A(i,j) (&A[NBNBSIZE*(i)+NBNBSIZE*MT*(j)])
#define T(i,j) (&T[IBNBSIZE*(i)+IBNBSIZE*MT*(j)])
#define progress(i,j) (plasma_aux.progress[(i)+MT*(j)])

/*////////////////////////////////////////////////////////////////////////////////////////
 *  plasma_pDGELQF - work executed by one core of the parallel tile LQ factorization.
 *
 *  Every core calls this routine with its own my_core_id. Work is assigned statically:
 *  a core owns tile row m, sweeps that row from tile column n = k up to NT-1, then moves
 *  cores_num tile rows further down; running past the last tile row wraps into the next
 *  step k. Cores synchronize only by spinning on the entries of plasma_aux.progress.
 *
 *  For a task (k, m, n) the kernel is chosen as follows:
 *    m == k, n == k : core_DGELQT on A(k,k);                      sets progress(k,k) = k
 *    m == k, n != k : core_DTSLQT on A(k,k) and A(k,n);           sets progress(k,n) = k
 *    m != k, n == k : core_DLARFB applies A(k,k)/T(k,k) to A(m,k); no progress update
 *    m != k, n != k : core_DSSRFB on A(m,k) and A(m,n);           sets progress(m,n) = k
 *
 *  Parameters
 *    M, N         matrix dimensions; only used to size the last tile row / tile column
 *    A            tile storage, addressed through the A(m,n) macro
 *    NB           tile size, also passed as the leading dimension of the tiles of A
 *    NBNBSIZE     stride (in elements) between consecutive tiles of A
 *    IBNBSIZE     stride (in elements) between consecutive tiles of T
 *    IB           passed through to the core kernels, also as leading dimension of T tiles
 *    MT, NT       number of tile rows / tile columns
 *    T            storage for the T factors, addressed through the T(m,n) macro
 *    INFO         neither read nor written by this routine
 *    cores_num    distance (in tile rows) between two consecutive rows of the same core
 *    my_core_id   index of the calling core; selects its first tile row and its
 *                 WORK / TAU scratch buffers in plasma_aux
 *
 *  NOTE(review): the first step waits for progress entries equal to -1, so the table is
 *  presumably initialised to -1 by the caller - confirm.
 *  NOTE(review): the spin loops only work if plasma_aux.progress is declared so that
 *  the compiler re-reads it on every iteration (e.g. volatile) - confirm at its
 *  declaration.
 */
void plasma_pDGELQF(int M, int N, double *A, int NB, int NBNBSIZE, int IBNBSIZE,
                    int IB, int MT, int NT, double *T, int *INFO, int cores_num,
                    int my_core_id)
{
    double *WORK = plasma_aux.WORK[my_core_id];
    double *TAU = plasma_aux.TAU[my_core_id];
    int steps = min(NT, MT);    /* number of factorization steps k */
    int k, m, n;
    int next_k;
    int next_m;
    int next_n;

    /*
     * Locate the first task of this core. The bound on k matters: without it, a core
     * whose id is too large to receive any work (e.g. MT == 1, my_core_id == 1) keeps
     * growing m until signed overflow. With it, such a core leaves the loop with
     * k == steps and skips the main loop below.
     */
    k = 0;
    m = my_core_id;
    while (m >= MT && k < steps) {
        k++;
        m = m-MT+k;
    }
    n = k;

    while (k < steps && m < MT) {
        /* Compute the task that follows (k, m, n) in this core's static schedule. */
        next_m = m;
        next_n = n;
        next_k = k;

        next_n++;
        if (next_n == NT) {
            /* End of the tile row: jump cores_num rows down, wrapping into later steps. */
            next_m += cores_num;
            while (next_m >= MT && next_k < steps) {
                next_k++;
                next_m = next_m-MT+next_k;
            }
            next_n = next_k;
        }

        if (m == k) {
            if (n == k) {
                /* Diagonal tile: wait until step k-1 has finished with it. */
                while (progress(k, k) != k-1) {
                    /* spin */
                }
                core_DGELQT(
                    k == MT-1 ? M-k*NB : NB,
                    k == NT-1 ? N-k*NB : NB,
                    IB,
                    A(k, k), NB,
                    T(k, k), IB,
                    TAU, WORK);
                progress(k, k) = k;
            }
            else {
                /* Tile to the right of the diagonal in the panel row. */
                while (progress(k, n) != k-1) {
                    /* spin */
                }
                core_DTSLQT(
                    k == MT-1 ? M-k*NB : NB,
                    n == NT-1 ? N-n*NB : NB,
                    IB,
                    A(k, k), NB,
                    A(k, n), NB,
                    T(k, n), IB,
                    TAU, WORK);
                progress(k, n) = k;
            }
        }
        else {
            if (n == k) {
                /* Needs the factored diagonal tile and A(m,k) as left by step k-1. */
                while (progress(k, k) != k) {
                    /* spin */
                }
                while (progress(m, k) != k-1) {
                    /* spin */
                }
                core_DLARFB(
                    PlasmaRight, PlasmaNoTrans,
                    PlasmaForward, PlasmaRowwise,
                    m == MT-1 ? M-m*NB : NB,
                    k == NT-1 ? N-k*NB : NB,
                    NB,
                    IB,
                    A(k, k), NB,
                    T(k, k), IB,
                    A(m, k), NB,
                    WORK, NB);
                /* No progress update here: the same core continues along row m. */
            }
            else {
                /* Needs the step-k result of A(k,n) and A(m,n) as left by step k-1. */
                while (progress(k, n) != k) {
                    /* spin */
                }
                while (progress(m, n) != k-1) {
                    /* spin */
                }
                core_DSSRFB(
                    PlasmaRight, PlasmaRowwise,
                    NB,
                    n == NT-1 ? N-n*NB : NB,
                    m == MT-1 ? M-m*NB : NB,
                    IB,
                    NB,
                    A(m, k), NB,
                    A(m, n), NB,
                    A(k, n), NB,
                    T(k, n), IB,
                    WORK);
                progress(m, n) = k;
            }
        }
        m = next_m;
        n = next_n;
        k = next_k;
    }
}

/* Remove the file-local A / T / progress accessor macros so they do not leak further. */
#undef A
#undef T
#undef progress
