/* ///////////////////////////// P /// L /// A /// S /// M /// A /////////////////////////////// */
/* ///                    PLASMA auxiliary routines (version 2.0.0)                          ///
 * ///                    Release Date: July, 4th 2009                                       ///
 * ///                    PLASMA is a software package provided by Univ. of Tennessee,       ///
 * ///                    Univ. of California Berkeley and Univ. of Colorado Denver          /// */
/* ///////////////////////////////////////////////////////////////////////////////////////////// */
#include "common.h"

/* ///////////////////////////////////////////////////////////////////////////////////////////// */
//  Parallel application of Q using tile V - LQ factorization
//
//  Tile accessors: X(m,n) yields the address of the element located at offset
//  X.bsiz*m + X.bsiz*X.lmt*n inside X.mat, with X.mat viewed as an array of
//  PLASMA_Complex64_t. Each macro expands against a variable carrying the same
//  name as the macro (A, B or T), so a descriptor with that exact name must be
//  in scope wherever the macro is used.
#define A(m,n) &((PLASMA_Complex64_t*)A.mat)[A.bsiz*(m)+A.bsiz*A.lmt*(n)]
#define B(m,n) &((PLASMA_Complex64_t*)B.mat)[B.bsiz*(m)+B.bsiz*B.lmt*(n)]
#define T(m,n) &((PLASMA_Complex64_t*)T.mat)[T.bsiz*(m)+T.bsiz*T.lmt*(n)]
/*
 * Applies the reflectors stored in the tiles of A (with their T factors) to
 * the tiles of B, in place, using CORE_zunmlq on the diagonal tile of each
 * panel and CORE_zssmlq on the tiles below it. Both kernels are invoked with
 * PlasmaLeft / PlasmaConjTrans.
 *
 * The three descriptors A, B and T are unpacked from the context; the local
 * names A, B and T are required by the tile accessor macros of the same name.
 *
 * Iteration order: panels k run from min(A.mt, A.nt)-1 down to 0; for a given
 * (k, n) the tile rows m run from B.mt-1 down to k. The starting column is
 * PLASMA_RANK and each finished column advances n by PLASMA_SIZE, wrapping
 * into the previous panel when n runs past B.nt.
 * NOTE(review): presumably this is executed concurrently by every worker, with
 * PLASMA_RANK / PLASMA_SIZE being the worker index / worker count - confirm.
 *
 * A private scratch buffer of T.mb*T.nb elements is allocated on entry and
 * released before returning.
 */
void plasma_pzunmlq(plasma_context_t *plasma)
{
    PLASMA_desc A;
    PLASMA_desc B;
    PLASMA_desc T;

    int k, m, n;
    int next_k;
    int next_m;
    int next_n;
    int nrefl;
    PLASMA_Complex64_t *work;

    plasma_unpack_args_3(A, B, T);
    work = (PLASMA_Complex64_t *)plasma_private_alloc(plasma, T.mb*T.nb, T.dtyp);
    ss_init(B.mt, B.nt, min(A.mt, A.nt));

    /* Locate the first (k, n) pair owned by this rank. The k >= 0 bound keeps
     * the loop finite when B.nt is 0 (n would otherwise never shrink); for
     * B.nt > 0 it changes nothing because the main loop rejects k < 0. */
    k = min(A.mt, A.nt)-1;
    n = PLASMA_RANK;
    while (k >= 0 && n >= B.nt) {
        k--;
        n = n-B.nt;
    }
    m = B.mt-1;

    while (k >= 0 && n < B.nt) {
        /* Determine the task that follows the current one before running it. */
        next_n = n;
        next_m = m;
        next_k = k;

        next_m--;
        if (next_m == k-1) {
            /* Column (k, n) is finished: jump PLASMA_SIZE columns ahead,
             * stepping to earlier panels while the column index overflows. */
            next_n += PLASMA_SIZE;
            while (next_n >= B.nt && next_k >= 0) {
                next_k--;
                next_n = next_n-B.nt;
            }
            next_m = B.mt-1;
        }

        /* Number of reflectors taken from tile row k of A. Both kernels below
         * read reflectors from that same tile row (A(k,k) / A(k,m)), so they
         * must be given the same count, and it must never exceed A.nb. */
        nrefl = k == A.mt-1 ? min(A.m-k*A.nb, A.n-k*A.nb) : A.nb;

        if (m == k) {
            /* Last task of column (k, n): update the diagonal tile row B(k, n).
             * NOTE(review): work holds T.mb*T.nb elements; passing A.nb as its
             * leading dimension presumes A.nb == T.nb - confirm. */
            CORE_zunmlq(
                PlasmaLeft, PlasmaConjTrans,
                k == A.nt-1 ? A.n-k*A.nb : A.nb,
                n == B.nt-1 ? B.n-n*B.nb : B.nb,
                T.mb,
                nrefl,
                A(k, k), A.nb,
                T(k, k), T.mb,
                B(k, n), B.nb,
                work, A.nb);
            ss_cond_set(k, n, k);
        }
        else {
            /* Tile B(m, n) was last written during panel k+1.
             * NOTE(review): presumably ss_cond_wait blocks until that update
             * has been published via ss_cond_set - confirm. */
            ss_cond_wait(m, n, k+1);
            CORE_zssmlq(
                PlasmaLeft, PlasmaConjTrans,
                A.nb,
                m == B.mt-1 ? B.m-m*B.nb : B.nb,
                n == B.nt-1 ? B.n-n*B.nb : B.nb,
                T.mb,
                nrefl,
                B(k, n), B.nb,
                B(m, n), B.nb,
                A(k, m), A.nb,
                T(k, m), T.mb,
                work, T.mb);
            ss_cond_set(m, n, k);
        }
        m = next_m;
        n = next_n;
        k = next_k;
    }
    plasma_private_free(plasma, work);
    ss_finalize();
}
