/**
 *
 * @file pzunglq.c
 *
 *  PLASMA auxiliary routines
 *  PLASMA is a software package provided by Univ. of Tennessee,
 *  Univ. of California Berkeley and Univ. of Colorado Denver
 *
 * @version 2.2.0
 * @author Hatem Ltaief
 * @author Jakub Kurzak
 * @date 2009-11-15
 *
 **/
#include "common.h"

/*
 * Per-descriptor shorthands around BLKADDR (defined outside this file) with
 * element type PLASMA_Complex64_t. Each macro expands to a reference to an
 * identifier of the same name (A, Q or T), so it is only usable where such a
 * descriptor is in scope. Presumably BLKADDR yields the address of tile
 * (m, n) of the descriptor -- confirm against its definition.
 */
#define A(m,n) BLKADDR(A, PLASMA_Complex64_t, m, n)
#define Q(m,n) BLKADDR(Q, PLASMA_Complex64_t, m, n)
#define T(m,n) BLKADDR(T, PLASMA_Complex64_t, m, n)
/***************************************************************************//**
 *  Parallel construction of Q using tile V (application to identity) - dynamic scheduling
 *
 *  Walks the tile index k from min(A.mt, A.nt)-1 down to 0. For each k it
 *  first queues one CORE_zssmlq_quark task for every tile (m, n) of Q with
 *  n > k (n descending, m ascending), then one CORE_zunmlq_quark task for
 *  every tile row m of Q. All tasks are tagged with sequence->quark_sequence.
 *
 *  @param A         descriptor whose tiles A(k, n) and A(k, k) are passed as INPUT
 *  @param Q         descriptor whose tiles Q(m, k) and Q(m, n) are passed as INOUT
 *  @param T         descriptor whose tiles T(k, n) and T(k, k) are passed as INPUT
 *  @param sequence  nothing is queued unless sequence->status == PLASMA_SUCCESS
 *  @param request   not referenced by this routine
 **/
void plasma_pzunglq_quark(PLASMA_desc A, PLASMA_desc Q, PLASMA_desc T, PLASMA_sequence *sequence, PLASMA_request *request)
{
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;
    PLASMA_enum side  = PlasmaRight;
    PLASMA_enum trans = PlasmaNoTrans;
    plasma_context_t *plasma;
    int k, m, n;
    int q_row_extent, q_col_extent;                     /* edge-aware sizes taken from Q */
    int a_row_extent_k, a_col_extent_k, a_min_extent_k; /* edge-aware sizes taken from A */

    plasma = plasma_context_self();
    if (sequence->status != PLASMA_SUCCESS)
        return;
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    /*
     * NOTE(review): every VALUE argument below is handed over by address of a
     * local that is rewritten on later iterations; this presumes QUARK copies
     * VALUE data when the task is inserted -- confirm against the QUARK docs.
     */
    for (k = min(A.mt, A.nt); k-- > 0; ) {
        /* Sizes that only depend on k: full tile unless k is the last tile of A. */
        a_row_extent_k = (k == A.mt-1) ? A.m - k*A.nb : A.nb;
        a_col_extent_k = (k == A.nt-1) ? A.n - k*A.nb : A.nb;
        a_min_extent_k = (k == A.mt-1) ? min(A.m, A.n) - k*A.nb : A.nb;

        /* Tiles of Q to the right of column k, visited right to left. */
        for (n = Q.nt - 1; n > k; n--) {
            q_col_extent = (n == Q.nt-1) ? Q.n - n*Q.nb : Q.nb;

            for (m = 0; m < Q.mt; m++) {
                q_row_extent = (m == Q.mt-1) ? Q.m - m*Q.nb : Q.nb;

                QUARK_Insert_Task(plasma->quark, CORE_zssmlq_quark, &task_flags,
                    sizeof(PLASMA_enum),                  &side,           VALUE,
                    sizeof(PLASMA_enum),                  &trans,          VALUE,
                    sizeof(int),                          &A.nb,           VALUE,
                    sizeof(int),                          &q_col_extent,   VALUE,
                    sizeof(int),                          &q_row_extent,   VALUE,
                    sizeof(int),                          &T.mb,           VALUE,
                    sizeof(int),                          &a_row_extent_k, VALUE,
                    sizeof(PLASMA_Complex64_t)*A.mb*A.nb, Q(m, k),         INOUT | LOCALITY,
                    sizeof(int),                          &Q.nb,           VALUE,
                    sizeof(PLASMA_Complex64_t)*A.mb*A.nb, Q(m, n),         INOUT,
                    sizeof(int),                          &Q.nb,           VALUE,
                    sizeof(PLASMA_Complex64_t)*A.mb*A.nb, A(k, n),         INPUT,
                    sizeof(int),                          &A.nb,           VALUE,
                    sizeof(PLASMA_Complex64_t)*T.mb*T.nb, T(k, n),         INPUT,
                    sizeof(int),                          &T.mb,           VALUE,
                    sizeof(PLASMA_Complex64_t)*T.mb*T.nb, NULL,            SCRATCH,
                    sizeof(int),                          &T.nb,           VALUE,
                    0);
            }
        }

        /* Column k of Q itself, one task per tile row. */
        for (m = 0; m < Q.mt; m++) {
            q_row_extent = (m == Q.mt-1) ? Q.m - m*Q.nb : Q.nb;

            QUARK_Insert_Task(plasma->quark, CORE_zunmlq_quark, &task_flags,
                sizeof(PLASMA_enum),                  &side,           VALUE,
                sizeof(PLASMA_enum),                  &trans,          VALUE,
                sizeof(int),                          &q_row_extent,   VALUE,
                sizeof(int),                          &a_col_extent_k, VALUE,
                sizeof(int),                          &T.mb,           VALUE,
                sizeof(int),                          &a_min_extent_k, VALUE,
                sizeof(PLASMA_Complex64_t)*A.mb*A.nb, A(k, k),         INPUT,
                sizeof(int),                          &A.nb,           VALUE,
                sizeof(PLASMA_Complex64_t)*T.mb*T.nb, T(k, k),         INPUT,
                sizeof(int),                          &T.mb,           VALUE,
                sizeof(PLASMA_Complex64_t)*A.mb*A.nb, Q(m, k),         INOUT | LOCALITY,
                sizeof(int),                          &Q.nb,           VALUE,
                sizeof(PLASMA_Complex64_t)*T.mb*T.nb, NULL,            SCRATCH,
                sizeof(int),                          &T.nb,           VALUE,
                0);
        }
    }
}
