/* ///////////////////////////// P /// L /// A /// S /// M /// A /////////////////////////////// */
/* ///                    PLASMA auxiliary routines (version 2.1.0)                          ///
 * ///                    Author: Jakub Kurzak, Hatem Ltaief                                 ///
 * ///                    Release Date: November, 15th 2009                                  ///
 * ///                    PLASMA is a software package provided by Univ. of Tennessee,       ///
 * ///                    Univ. of California Berkeley and Univ. of Colorado Denver          /// */
/* ///////////////////////////////////////////////////////////////////////////////////////////// */
#include "common.h"

/* ///////////////////////////////////////////////////////////////////////////////////////////// */
//  Parallel triangular solve
// A(m,n) / B(m,n): address of the PLASMA_Complex32_t element located
// bsiz*m + bsiz*lmt*n elements past the start of the descriptor's `mat`
// buffer (presumably the first element of tile (m,n) -- confirm against the
// PLASMA_desc layout). Each macro expects a descriptor variable of the same
// name (A or B) to be in scope at the point of use.
#define A(m,n) ((PLASMA_Complex32_t*)A.mat + (A.bsiz*(m)+A.bsiz*A.lmt*(n)))
#define B(m,n) ((PLASMA_Complex32_t*)B.mat + (B.bsiz*(m)+B.bsiz*B.lmt*(n)))
/*
 * plasma_pctrsm - per-thread body of the statically scheduled tile
 * triangular solve.
 *
 * Arguments are unpacked from the context: side, uplo, transA, diag, alpha,
 * and the descriptors A (triangular matrix) and B (right-hand sides, updated
 * in place).
 *
 * Work items are triples (k, m, n) with k <= m < A.nt and 0 <= n < B.nt:
 *   - m == k : CORE_ctrsm on the diagonal tile of step k against B tile n;
 *   - m != k : CORE_cgemm update of B tile row m using the result of step k.
 * Each thread starts at m = PLASMA_RANK, walks n fastest, then advances m by
 * PLASMA_SIZE, spilling over into the next step k when m runs past A.nt.
 *
 * For (Lower, ConjTrans) and (Upper, NoTrans) the tile rows are visited in
 * reverse, so the physical tile index is A.nt-1-k (resp. A.nt-1-m) and the
 * partially filled last tile is the one handled at k == 0.
 *
 * Ordering between threads goes through ss_cond_wait / ss_cond_set on entry
 * (m, n); presumably each entry records the last step k applied to that tile
 * (initialised to -1 by ss_init) -- confirm against the ss_* macro
 * definitions.
 *
 * NOTE(review): `alpha` is unpacked but never applied; every kernel call uses
 * the literal 1.0 / -1.0. Callers passing alpha != 1 get an unscaled result.
 * NOTE(review): `side` is unpacked but ignored; only the PlasmaLeft case is
 * implemented.
 */
void plasma_pctrsm(plasma_context_t *plasma)
{
    PLASMA_enum side;
    PLASMA_enum uplo;
    PLASMA_enum transA;
    PLASMA_enum diag;
    PLASMA_Complex32_t      alpha;
    PLASMA_desc A;
    PLASMA_desc B;

    int k, m, n;
    int next_k;
    int next_m;
    int next_n;

    plasma_unpack_args_7(side, uplo, transA, diag, alpha, A, B);
    ss_init(B.mt, B.nt, -1);

    /*
     * Locate this thread's first work item. Step k owns the rows m in
     * [k, A.nt), so a rank beyond the current step is carried into the next
     * one. The k < A.nt bound (same as in the advance loop below) stops the
     * search when the rank exceeds the total amount of work; without it m can
     * no longer shrink once k reaches A.nt and the loop never terminates.
     */
    k = 0;
    m = PLASMA_RANK;
    while (m >= A.nt && k < A.nt) {
        k++;
        m = m-A.nt+k;
    }
    n = 0;

    while (k < A.nt && m < A.nt) {
        /* Compute the successor item before doing the work for this one. */
        next_n = n;
        next_m = m;
        next_k = k;

        next_n++;
        if (next_n >= B.nt) {
            /* Finished all column tiles of this row: jump PLASMA_SIZE rows. */
            next_m += PLASMA_SIZE;
            while (next_m >= A.nt && next_k < A.nt) {
                next_k++;
                next_m = next_m-A.nt+next_k;
            }
            next_n = 0;
        }

        if (m == k)
        {
            /* Diagonal item: all updates from steps < k must be in place. */
            ss_cond_wait(m, n, k-1);
            if (uplo == PlasmaLower) {
                if (transA == PlasmaNoTrans)
                    CORE_ctrsm(
                        PlasmaLeft, PlasmaLower,
                        PlasmaNoTrans, diag,
                        k == A.nt-1 ? A.n-k*A.nb : A.nb,
                        n == B.nt-1 ? B.n-n*B.nb : B.nb,
                        1.0, A(k, k), A.nb,
                             B(k, n), B.nb);
                else
                    CORE_ctrsm(
                        PlasmaLeft, PlasmaLower,
                        PlasmaConjTrans, diag,
                        k == 0 ? A.n-(A.nt-1)*A.nb : A.nb,
                        n == B.nt-1 ? B.n-n*B.nb : B.nb,
                        1.0, A(A.nt-1-k, A.nt-1-k), A.nb,
                             B(A.nt-1-k, n), B.nb);
            }
            else {
                if (transA == PlasmaNoTrans)
                    CORE_ctrsm(
                        PlasmaLeft, PlasmaUpper,
                        PlasmaNoTrans, diag,
                        k == 0 ? A.n-(A.nt-1)*A.nb : A.nb,
                        n == B.nt-1 ? B.n-n*B.nb : B.nb,
                        1.0, A(A.nt-1-k, A.nt-1-k), A.nb,
                             /* leading dimension of a B tile is B.nb */
                             B(A.nt-1-k, n), B.nb);
                else
                    CORE_ctrsm(
                        PlasmaLeft, PlasmaUpper,
                        PlasmaConjTrans, diag,
                        k == A.nt-1 ? A.n-k*A.nb : A.nb,
                        n == B.nt-1 ? B.n-n*B.nb : B.nb,
                        1.0, A(k, k), A.nb,
                             B(k, n), B.nb);
            }
            ss_cond_set(k, n, k);
        }
        else
        {
            /*
             * Off-diagonal item: needs the solved tile of step k and the
             * previous update (step k-1) of the tile being modified.
             */
            ss_cond_wait(k, n, k);
            ss_cond_wait(m, n, k-1);
            if (uplo == PlasmaLower) {
                if (transA == PlasmaNoTrans)
                    CORE_cgemm(
                        PlasmaNoTrans, PlasmaNoTrans,
                        m == A.nt-1 ? A.n-m*A.nb : A.nb,
                        n == B.nt-1 ? B.n-n*B.nb : B.nb,
                        A.nb,
                       -1.0, A(m, k), A.nb,
                             B(k, n), B.nb,
                        1.0, B(m, n), B.nb);
                else
                    CORE_cgemm(
                        PlasmaConjTrans, PlasmaNoTrans,
                        A.nb,
                        n == B.nt-1 ? B.n-n*B.nb : B.nb,
                        k == 0 ? A.n-(A.nt-1)*A.nb : A.nb,
                       -1.0, A(A.nt-1-k, A.nt-1-m), A.nb,
                             B(A.nt-1-k, n), B.nb,
                        1.0, B(A.nt-1-m, n), B.nb);
            }
            else {
                if (transA == PlasmaNoTrans)
                    CORE_cgemm(
                        PlasmaNoTrans, PlasmaNoTrans,
                        A.nb,
                        n == B.nt-1 ? B.n-n*B.nb : B.nb,
                        k == 0 ? A.n-(A.nt-1)*A.nb : A.nb,
                       -1.0, A(A.nt-1-m, A.nt-1-k), A.nb,
                             B(A.nt-1-k, n), B.nb,
                        1.0, B(A.nt-1-m, n), B.nb);
                else
                    CORE_cgemm(
                        PlasmaConjTrans, PlasmaNoTrans,
                        m == A.nt-1 ? A.n-m*A.nb : A.nb,
                        n == B.nt-1 ? B.n-n*B.nb : B.nb,
                        A.nb,
                       -1.0, A(k, m), A.nb,
                             B(k, n), B.nb,
                        1.0, B(m, n), B.nb);
            }
            ss_cond_set(m, n, k);
        }
        n = next_n;
        m = next_m;
        k = next_k;
    }
    ss_finalize();
}
