/*////////////////////////////////////////////////////////////////////////////////////////
 *  -- PLASMA --
 *     University of Tennessee
 */
#include "common.h"
#include "core_blas.h"

/*////////////////////////////////////////////////////////////////////////////////////////
 *  Parallel triangular solve
 */
/*
 * Tile-access helpers. They expand in terms of names that must be in scope at
 * the point of use (A, B, NBNBSIZE, MT, MTB, NT, plasma_aux) and are #undef'd
 * at the end of the file.
 *
 *  A(m,n)        - address of the element at offset NBNBSIZE*(m + MT*(n)) in A,
 *                  i.e. tile (m,n) when consecutive tiles are NBNBSIZE elements
 *                  apart and a tile column holds MT tiles.
 *  B(m,n)        - same addressing for B, with MTB tiles per tile column.
 *  progress(m,n) - lvalue for entry (m + NT*(n)) of plasma_aux.progress.
 */
#define A(m,n) &A[NBNBSIZE*(m)+NBNBSIZE*MT*(n)]
#define B(m,n) &B[NBNBSIZE*(m)+NBNBSIZE*MTB*(n)]
#define progress(m,n) plasma_aux.progress[(m)+NT*(n)]

/*////////////////////////////////////////////////////////////////////////////////////////
 *  plasma_pDTRSM - one core's share of a tiled triangular solve
 *      op(A) * X = alpha * B,   X overwriting B.
 *
 *  Every participating core calls this routine with its own my_core_id. Work is
 *  split into tasks (k, m, n): step k, logical tile row m (k <= m < NT) and tile
 *  column n of B. Task m == k runs core_DTRSM on the diagonal tile; tasks m > k
 *  run core_DGEMM to update tile row m with the freshly solved tile row k.
 *  Cores synchronize through progress(m, n), which records the last step applied
 *  to logical tile row m of column n; step 0 waits for the value -1, so the table
 *  is expected to hold -1 on entry.
 *
 *  For Lower/NoTrans and Upper/Trans the sweep runs top-down (logical row i is
 *  tile row i); for Lower/Trans and Upper/NoTrans it runs bottom-up (logical
 *  row i is tile row NT-1-i).
 *
 *  side       - not consulted; every kernel is invoked with PlasmaLeft.
 *  uplo       - PlasmaLower selects the lower triangle; any other value is
 *               handled as upper.
 *  transA     - PlasmaNoTrans selects op(A) = A; any other value is handled as
 *               PlasmaTrans.
 *  diag       - forwarded unchanged to core_DTRSM.
 *  N          - order of A / number of rows of B (sizes the last tile row).
 *  NRHS       - number of columns of B (sizes the last tile column).
 *  alpha      - scalar applied to B; folded in exactly once per tile at step 0.
 *  A, B       - tile storage, addressed through the A(m,n) / B(m,n) macros.
 *  NB         - tile dimension, also passed to the kernels as leading dimension.
 *  NBNBSIZE   - element distance between consecutive tiles.
 *  NT         - number of tile rows/columns of the triangular matrix.
 *  MT, MTB    - tiles per tile column in the storage of A and B respectively.
 *  NTRHS      - number of tile columns of B.
 *  cores_num  - number of cooperating cores (stride of the task distribution).
 *  my_core_id - index of the calling core; selects its first task.
 */
void plasma_pDTRSM(PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum transA, PLASMA_enum diag,
                   int N, int NRHS, double alpha, double *A, int NB, int NBNBSIZE, int NT,
                   int MT, double *B, int MTB, int NTRHS, int cores_num, int my_core_id)
{
    int k, m, n;
    int next_k;
    int next_m;
    int next_n;
    int ncols;      /* width of the current tile column of B */
    double lalpha;  /* alpha on the first step, 1.0 afterwards */

    (void)side;     /* only the left-side solve is implemented */

    /* Nothing to solve: avoid touching tiles or progress entries that do not exist. */
    if (NT <= 0 || NTRHS <= 0)
        return;

    /*
     * Locate this core's first task. Step k offers NT-k tile rows (m = k..NT-1).
     * The k < NT bound stops the search once all steps are exhausted; without it
     * a core with no work would keep growing m until signed overflow.
     */
    k = 0;
    m = my_core_id;
    while (m >= NT && k < NT) {
        k++;
        m = m-NT+k;
    }
    n = 0;

    while (k < NT && m < NT) {
        /* Work out the following task before executing the current one. */
        next_n = n+1;
        next_m = m;
        next_k = k;
        if (next_n >= NTRHS) {
            /* All tile columns done for (k, m): advance by cores_num tile rows. */
            next_m += cores_num;
            while (next_m >= NT && next_k < NT) {
                next_k++;
                next_m = next_m-NT+next_k;
            }
            next_n = 0;
        }

        /*
         * Each tile of B is touched exactly once during step 0 (TRSM for m == 0,
         * GEMM otherwise), so applying alpha there scales every tile once.
         */
        lalpha = k == 0 ? alpha : 1.0;
        ncols = n == NTRHS-1 ? NRHS-n*NB : NB;

        if (m == k)
        {
            /*
             * Diagonal task: wait until all updates from step k-1 reached this tile.
             * NOTE(review): this spin relies on plasma_aux.progress being re-read on
             * every iteration (e.g. declared volatile) - declaration not visible here.
             */
            while (progress(m, n) != k-1);
            if (uplo == PlasmaLower) {
                if (transA == PlasmaNoTrans)
                    core_DTRSM(
                        PlasmaLeft, PlasmaLower,
                        PlasmaNoTrans, diag,
                        k == NT-1 ? N-k*NB : NB,
                        ncols,
                        lalpha, A(k, k), NB,
                                B(k, n), NB);
                else
                    core_DTRSM(
                        PlasmaLeft, PlasmaLower,
                        PlasmaTrans, diag,
                        k == 0 ? N-(NT-1)*NB : NB,
                        ncols,
                        lalpha, A(NT-1-k, NT-1-k), NB,
                                B(NT-1-k, n), NB);
            }
            else {
                if (transA == PlasmaNoTrans)
                    core_DTRSM(
                        PlasmaLeft, PlasmaUpper,
                        PlasmaNoTrans, diag,
                        k == 0 ? N-(NT-1)*NB : NB,
                        ncols,
                        lalpha, A(NT-1-k, NT-1-k), NB,
                                B(NT-1-k, n), NB);
                else
                    core_DTRSM(
                        PlasmaLeft, PlasmaUpper,
                        PlasmaTrans, diag,
                        k == NT-1 ? N-k*NB : NB,
                        ncols,
                        lalpha, A(k, k), NB,
                                B(k, n), NB);
            }
            /* Publish: tile row k of column n is solved. */
            progress(k, n) = k;
        }
        else
        {
            /* Update task: needs the solved tile row k and this row's step k-1 update. */
            while (progress(k, n) != k);
            while (progress(m, n) != k-1);
            if (uplo == PlasmaLower) {
                if (transA == PlasmaNoTrans)
                    core_DGEMM(
                        PlasmaNoTrans, PlasmaNoTrans,
                        m == NT-1 ? N-m*NB : NB,
                        ncols,
                        NB,
                       -1.0,   A(m, k), NB,
                               B(k, n), NB,
                        lalpha, B(m, n), NB);
                else
                    core_DGEMM(
                        PlasmaTrans, PlasmaNoTrans,
                        NB,
                        ncols,
                        k == 0 ? N-(NT-1)*NB : NB,
                       -1.0,   A(NT-1-k, NT-1-m), NB,
                               B(NT-1-k, n), NB,
                        lalpha, B(NT-1-m, n), NB);
            }
            else {
                if (transA == PlasmaNoTrans)
                    core_DGEMM(
                        PlasmaNoTrans, PlasmaNoTrans,
                        NB,
                        ncols,
                        k == 0 ? N-(NT-1)*NB : NB,
                       -1.0,   A(NT-1-m, NT-1-k), NB,
                               B(NT-1-k, n), NB,
                        lalpha, B(NT-1-m, n), NB);
                else
                    core_DGEMM(
                        PlasmaTrans, PlasmaNoTrans,
                        m == NT-1 ? N-m*NB : NB,
                        ncols,
                        NB,
                       -1.0,   A(k, m), NB,
                               B(k, n), NB,
                        lalpha, B(m, n), NB);
            }
            /* Publish: tile row m of column n has absorbed step k. */
            progress(m, n) = k;
        }
        n = next_n;
        m = next_m;
        k = next_k;
    }
}

/* The tile-access macros are private to this file; drop them so the short names do not leak. */
#undef A
#undef B
#undef progress
