/* ///////////////////////////// P /// L /// A /// S /// M /// A /////////////////////////////// */
/* ///                    PLASMA auxiliary routines (version 2.1.0)                          ///
 * ///                    Author: Emmanuel Agullo                                            ///
 * ///                    Release Date: November, 15th 2009                                  ///
 * ///                    PLASMA is a software package provided by Univ. of Tennessee,       ///
 * ///                    Univ. of California Berkeley and Univ. of Colorado Denver          /// */
/* ///////////////////////////////////////////////////////////////////////////////////////////// */
#include "common.h"
#include "lapack.h"

/* ///////////////////////////////////////////////////////////////////////////////////////////// */
//  Parallel tile DGEMM matrix-matrix operations
/*
 * Tile accessors: address of the first element of tile (m,n) in the
 * descriptor of the same name. The descriptor's `mat` buffer is viewed as an
 * array of doubles; the tile starts at element bsiz*m + bsiz*lmt*n.
 *
 * Each macro deliberately shares its name with the local descriptor variable
 * it reads: a function-like macro is only expanded when the name is followed
 * by '(', so `A.mat` still refers to the variable.
 *
 * The whole expansion is parenthesized so the result behaves as a single
 * pointer expression in any context (e.g. `A(m,n)[i]`).
 */
#define A(m,n) (&((double*)A.mat)[A.bsiz*(m)+A.bsiz*A.lmt*(n)])
#define B(m,n) (&((double*)B.mat)[B.bsiz*(m)+B.bsiz*B.lmt*(n)])
#define C(m,n) (&((double*)C.mat)[C.bsiz*(m)+C.bsiz*C.lmt*(n)])

/*
 * plasma_pdgemm - tile DGEMM worker: C = alpha*A*B + beta*C, tile by tile.
 *
 * The operands (transA, transB, alpha, A, B, beta, C) are unpacked from the
 * PLASMA context. The tiles of C are enumerated column by column (tile row m
 * runs fastest, then tile column n); this caller starts at linear tile index
 * PLASMA_RANK and advances by PLASMA_SIZE tiles each iteration (presumably
 * the calling thread's rank and the number of threads - confirm against
 * common.h).
 *
 * For every assigned tile C(m,n) the products A(m,k)*B(k,n) are accumulated
 * over all k in [0, A.nt). beta is applied on the first product only; the
 * following ones accumulate with a factor of 1.0.
 *
 * No conformance checks are made on the descriptors here (matching
 * dimensions/offsets of A, B and C); callers must pass consistent operands.
 *
 * NOTE(review): tile indexing is A(m,k) / B(k,n) regardless of transA and
 * transB, which are only forwarded to CORE_dgemm - looks like only the
 * non-transposed layout is handled; confirm before using transposed operands.
 * NOTE(review): the offsets X1/Y1/K1 shrink the extents of the first tiles
 * but are not added to the tile pointers - verify sub-matrix handling.
 */
void plasma_pdgemm(plasma_context_t *plasma)
{
    PLASMA_enum transA;
    PLASMA_enum transB;
    PLASMA_desc A;
    PLASMA_desc B;
    PLASMA_desc C;
    double alpha;
    double beta;

    int m, n;

    plasma_unpack_args_7(transA, transB, alpha, A, B, beta, C);

    /* Convert the starting linear index PLASMA_RANK into tile coordinates
     * (m,n) of C; n reaches C.nt when there is no tile for this caller. */
    n = 0;
    m = PLASMA_RANK;
    while (m >= C.mt && n < C.nt) {
        n++;
        m = m-C.mt;
    }

    while (n < C.nt) {
        int K, K1, K2, X, X1, X2, Y, Y1, Y2;
        int k;
        int next_m = m;
        int next_n = n;

        /* Coordinates of the tile handled in the next iteration:
         * PLASMA_SIZE tiles further in column-major tile order. */
        next_m += PLASMA_SIZE;
        while (next_m >= C.mt && next_n < C.nt) {
            next_n++;
            next_m = next_m-C.mt;
        }

        /* Extent of C(m,n): only the first and last tile rows/columns can be
         * partial; every interior tile is a full C.nb x C.nb tile. */
        X1 = m == 0 ? C.i%C.nb : 0;
        Y1 = n == 0 ? C.j%C.nb : 0;
        X2 = m == C.mt-1 ? (C.i+C.m-1)%C.nb+1 : C.nb;
        Y2 = n == C.nt-1 ? (C.j+C.n-1)%C.nb+1 : C.nb;
        X = X2 - X1;
        Y = Y2 - Y1;

        for (k = 0 ; k < A.nt ; k++) {
            /* Inner dimension covered by tile column k of A. */
            K1 = k == 0 ? A.j%A.nb : 0;
            K2 = k == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
            K = K2 - K1;

            /* Each tile is passed with the tile size of its own descriptor
             * as leading dimension. */
            CORE_dgemm( transA, transB,
                        X,
                        Y,
                        K,
                        alpha, A(m, k), A.nb,
                               B(k, n), B.nb,
                        k == 0 ? beta : ((double) 1.0),  C(m, n), C.nb);
        }
        m = next_m;
        n = next_n;
    }
}

