/* ///////////////////////////// P /// L /// A /// S /// M /// A /////////////////////////////// */
/* ///                    PLASMA testing routines (version 2.1.0)                            ///
 * ///                    Author: Emmanuel Agullo                                            ///
 * ///                    Release Date: November, 15th 2009                                  ///
 * ///                    PLASMA is a software package provided by Univ. of Tennessee,       ///
 * ///                    Univ. of California Berkeley and Univ. of Colorado Denver          /// */
/* ///////////////////////////////////////////////////////////////////////////////////////////// */

/* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
//  testing_dgemm : Test PLASMA_dgemm routine

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

#include <cblas.h>
#include <plasma.h>
#include "../src/lapack.h"
#include "../src/core_blas.h"

/* On WIN32 builds, pull in <float.h> and alias isnan to _isnan so the
 * isnan() call used in the residual check resolves to the name that
 * header provides. */
#ifdef WIN32
#include <float.h>
#define isnan _isnan
#endif

/* Fallback max/min, defined only when the included headers have not
 * already provided them.
 * NOTE: these are plain macros, so each argument may be evaluated twice;
 * do not pass expressions with side effects (e.g. max(i++, j)). */
#ifndef max
#define max(a, b) ((a) > (b) ? (a) : (b))
#endif
#ifndef min
#define min(a, b) ((a) < (b) ? (a) : (b))
#endif

/* Compare the PLASMA result Cplasma against a reference product computed
 * with CORE_dgemm from the same inputs (defined at the end of this file).
 *   transA, transB : transposition flags forwarded to CORE_dgemm
 *   M, N, K        : problem dimensions
 *   alpha, beta    : scalar coefficients
 *   A, B           : input matrices with leading dimensions LDA, LDB
 *   Cref           : initial C on entry; overwritten by the routine
 *   Cplasma        : C as computed by PLASMA_dgemm
 *   LDC            : leading dimension of both Cref and Cplasma
 * Returns 0 when the scaled residual is acceptable, 1 otherwise. */
int check_solution(PLASMA_enum transA, PLASMA_enum transB, int M, int N, int K,
                   double alpha, double *A, int LDA,
           double *B, int LDB,
                   double beta, double *Cref, double *Cplasma, int LDC);


/* Arguments passed by address to dlarnv() when filling A, B and C.
 * IONE is the first argument of every dlarnv() call; in LAPACK's DLARNV
 * that argument selects the distribution (1 = uniform on (0,1)).
 * ISEED is shared by all three calls; per the LAPACK documentation DLARNV
 * updates the seed on exit, so A, B and C are drawn from successive parts
 * of one stream rather than being identical. */
int IONE=1;
int ISEED[4] = {0,0,0,1};   /* initial seed for dlarnv() */

/*--------------------------------------------------------------
 * Test driver for PLASMA_dgemm.
 *
 * Command line (9 arguments, see the usage message below):
 *   ncores alpha beta M N K LDA LDB LDC
 *
 * Steps:
 *   1. parse and validate the arguments;
 *   2. fill A (LDA x K), B (LDB x N) and Cinit (LDC x N) with dlarnv();
 *   3. copy Cinit into Cfinal and run
 *      PLASMA_dgemm(NoTrans, NoTrans, ...) on Cfinal;
 *   4. hand Cinit/Cfinal to check_solution() and print PASSED / FAILED.
 *
 * Exit status: 1 on bad usage, invalid dimensions or allocation failure,
 * 0 once the test has run (the verdict is reported on stdout only).
 */
int main (int argc, char **argv)
{
    /* Check for number of arguments*/
    if ( argc != 10){
        printf(" Proper Usage is : ./testing_dgemm ncores alpha beta M N K LDA LDB LDC with \n - ncores : number of cores \n - alpha : alpha coefficient \n - beta : beta coefficient \n - M : number of rows of matrices A and C \n - N : number of columns of matrices B and C \n - K : number of columns of matrix A / number of rows of matrix B \n - LDA : leading dimension of matrix A \n - LDB : leading dimension of matrix B \n - LDC : leading dimension of matrix C\n");
        exit(1);
    }

    int cores = atoi(argv[1]);
    /* alpha and beta are real coefficients: parse them as floating point
     * so that values such as 0.5 or 1e-3 are not truncated to an integer. */
    double alpha = strtod(argv[2], NULL);
    double beta  = strtod(argv[3], NULL);
    int M     = atoi(argv[4]);
    int N     = atoi(argv[5]);
    int K     = atoi(argv[6]);
    int LDA   = atoi(argv[7]);
    int LDB   = atoi(argv[8]);
    int LDC   = atoi(argv[9]);

    /* Reject shapes that would make the column-major accesses below (and
     * in check_solution) run past the allocated buffers. Both operands are
     * used untransposed, so A is M x K, B is K x N and C is M x N. */
    if (M < 0 || N < 0 || K < 0 ||
        LDA < 1 || LDA < M ||
        LDB < 1 || LDB < K ||
        LDC < 1 || LDC < M) {
        printf(" Invalid dimensions : M, N, K must be >= 0, LDA >= max(1,M), LDB >= max(1,K), LDC >= max(1,M)\n");
        exit(1);
    }

    double eps;
    int info_solution;

    /* Element counts, passed by address to dlarnv(). */
    int LDAxK = LDA*K;
    int LDBxN = LDB*N;
    int LDCxN = LDC*N;

    /* Byte sizes computed in size_t so the multiplication by
     * sizeof(double) is not done in int. */
    size_t bytesA = (size_t)LDA * (size_t)K * sizeof(double);
    size_t bytesB = (size_t)LDB * (size_t)N * sizeof(double);
    size_t bytesC = (size_t)LDC * (size_t)N * sizeof(double);

    double *A = (double *)malloc(bytesA);
    double *B = (double *)malloc(bytesB);
    double *Cinit = (double *)malloc(bytesC);
    double *Cfinal = (double *)malloc(bytesC);

    /* Check if unable to allocate memory */
    if ((!A)||(!B)||(!Cinit)||(!Cfinal)){
        printf("Out of Memory \n ");
        exit(1);
    }

    /* Plasma Initialization */
    PLASMA_Init(cores);

    /*
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE, 6);
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, 3);
    */

    eps = dlamch("Epsilon");

    /*----------------------------------------------------------
    *  TESTING DGEMM
    */

    /* Initialize A */
    dlarnv(&IONE, ISEED, &LDAxK, A);

    /* Initialize B */
    dlarnv(&IONE, ISEED, &LDBxN, B);

    /* Initialize C */
    dlarnv(&IONE, ISEED, &LDCxN, Cinit);

    /* Duplicate the whole LDC x N buffer, padding rows included, so that
     * no element of Cfinal is left uninitialized when LDC > M. */
    memcpy(Cfinal, Cinit, bytesC);

    /* PLASMA DGEMM */
    PLASMA_dgemm(PlasmaNoTrans, PlasmaNoTrans, M, N, K, alpha, A, LDA, B, LDB, beta, Cfinal, LDC);

    printf("\n");
    printf("------ TESTS FOR PLASMA DGEMM ROUTINE -------  \n");
    printf("            Size of the Matrix %d by %d\n", M, N);
    printf("\n");
    printf(" The matrix A is randomly generated for each test.\n");
    printf("============\n");
    printf(" The relative machine precision (eps) is to be %e \n",eps);
    printf(" Computational tests pass if scaled residuals are less than 10.\n");

    /* Check the solution (Cinit is overwritten by the reference product) */
    info_solution = check_solution(PlasmaNoTrans, PlasmaNoTrans, M, N, K, alpha, A, LDA, B, LDB, beta, Cinit, Cfinal, LDC);

    if (info_solution == 0) {
        printf("***************************************************\n");
        printf(" ---- TESTING DGEMM ...................... PASSED !\n");
        printf("***************************************************\n");
    }
    else {
        printf("************************************************\n");
        printf(" - TESTING DGEMM ... FAILED !\n");
        printf("************************************************\n");
    }

    free(A); free(B); free(Cinit); free(Cfinal);

    PLASMA_Finalize();

    exit(0);
}

/*--------------------------------------------------------------
 * Check the solution
 */

/*
 * Recomputes C := alpha*op(A)*op(B) + beta*C with CORE_dgemm, starting
 * from the initial C held in Cref, and compares it with the PLASMA
 * result using infinity norms:
 *
 *     ||Cplasma - Cref||_oo / ((||A||_oo + ||B||_oo + ||Cinit||_oo) * N * eps)
 *
 * Parameters:
 *   transA, transB : transposition flags forwarded to CORE_dgemm
 *   M, N, K        : problem dimensions
 *   alpha, beta    : scalar coefficients
 *   A, LDA         : matrix A and its leading dimension
 *   B, LDB         : matrix B and its leading dimension
 *   Cref           : initial C on entry; OVERWRITTEN, its leading M x N
 *                    part holds (reference - Cplasma) on exit
 *   Cplasma        : result produced by PLASMA_dgemm (read only)
 *   LDC            : leading dimension of Cref and Cplasma
 *
 * Returns 0 if the scaled residual is a number not greater than 10,
 * 1 otherwise (including NaN and workspace allocation failure).
 *
 * NOTE: the norms of A and B are taken over M x K and K x N entries,
 * i.e. with the shapes of the no-transpose case, whatever transA/transB.
 */
int check_solution(PLASMA_enum transA, PLASMA_enum transB, int M, int N, int K,
                   double alpha, double *A, int LDA,
                   double *B, int LDB,
                   double beta, double *Cref, double *Cplasma, int LDC)
{
    int info_solution;
    int j;
    double Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm, Rnorm;
    double eps, result;
    char norm='I';
    double beta_const = -1.0;
    double *work;

    /* Workspace handed to dlange; keep the request at least one element
     * so a zero-sized problem does not ask malloc for 0 bytes. */
    int worksize = max(K, max(M, N));
    if (worksize < 1)
        worksize = 1;

    work = (double *)malloc((size_t)worksize * sizeof(double));
    if (!work) {
        printf("Out of Memory \n ");
        return 1;
    }

    Anorm = dlange(&norm, &M, &K, A, &LDA, work);
    Bnorm = dlange(&norm, &K, &N, B, &LDB, work);
    Cinitnorm = dlange(&norm, &M, &N, Cref, &LDC, work);
    Cplasmanorm = dlange(&norm, &M, &N, Cplasma, &LDC, work);

    /* Reference product, computed in place in Cref */
    CORE_dgemm(transA, transB, M, N, K, (alpha), A, LDA, B, LDB, (beta), Cref, LDC);

    Clapacknorm = dlange(&norm, &M, &N, Cref, &LDC, work);

    /* Cref := Cref - Cplasma, one column at a time over the M x N part
     * only: rows M..LDC-1 are padding, never enter the norm below, and
     * may not have been initialized by the caller. */
    for (j = 0; j < N; j++)
        cblas_daxpy(M, (beta_const), Cplasma + (size_t)LDC * j, 1,
                    Cref + (size_t)LDC * j, 1);

    Rnorm = dlange(&norm, &M, &N, Cref, &LDC, work);

    eps = dlamch("Epsilon");

    printf("Rnorm %e, Anorm %e, Bnorm %e, Cinitnorm %e, Cplasmanorm %e, Clapacknorm %e\n",Rnorm,Anorm,Bnorm,Cinitnorm,Cplasmanorm,Clapacknorm);

    /* Scaled residual; evaluates to NaN or Inf when every norm is zero,
     * which the test below reports as suspicious. */
    result = Rnorm / ((Anorm + Bnorm + Cinitnorm) * N * eps);

    printf("============\n");
    printf("Checking the norm of the difference against reference DGEMM \n");
    printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||B||_oo+||C||_oo).N.eps) = %e \n", result);

    if (isnan(result) || (result > 10.0) ) {
         printf("-- The solution is suspicious ! \n");
         info_solution = 1;
    }
    else {
         printf("-- The solution is CORRECT ! \n");
         info_solution= 0 ;
    }

    free(work);

    return info_solution;
}
