/*
 -- PLASMA Test Routine
    University of Tennessee
    November 2008

 -- Purpose
    TESTING PLASMA_DSGESV
*/

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <cblas.h>
#include <plasma.h>
#include "../src/lapack.h"

/* Residual check for a computed solution; returns 0 when the check passes. */
int check_solution(int N, int NRHS, double *A1, int LDA,
                   double *B1, double *B2, int LDB, double eps);

/* Passed by address as the first argument of dlarnv(). */
int IONE = 1;

/* initial seed for dlarnv() */
int ISEED[4] = { 0, 0, 0, 1 };

/*
 * Test driver for PLASMA_dsgesv.
 *
 * Usage: ./testing_dsgesv ncores N LDA NRHS LDB
 *
 * Fills A (N-by-N, leading dimension LDA) and B (N-by-NRHS, leading
 * dimension LDB) with dlarnv(), solves on copies of them with
 * PLASMA_dsgesv and then checks the scaled residual of the returned X
 * against the untouched originals.
 *
 * Exit status: 0 when the residual check passes; 1 on bad arguments,
 * allocation failure, a non-zero solver return code or a failed check.
 */
int main (int argc, char **argv)
{
    /* Check for valid arguments*/
    if (argc != 6){
        printf(" Proper Usage is : ./testing_dsgesv ncores N LDA NRHS LDB with \n - ncores: number of cores \n - N : the size of the matrix \n - LDA : leading dimension of the matrix A \n - NRHS : number of RHS \n - LDB : leading dimension of the matrix B \n");
        exit(1);
    }

    int cores = atoi(argv[1]);
    int N     = atoi(argv[2]);
    int LDA   = atoi(argv[3]);
    int NRHS  = atoi(argv[4]);
    int LDB   = atoi(argv[5]);
    int LDX   = LDB;
    int ITER;
    int info;
    int failed;
    int j;
    double eps;

    /* The column-major indexing below (LDA*j+i with i < N) is only in
     * bounds when every leading dimension is at least N. */
    if (cores < 1 || N < 1 || NRHS < 1 || LDA < N || LDB < N){
        printf(" Invalid arguments: ncores, N and NRHS must be positive, LDA >= N and LDB >= N \n");
        exit(1);
    }

    /* The element counts are handed to dlarnv() as int, so the products
     * must be representable as int. */
    if (LDA > INT_MAX / N || LDB > INT_MAX / NRHS){
        printf(" Invalid arguments: LDA*N and LDB*NRHS must fit in an int \n");
        exit(1);
    }

    int LDAxN    = LDA*N;
    int LDBxNRHS = LDB*NRHS;

    double *A1 = malloc((size_t)LDAxN * sizeof *A1);
    double *A2 = malloc((size_t)LDAxN * sizeof *A2);
    double *B1 = malloc((size_t)LDBxNRHS * sizeof *B1);
    double *B2 = malloc((size_t)LDBxNRHS * sizeof *B2);
    double *X  = malloc((size_t)LDX * (size_t)NRHS * sizeof *X);
    double *L = NULL;
    int *IPIV = NULL;

    /* Check if unable to allocate memory */
    if (!A1 || !A2 || !B1 || !B2 || !X){
        printf("Out of Memory \n ");
        exit(1);
    }

    /*----------------------------------------------------------
    *  TESTING DSGESV
    */

    /*Plasma Initialize*/
    PLASMA_Init(cores);

    /* Optional manual tuning, left disabled:
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE, 6);
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, 3);
    */

    /* Initialize A1 and A2 Matrix: A2 receives the N leading rows of each
     * of the N columns of A1; A1 is kept for the residual check. */
    dlarnv(&IONE, ISEED, &LDAxN, A1);
    for (j = 0; j < N; j++)
        memcpy(A2 + (size_t)LDA*j, A1 + (size_t)LDA*j, (size_t)N * sizeof *A2);

    /* Initialize B1 and B2 the same way */
    dlarnv(&IONE, ISEED, &LDBxNRHS, B1);
    for (j = 0; j < NRHS; j++)
        memcpy(B2 + (size_t)LDB*j, B1 + (size_t)LDB*j, (size_t)N * sizeof *B2);

    /* PLASMA DSGESV */
    info = PLASMA_Alloc_Workspace_dgesv(N, &L, &IPIV);
    if (info != 0 || !L || !IPIV){
        printf("Out of Memory \n ");
        free(A1); free(A2); free(B1); free(B2); free(X); free(L); free(IPIV);
        PLASMA_Finalize();
        exit(1);
    }
    info = PLASMA_dsgesv(N, NRHS, A2, LDA, L, IPIV, B2, LDB, X, LDX, &ITER);

    eps = dlamch("Epsilon");
    printf("\n");
    printf("------ TESTS FOR PLASMA DSGESV ROUTINE -------  \n");
    printf("            Size of the Matrix %d by %d\n", N, N);
    printf("\n");
    printf(" The matrix A is randomly generated for each test.\n");
    printf("============\n");
    printf(" The relative machine precision (eps) is to be %e \n", eps);
    printf(" Computational tests pass if scaled residuals are less than 10.\n");

    if (info != 0){
        /* X cannot be trusted after a solver error, so skip the residual
         * check and report the failure directly. */
        printf("-- PLASMA_dsgesv returned error code %d \n", info);
        failed = 1;
    }
    else{
        /* Check the factorization and the solution */
        failed = (check_solution(N, NRHS, A1, LDA, B1, X, LDB, eps) != 0);
    }

    if (!failed){
        printf("***************************************************\n");
        printf(" ---- TESTING DSGESV ..................... PASSED !\n");
        printf("***************************************************\n");
    }
    else{
        printf("************************************************\n");
        printf(" ---- TESTING DSGESV ... FAILED !\n");
        printf("************************************************\n");
    }

    free(A1); free(A2); free(B1); free(B2); free(X); free(L); free(IPIV);

    /* Release the resources acquired by PLASMA_Init */
    PLASMA_Finalize();

    exit(failed ? 1 : 0);
}

/*------------------------------------------------------------------------
 *  Check the accuracy of the solution of the linear system
 *
 *  Computes the scaled residual
 *      ||A*X - B||_oo / ((||A||_oo * ||X||_oo + ||B||_oo) * N * eps)
 *  and prints it.
 *
 *  N, NRHS : order of A and number of right-hand-side columns
 *  A1, LDA : the original matrix A and its leading dimension
 *  B1      : the original right-hand side B; OVERWRITTEN on return with
 *            the residual A*X - B
 *  B2      : the computed solution X
 *  LDB     : leading dimension used for both B1 and B2
 *  eps     : machine precision used to scale the residual
 *
 *  Returns 0 when the scaled residual is at most 10, and 1 otherwise
 *  (including a NaN residual or a failed workspace allocation).
 */

int check_solution(int N, int NRHS, double *A1, int LDA, double *B1, double *B2, int LDB, double eps )
{
    int info_solution;
    double Rnorm, Anorm, Xnorm, Bnorm, residual;
    char norm='I';
    double alpha = 1.0;
    double beta  = -1.0;
    /* Workspace handed to dlange; always ask for at least one element so a
     * NULL result unambiguously means the allocation failed. */
    size_t work_len = (N > 0) ? (size_t)N : 1;
    double *work = malloc(work_len * sizeof *work);

    if (!work){
        printf("Out of Memory \n ");
        return 1;
    }

    /* Norms of the inputs must be taken before B1 is overwritten below. */
    Xnorm = dlange(&norm, &N, &NRHS, B2, &LDB, work);
    Anorm = dlange(&norm, &N, &N, A1, &LDA, work);
    Bnorm = dlange(&norm, &N, &NRHS, B1, &LDB, work);

    /* B1 <- alpha*A1*B2 + beta*B1 = A*X - B */
    cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, NRHS, N, (alpha), A1, LDA, B2, LDB, (beta), B1, LDB);
    Rnorm=dlange(&norm, &N, &NRHS, B1, &LDB, work);

    residual = Rnorm/((Anorm*Xnorm+Bnorm)*N*eps);

    printf("============\n");
    printf("Checking the Residual of the solution \n");
    printf("-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||_oo).N.eps) = %e \n", residual);

    /* Written as a negated "<=" so that a NaN residual (every comparison
     * with NaN is false) is rejected instead of being reported as correct. */
    if (!(residual <= 10.0)){
        printf("-- The solution is suspicious ! \n");
        info_solution = 1;
    }
    else{
        printf("-- The solution is CORRECT ! \n");
        info_solution = 0;
    }

    free(work);

    return info_solution;
}
