/**
 *
 * @file testing_zcungesv.c
 *
 *  PLASMA testing routines
 *  PLASMA is a software package provided by Univ. of Tennessee,
 *  Univ. of California Berkeley and Univ. of Colorado Denver
 *
 * @version 2.2.0
 * @author Emmanuel Agullo
 * @date 2009-11-15
 *
 **/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

#include <cblas.h>
#include <lapack.h>
#include <plasma.h>

/*
 * Fallback two-argument maximum/minimum helpers, defined only when the
 * included headers have not already supplied macros with these names.
 * NOTE: the selected argument is evaluated twice, so do not pass
 * expressions with side effects.
 */
#if !defined(max)
#  define max(a, b) (((a) > (b)) ? (a) : (b))
#endif
#if !defined(min)
#  define min(a, b) (((a) < (b)) ? (a) : (b))
#endif

/* Residual checker defined after main(); returns 0 when the scaled
 * residual passes the test and 1 otherwise. */
int check_solution(int N, int NRHS,
                   PLASMA_Complex64_t *A1, int LDA,
                   PLASMA_Complex64_t *B1, PLASMA_Complex64_t *B2, int LDB,
                   double eps);

/* Constant 1, passed as the first argument of lapack_zlarnv(). */
int IONE = 1;

/* initial seed for zlarnv() */
int ISEED[4] = { 0, 0, 0, 1 };

/**
 * Test driver for PLASMA_zcungesv.
 *
 * Expects exactly five command-line arguments: ncores N LDA NRHS LDB.
 * Generates a random N-by-N matrix A and an N-by-NRHS right-hand side B,
 * solves A*X = B with PLASMA_zcungesv on copies of them, and validates X
 * against the untouched originals with check_solution().
 *
 * Exit status: 1 on a usage error, invalid dimensions or allocation
 * failure; 0 otherwise.  The test verdict itself is reported through the
 * PASSED/FAILED banner, not through the exit status.
 */
int main (int argc, char **argv)
{
    /* Check for number of arguments*/
    if ( argc != 6 ){
        printf(" Proper Usage is : ./testing_zcungesv ncores N LDA NRHS LDB with \n"
               "   - ncores : number of cores \n"
               "   - N    : size of the matrix A\n"
               "   - LDA  : leading dimension of the matrix A \n"
               "   - NRHS : number of RHS \n"
               "   - LDB  : leading dimension of the matrix B\n");
        exit(1);
    }

    int cores = atoi(argv[1]);
    int N     = atoi(argv[2]);
    int LDA   = atoi(argv[3]);
    int NRHS  = atoi(argv[4]);
    int LDB   = atoi(argv[5]);
    int LDX   = LDB;
    int ITER  = 0;   /* defined value even if the solver returns before writing it */
    int info;

    double eps;
    int info_solution;
    int i,j;

    /* The copy loops below touch rows 0..N-1 of every column, so a leading
     * dimension smaller than N would write outside the buffers. */
    if ( (N < 0) || (NRHS < 0) || (LDA < 1) || (LDB < 1) || (LDA < N) || (LDB < N) ) {
        printf(" Invalid dimensions: need N >= 0, NRHS >= 0, LDA >= max(1,N), LDB >= max(1,N)\n");
        exit(1);
    }

    int LDAxN    = LDA*N;
    int LDBxNRHS = LDB*NRHS;
    int LDXxNRHS = LDX*NRHS;

    /* A1/B1 keep the original data for the residual check; A2/B2 are the
     * copies handed to the solver; X receives the solution. */
    PLASMA_Complex64_t *A1 = (PLASMA_Complex64_t *)malloc(LDAxN   *sizeof(PLASMA_Complex64_t));
    PLASMA_Complex64_t *A2 = (PLASMA_Complex64_t *)malloc(LDAxN   *sizeof(PLASMA_Complex64_t));
    PLASMA_Complex64_t *B1 = (PLASMA_Complex64_t *)malloc(LDBxNRHS*sizeof(PLASMA_Complex64_t));
    PLASMA_Complex64_t *B2 = (PLASMA_Complex64_t *)malloc(LDBxNRHS*sizeof(PLASMA_Complex64_t));
    PLASMA_Complex64_t *X  = (PLASMA_Complex64_t *)malloc(LDXxNRHS*sizeof(PLASMA_Complex64_t));

    /* Check if unable to allocate memory */
    if ( (!A1) || (!A2) || (!B1) || (!B2) || (!X) ) {
        printf("Out of Memory \n ");
        /* free(NULL) is a no-op, so release whatever did get allocated. */
        free(A1); free(A2); free(B1); free(B2); free(X);
        exit(1);
    }

    /* Plasma Initialization */
    PLASMA_Init(cores);

    eps = lapack_dlamch(lapack_eps);

    /*----------------------------------------------------------
    *  TESTING ZCUNGESV
    */

    /* Initialize A1 and A2 (column by column: storage is column-major) */
    lapack_zlarnv(IONE, ISEED, LDAxN, A1);
    for (j = 0; j < N; j++)
        for (i = 0; i < N; i++)
            A2[LDA*j+i] = A1[LDA*j+i] ;

    /* Initialize B1 and B2 */
    lapack_zlarnv(IONE, ISEED, LDBxNRHS, B1);
    for (j = 0; j < NRHS; j++)
        for (i = 0; i < N; i++)
             B2[LDB*j+i] = B1[LDB*j+i] ;

    printf("\n");
    printf("------ TESTS FOR PLASMA ZCUNGESV ROUTINE -------  \n");
    printf("            Size of the Matrix %d by %d\n", N, N);
    printf("\n");
    printf(" The matrix A is randomly generated for each test.\n");
    printf("============\n");
    printf(" The relative machine precision (eps) is to be %e \n",eps);
    printf(" Computational tests pass if scaled residuals are less than 60.\n");

    /* PLASMA ZCUNGESV */
    info = PLASMA_zcungesv(PlasmaNoTrans, N, NRHS, A2, LDA, B2, LDB, X, LDX, &ITER);

    if (info != 0) {
        /* The solver reported an error: X may never have been written, so
         * skip the residual check and count the test as failed. */
        printf("\n");
        printf(" PLASMA_zcungesv returned error code %d\n", info);
        printf("\n");
        info_solution = 1;
    }
    else {
        printf("\n");
        printf(" Solution obtained with %d iterations\n", ITER);
        printf("\n");

        /* Check the solution against the original A1 and B1
         * (check_solution overwrites B1). */
        info_solution = check_solution(N, NRHS, A1, LDA, B1, X, LDB, eps);
    }

    if (info_solution == 0) {
        printf("***************************************************\n");
        printf(" ---- TESTING ZCUNGESV.................... PASSED !\n");
        printf("***************************************************\n");
    }
    else {
        printf("************************************************\n");
        printf(" - TESTING ZCUNGESV .. FAILED !\n");
        printf("************************************************\n");
    }

    free(A1); free(A2); free(B1); free(B2); free(X);

    PLASMA_Finalize();

    exit(0);
}

/*--------------------------------------------------------------
 * Check the solution
 *
 * Evaluates the scaled residual
 *
 *     ||A*x - B||_oo / ((||A||_oo * ||x||_oo + ||B||_oo) * N * eps)
 *
 * and accepts the solution when that value is finite and not above 60.
 *
 * Parameters:
 *   N, NRHS  order of A and number of right-hand sides
 *   A1, LDA  original matrix A and its leading dimension (read only)
 *   B1       original right-hand side B; OVERWRITTEN with A*x - B
 *   B2       solution x being checked (main() passes X here; read only)
 *   LDB      leading dimension shared by B1 and B2
 *   eps      relative machine precision used for scaling
 *
 * Returns 0 when the solution is accepted, 1 when it is suspicious or
 * when the scratch buffer cannot be allocated.
 */

int check_solution(int N, int NRHS, PLASMA_Complex64_t *A1, int LDA, 
                   PLASMA_Complex64_t *B1, PLASMA_Complex64_t *B2, int LDB, double eps)
{
    int info_solution;
    double Rnorm, Anorm, Xnorm, Bnorm, result;
    PLASMA_Complex64_t alpha, beta;

    /* Scratch for the infinity-norm computations, one entry per matrix row.
     * Always request at least one element so that N == 0 does not ask for a
     * zero-byte allocation (which may legitimately return NULL). */
    double *work = (double *)malloc((N > 0 ? N : 1) * sizeof(double));

    if (work == NULL) {
        printf("Out of Memory \n ");
        return 1;
    }

    alpha = 1.0;
    beta  = -1.0;

    /* The norms must be taken before B1 is overwritten below. */
    Anorm = lapack_zlange(lapack_inf_norm, N, N,    A1, LDA, work);
    Xnorm = lapack_zlange(lapack_inf_norm, N, NRHS, B2, LDB, work);
    Bnorm = lapack_zlange(lapack_inf_norm, N, NRHS, B1, LDB, work);

    /* B1 <- A1*B2 - B1, i.e. the residual A*x - B, computed in place. */
    cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, NRHS, N, 
                CBLAS_SADDR(alpha), A1, LDA, B2, LDB, CBLAS_SADDR(beta), B1, LDB);

    /* Measure the residual itself so that the reported quantity matches the
     * label printed below. */
    Rnorm = lapack_zlange(lapack_inf_norm, N, NRHS, B1, LDB, work);

    free(work);

    result = Rnorm / ((Anorm * Xnorm + Bnorm) * N * eps);

    printf("============\n");
    printf("Checking the Residual of the solution \n");
    printf("-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||)_oo.N.eps) = %e \n", result);

    if (isnan(result) || isinf(result) || (result > 60.0)) {
         printf("-- The solution is suspicious ! \n");
         info_solution = 1;
    }
    else {
         printf("-- The solution is CORRECT ! \n");
         info_solution= 0 ;
    }

    return info_solution;
}
