/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- */
/*
  time_dgesv.c
 */

/* Define these so that the Microsoft VC compiler stops complaining
   about scanf and friends */
#define _CRT_SECURE_NO_DEPRECATE
#define _CRT_SECURE_NO_WARNINGS

#include <errno.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#if defined( _WIN32 ) || defined( _WIN64 )
#include <windows.h>
#include <time.h>
#include <sys/timeb.h>
#if defined(_MSC_VER) || defined(_MSC_EXTENSIONS)
  #define DELTA_EPOCH_IN_MICROSECS  11644473600000000Ui64
#else
  #define DELTA_EPOCH_IN_MICROSECS  11644473600000000ULL
#endif
/* Minimal stand-in for the BSD/POSIX `struct timezone`, declared here only
   for the Windows build so that the gettimeofday() replacement defined in
   this file can keep the usual two-argument signature.  That replacement
   fills the fields from the CRT globals _timezone and _daylight. */
struct timezone {
  int  tz_minuteswest; /* minutes W of Greenwich */
  int  tz_dsttime;     /* type of dst correction */
};
int gettimeofday(struct timeval* tv, struct timezone* tz) {
  FILETIME ft;
  unsigned __int64 tmpres = 0;
  static int tzflag;
  if (NULL != tv) {
    GetSystemTimeAsFileTime(&ft);
    tmpres |= ft.dwHighDateTime;
    tmpres <<= 32;
    tmpres |= ft.dwLowDateTime;
    /*converting file time to unix epoch*/
    tmpres /= 10;  /*convert into microseconds*/
    tmpres -= DELTA_EPOCH_IN_MICROSECS; 
    tv->tv_sec = (long)(tmpres / 1000000UL);
    tv->tv_usec = (long)(tmpres % 1000000UL);
  }
  if (NULL != tz) {
    if (!tzflag) {
      _tzset();
      tzflag++;
    }
    tz->tz_minuteswest = _timezone / 60;
    tz->tz_dsttime = _daylight;
  }
  return 0;
}
#else  /* Non-Windows */
#include <unistd.h>
#include <sys/time.h>
#include <sys/resource.h>
#endif

/* struct timeval {time_t tv_sec; suseconds_t tv_usec;}; */
/* Return the current wall-clock time, as reported by gettimeofday(), in
   seconds (whole seconds plus the microsecond part as a fraction). */
double cWtime(void) {
  struct timeval now;
  double whole_seconds, fraction;

  gettimeofday( &now, NULL );

  whole_seconds = now.tv_sec;
  fraction = 1e-6 * now.tv_usec;
  return whole_seconds + fraction;
}

#include <cblas.h>
#include <plasma.h>

#include <blas.h>

/* Fill an m-by-n column-major matrix with pseudo-random values in [0, 1].
 *
 * m, n  number of rows and columns to fill
 * a     storage for the matrix; element (i, j) lives at a[i + j * lda]
 * lda   distance, in elements, between the starts of consecutive columns
 * seed  if non-zero, the C library generator is re-seeded with this value
 *       first, which makes the generated sequence reproducible; if zero the
 *       generator simply continues from its current state
 *
 * Entries between row m and row lda of each column are left untouched.
 * Always returns 0.
 */
static int
RandMatGen(int m, int n, double *a, int lda, int seed) {
  const double inv = 1.0 / RAND_MAX;
  int i, j;

  if (seed) srand( seed );

  for (j = 0; j < n; j++) {
    /* Compute the column offset in size_t: j * lda overflows int as soon as
       the matrix has more than INT_MAX elements. */
    double *column = a + (size_t)j * (size_t)lda;

    for (i = 0; i < m; i++)
      column[i] = inv * rand();
  }

  return 0;
}

/* Time one PLASMA_dgesv() call on a random n-by-n system with nrhs
 * right-hand sides and measure the residual of the computed solution.
 *
 * n             order of the matrix
 * nrhs          number of right-hand-side columns
 * t_            out: wall-clock seconds spent inside PLASMA_dgesv()
 * res_          out: infinity norm of A*x - b
 * anorm         out: infinity norm of A
 * xnorm         out: infinity norm of the contents of x after the solve
 * bnorm         out: infinity norm of the right-hand side
 * thread_count  value handed to PLASMA_Init()
 *
 * Returns 0 on success.  If any buffer cannot be allocated a message is
 * written to stderr, the outputs that were not yet computed are left at 0.0
 * and -1 is returned.
 */
static int
RunTest(int n, int nrhs, double *t_, double *res_, double *anorm, double *xnorm, double *bnorm, int thread_count) {
  double *a = NULL, *b = NULL, *x = NULL, *L = NULL;
  int *piv = NULL;
  double t;
  int status = -1;

  /* Give every output a defined value so that a caller that ignores the
     return code never reads an indeterminate double. */
  *t_ = 0.0;
  *res_ = 0.0;
  *anorm = 0.0;
  *xnorm = 0.0;
  *bnorm = 0.0;

  PLASMA_Init( thread_count );

  a = (double *)malloc( (sizeof *a) * n * n );
  b = (double *)malloc( (sizeof *b) * n * nrhs );
  x = (double *)malloc( (sizeof *x) * n * nrhs );
  if (NULL == a || NULL == b || NULL == x) {
    fprintf( stderr, "RunTest: out of memory for N=%d, NRHS=%d\n", n, nrhs );
    goto cleanup;
  }

  /* L and piv start out NULL, so a workspace allocation that fails without
     storing a pointer is caught by the check below. */
  PLASMA_Alloc_Workspace_dgesv(n, &L, &piv);
  if (NULL == L || NULL == piv) {
    fprintf( stderr, "RunTest: cannot allocate PLASMA workspace for N=%d\n", n );
    goto cleanup;
  }

  /* The fixed seeds make A and the right-hand side reproducible; they are
     generated a second time further down. */
  RandMatGen( n, n, a, n, 1313 );
  RandMatGen( n, nrhs, x, n, 13131 );

  /* x still holds the right-hand side here, so this is ||b||. */
  BLAS_dge_norm( blas_colmajor, blas_inf_norm, n, n, a, n, anorm );
  if (1 == nrhs)
    BLAS_dnorm( blas_inf_norm, n, x, 1, bnorm );
  else
    BLAS_dge_norm( blas_colmajor, blas_inf_norm, n, nrhs, x, n, bnorm );

  /* Only the solver call itself is timed. */
  t = -cWtime();
  PLASMA_dgesv( n, nrhs, a, n, L, piv, x, n );
  t += cWtime();
  *t_ = t;

  /* x is now treated as the computed solution. */
  if (1 == nrhs)
    BLAS_dnorm( blas_inf_norm, n, x, 1, xnorm );
  else
    BLAS_dge_norm( blas_colmajor, blas_inf_norm, n, nrhs, x, n, xnorm );

  /* Regenerate A and the right-hand side from the same seeds (presumably
     because the solver overwrote a -- confirm against the PLASMA
     documentation), then form b := A*x - b. */
  RandMatGen( n, n, a, n, 1313 );
  RandMatGen( n, nrhs, b, n, 13131 );

  cblas_dgemm( CblasColMajor,CblasNoTrans, CblasNoTrans, n, nrhs, n, 1.0, a, n, x, n, -1.0, b, n);

  if (1 == nrhs)
    BLAS_dnorm( blas_inf_norm, n, b, 1, res_ );
  else
    BLAS_dge_norm( blas_colmajor, blas_inf_norm, n, nrhs, b, n, res_ );

  status = 0;

cleanup:
  /* free(NULL) is a no-op, so this path serves success and failure alike. */
  free( L );
  free( piv );
  free( x );
  free( b );
  free( a );

  PLASMA_Finalize();

  return status;
}

/* Print one line of benchmark output.
 *
 * When both n and thread_count are non-negative, RunTest() is executed for an
 * n-by-n system with a single right-hand side and the timing, Gflop/s rate,
 * norms and scaled residual are printed as one row.
 *
 * When either of them is negative, only the column header is printed (a
 * line starting with '#'); if gnuplot is non-zero it is followed by the
 * gnuplot commands that set the title, axis labels and start an inline plot.
 *
 * Always returns 0.
 */
static int
Test(int n, int thread_count, int gnuplot) {
  /* Environment variables reported in the gnuplot title; the empty string
     terminates the list. */
  char *thread_vars[] = {
    "OMP_NUM_THREADS",
    "MKL_NUM_THREADS",
    "GOTO_NUM_THREADS",
    "ACML_NUM_THREADS",
    "ATLAS_NUM_THREADS",
    "BLAS_NUM_THREADS", ""};
  const int nrhs = 1;
  double eps = BLAS_dfpinfo( blas_eps );
  double elapsed, resid, rate;
  double norm_a, norm_x, norm_b;
  char *value;
  int v, k;

  if (n >= 0 && thread_count >= 0) {
    /* Regular data row.  The leading columns are flushed before the solve so
       they are visible while it runs. */
    printf( "%5d %4d %5d ", n, nrhs, thread_count );
    fflush( stdout );

    RunTest( n, nrhs, &elapsed, &resid, &norm_a, &norm_x, &norm_b, thread_count );

    /* A zero elapsed time (of either sign) would divide by zero. */
    if (0.0 != elapsed)
      rate = 1e-9 * n * (2.0 * n / 3.0 + 3.0 / 2.0) * n / elapsed;
    else
      rate = 0.0;

    printf( "%9.3f %9.2f %9.2e %8.1e %8.1e %8.1e %5.0e %9.2e\n",
      elapsed, rate, resid, norm_a, norm_x, norm_b, eps,
      resid / n / eps / (norm_a * norm_x + norm_b) );
    fflush( stdout );

    return 0;
  }

  /* Header request. */
  printf( "%s %s\n",
    "#   N NRHS threads seconds   Gflop/s  ||Ax-b||    ||A||    ||x||    ||b||",
    "  eps  ||Ax-b||/N/eps/(||A||||x||+||b||)" );

  if (!gnuplot)
    return 0;

  printf( "set title '*_NUM_THREADS: " );
  for (v = 0; thread_vars[v][0]; ++v) {
    value = getenv( thread_vars[v] );

    if (v > 0) printf( " " ); /* separating space */

    /* Abbreviate the name: at most five characters, stopping at the first
       underscore. */
    k = 0;
    while (k < 5 && thread_vars[v][k] && thread_vars[v][k] != '_') {
      printf( "%c", thread_vars[v][k] );
      ++k;
    }

    if (NULL == value)
      printf( "->%s", "?" );
    else
      printf( "=%s", value );
  }
  printf( "'\n" );
  printf( "%s\n%s\n%s\n%s\n", 
    "set xlabel 'Matrix size'",
    "set ylabel 'Gflop/s'",
    "set key bottom",
    "plot '-' using 1:5 title 'PLASMA_dgesv()' with linespoints" );

  return 0;
}

/* Return 1 if the string s begins with prefix, 0 otherwise.  An empty
   prefix matches every string. */
static int
startswith(const char *s, const char *prefix) {
  return 0 == strncmp( s, prefix, strlen( prefix ) );
}

/* Parse the base-10 integer at the start of text into *value.
 *
 * Leading white space and an optional sign are accepted and any characters
 * after the digits are ignored.  Returns 0 on success; returns -1, leaving
 * *value untouched, if there are no digits or the number does not fit in an
 * int.
 */
static int
parse_int_field(const char *text, int *value) {
  char *end;
  long parsed;

  errno = 0;
  parsed = strtol( text, &end, 10 );
  if (end == text || ERANGE == errno || parsed < INT_MIN || parsed > INT_MAX)
    return -1;

  *value = (int)parsed;
  return 0;
}

/* Parse a matrix-size range given in one of three forms:
 *
 *   "Start"            stop and step are derived: step = Start / 10 (at
 *                      least 1) and stop = Start + 10 * step
 *   "Start:Stop"       step = (Stop - Start) / 10, at least 1
 *   "Start:Stop:Step"  all three values given explicitly
 *
 * Start and Step must be at least 1 and Stop must not be smaller than Start.
 *
 * On success the three values are stored through start_p, stop_p and step_p
 * and 0 is returned.  On any error (more than two colons, a missing or
 * non-numeric field, a value outside the range of int, or a violated
 * constraint) -1 is returned and the three outputs are left untouched.
 */
static int
get_range(char *range, int *start_p, int *stop_p, int *step_p) {
  const char *first_colon, *second_colon;
  int start, stop, step;

  first_colon = strchr( range, ':' );
  second_colon = first_colon ? strchr( first_colon + 1, ':' ) : NULL;
  if (second_colon && strchr( second_colon + 1, ':' ))
    return -1; /* More than two colons in range. */

  /* Every form begins with the start value; the number parser stops at the
     colon, so the field does not have to be copied out first. */
  if (parse_int_field( range, &start ) || start < 1)
    return -1;

  if (NULL == first_colon) { /* No colon in range. */
    step = start / 10;
    if (step < 1) step = 1;
    /* 10 * step never exceeds max(start, 10); saturate the sum rather than
       overflow for very large start values. */
    if (start > INT_MAX - 10 * step)
      stop = INT_MAX;
    else
      stop = start + 10 * step;
  } else {
    /* start >= 1 here, so this also rejects non-positive stop values. */
    if (parse_int_field( first_colon + 1, &stop ) || stop < start)
      return -1;

    if (second_colon) { /* Two colons in range: explicit step. */
      if (parse_int_field( second_colon + 1, &step ) || step < 1)
        return -1;
    } else { /* One colon in range: let's have 10 steps or less. */
      step = (stop - start) / 10;
      if (step < 1)
        step = 1;
    }
  }

  *start_p = start;
  *stop_p = stop;
  *step_p = step;

  return 0;
}

/* Print the command-line usage summary to stdout.  prog_name is shown as the
   name of the executable. */
static void
show_help(char *prog_name) {
  static const char *const option_lines[] = {
    "\nOptions are:",
    "--threads=C\t\tnumber of threads",
    "--n_range=R\t\trange of N values: Start:Stop:Step (Example range R: 100:1000:50)",
    "--gnuplot\t\tproduce output suitable for gnuplot"
  };
  size_t k;

  printf( "Usage:\n%s [options]\n", prog_name );

  for (k = 0; k < sizeof option_lines / sizeof option_lines[0]; ++k)
    printf( "%s\n", option_lines[k] );
}

/* Store the number of processors reported by the system in *thread_count.
 *
 * On Windows the value is read from the NUMBER_OF_PROCESSORS environment
 * variable; elsewhere it comes from sysconf(_SC_NPROCESSORS_ONLN).  If the
 * value is unavailable or is not a positive number that fits in an int,
 * *thread_count is left unchanged, so the caller's default stays in effect.
 */
static void
get_thread_count(int *thread_count) {
  /* Same platform test as the one that selects the Windows headers at the
     top of this file. */
#if defined( _WIN32 ) || defined( _WIN64 )
  const char *text = getenv( "NUMBER_OF_PROCESSORS" );
  int count = 0;

  /* getenv() returns NULL when the variable is not set. */
  if (NULL != text && 1 == sscanf( text, "%d", &count ) && count > 0)
    *thread_count = count;
#else
  long count = sysconf( _SC_NPROCESSORS_ONLN );

  /* sysconf() reports failure as -1; a negative thread count would make
     Test() print the header instead of running a benchmark. */
  if (count > 0 && count <= INT_MAX)
    *thread_count = (int)count;
#endif
}

/* Benchmark driver.
 *
 * Recognised options:
 *   --help           print the usage text and exit
 *   --n_range=R      matrix sizes to run, parsed by get_range()
 *   --threads=C      thread count passed on to every test
 *   --gnuplot        wrap the results in gnuplot commands
 *
 * Unknown options and invalid option values are reported on stderr and
 * otherwise ignored, so the run continues with the previous settings.
 * Returns 0.
 */
int
main(int argc, char *argv[]) {
  int i, start=1000, stop=10000, step=1000, thread_count=1;
  int gnuplot = 0;

  get_thread_count( &thread_count );
  /* Test() treats a negative thread count as a request for the header, so
     never let one through. */
  if (thread_count < 0) thread_count = 1;

  for (i = 1; i < argc && argv[i]; ++i) {
    if (startswith( argv[i], "--help" )) {
      show_help( argv[0] );
      return 0;
    } else if (startswith( argv[i], "--n_range=" )) {
      /* get_range() leaves start/stop/step untouched when it fails. */
      if (get_range( strchr( argv[i], '=' ) + 1, &start, &stop, &step ))
        fprintf( stderr, "Invalid range ignored: %s\n", argv[i] );
    } else if (startswith( argv[i], "--threads=" )) {
      int requested = 0;

      if (sscanf( strchr( argv[i], '=' ) + 1, "%d", &requested ) < 1 || requested < 0)
        fprintf( stderr, "Invalid thread count ignored: %s\n", argv[i] );
      else
        thread_count = requested;
    } else if (startswith( argv[i], "--gnuplot" )) {
      gnuplot = 1;
    } else {
      fprintf( stderr, "Unknown option: %s\n", argv[i] );
    }
  }

  if (step < 1) step = 1;

  Test( -1, -1, gnuplot ); /* print header */
  for (i = start; i <= stop; i += step) {
    Test( i, thread_count, gnuplot );

    /* Stop before i += step could overflow when stop is close to INT_MAX. */
    if (step > stop - i)
      break;
  }

  if (gnuplot) {
    /* "e" ends the inline data that followed the plot command. */
    printf( "%s\n%s\n",
      "e",
      "pause 10" );
  }

  return 0;
}
