/* -*- C -*- */
/* dseqslv.c
 */

#include <lfc/lfci.h>

LFC_BEGIN_C_DECLS

static void
msg(const char *s) { fprintf( stdout, "%s\n", s ); fflush( stdout ); }

static void
dgenAxb(int n, int nrhs, double *a, int lda, double *b, int ldb) {
  double rcp = 1.0 / RAND_MAX; int i, j;

  srand( 1313 );
  for (j = 0; j < nrhs; j++) {
    for (i = 0; i < n; i++)
      b[i + j*ldb] = rcp * rand();
  }
  for (j = 0; j < n; j++) {
    for (i = 0; i < n; i++)
      a[i + j*lda] = rcp * rand();
  }
}

static int
BLAS_dgemmNN(int m, int n, int k, double alpha, const double *a, int lda,
	     const double *b, int ldb, double beta, double *c, int ldc) {
  int i, j, l;
  double temp;

  if (m < 0) return -4;
  if (n < 0) return -5;
  if (k < 0) return -6;
  if (alpha != alpha) return -7; /* NaN value */
  if (! a) return -8;
  if (lda < m) return -9;
  if (! b) return -10;
  if (ldb < k) return -11;
  if (beta != beta) return -12; /* NaN value */
  if (! c) return -13;
  if (ldc < m) return -14;

  if (1.0 != beta) return -9; /* not supported */

  for (j = 0; j < n; j++) {
    for (l = 0; l < k; l++) {
      temp = alpha * b[l + j * ldb];
      if (temp != 0.0 && temp != -0.0) {
	for (i = 0; i < m; i++) {
	  c[i + j * ldc] += temp * a[i + l * lda];
	}
      }
    }
  }

  return 0;
}	/* BLAS_dgemmNN */

double
depsilon() {
  double one = 1.0, deps = 1.0, half = 1.0 / 2.0, v;

  while (1) {
    v = deps;
    BLAS_dgemmNN( 1, 1, 1, 1.0, &one, 1, &one, 1, 1.0, &v, 1 );
    if (v == one) break;
    deps *= half;
  }

  return deps;
}

int
dseqslv(int n, int nrhs) {
  double *a, *b, *x;
  int *piv;
  int i, j, lda, ldb, info, rv = 0;
  double anrm, rnrm, sres, deps = depsilon();

  /* this is just to test the logic below and in LFC */
  lda = n + 10;
  ldb = n + 11;

  a  = LFC_MALLOC( double, lda * n );
  b  = LFC_MALLOC( double, ldb * nrhs );
  x  = LFC_MALLOC( double, ldb * nrhs );
  piv = LFC_MALLOC(   int, n );

  if (! a || ! b || ! x || ! piv) {
    msg( "Cannot allocate memory." );
    goto end;
  }

  dgenAxb( n, nrhs, a, lda, x, ldb );

  i = LFC_dgesv( LFC_ColMajor, n, nrhs, a, lda, piv, x, ldb, &info );
  
  if (LFC_FAILURE == i) {
    fprintf( stderr, "LFC_dgesv() -> %d\n", info );
    rv = 1;
    goto end;
  }

  dgenAxb( n, nrhs, a, lda, b, ldb );

  BLAS_dgemmNN( n, nrhs, n, -1.0, a, lda, x, ldb, 1.0, b, ldb );

  rnrm = 0.0;
  for (j = 0; j < nrhs; j++)
    for (i = 0; i < n; i++)
      rnrm += fabs( b[i + j*ldb] );

  anrm = 0.0;
  for (j = 0; j < n; j++)
    for (i = 0; i < n; i++)
      anrm += fabs( a[i + j*lda] );

  fprintf( stderr, "||A(%dx%d)||=%g\n", n, n, (double)anrm );
  fprintf( stderr, "||A(%dx%d)x-b||=%g\n", n, n, (double)rnrm );
  fprintf( stderr, "epsilon=%g\n", (double)deps );

  sres = rnrm / anrm / deps / n;

  fprintf( stderr, "||Ax-b||/(||A|| n epsilon)=%g\n", (double)sres );

  if (sres > 10.0) rv = 1;

  end:

  LFC_FREE( piv ); LFC_FREE( x ); LFC_FREE( b );  LFC_FREE( a );

  return rv;
}	/* dseqslv */

LFC_END_C_DECLS
