/* -*- C -*- */
/* sseqslv.c
 */

#include <lfc/lfci.h>

LFC_BEGIN_C_DECLS

static void
msg(const char *s) { fprintf( stdout, "%s\n", s ); fflush( stdout ); }

static void
sgenAxb(int n, int nrhs, float *a, int lda, float *b, int ldb) {
  float rcp = 1.0f / RAND_MAX; int i, j;

  srand( 1313 );
  for (j = 0; j < nrhs; j++) {
    for (i = 0; i < n; i++)
      b[i + j*ldb] = rcp * rand();
  }
  for (j = 0; j < n; j++) {
    for (i = 0; i < n; i++)
      a[i + j*lda] = rcp * rand();
  }
}

static int
BLAS_sgemmNN(int m, int n, int k, float alpha, const float *a, int lda,
	     const float *b, int ldb, float beta, float *c, int ldc) {
  int i, j, l;
  float temp;

  if (m < 0) return -4;
  if (n < 0) return -5;
  if (k < 0) return -6;
  if (alpha != alpha) return -7; /* NaN value */
  if (! a) return -8;
  if (lda < m) return -9;
  if (! b) return -10;
  if (ldb < k) return -11;
  if (beta != beta) return -12; /* NaN value */
  if (! c) return -13;
  if (ldc < m) return -14;

  if (1.0 != beta) return -9; /* not supported */

  for (j = 0; j < n; j++) {
    for (l = 0; l < k; l++) {
      temp = alpha * b[l + j * ldb];
      if (temp != 0.0f && temp != -0.0f) {
	for (i = 0; i < m; i++) {
	  c[i + j * ldc] += temp * a[i + l * lda];
	}
      }
    }
  }

  return 0;
}	/* BLAS_sgemmNN */

float
sepsilon() {
  float one = 1.0f, deps = 1.0f, half = 1.0f / 2.0f, v;

  while (1) {
    v = deps;
    BLAS_sgemmNN( 1, 1, 1, 1.0f, &one, 1, &one, 1, 1.0f, &v, 1 );
    if (v == one) break;
    deps *= half;
  }

  return deps;
}

int
sseqslv(int n, int nrhs) {
  float *a, *b, *x;
  int *piv;
  int i, j, lda, ldb, info, rv = 0;
  float anrm, rnrm, sres, seps = sepsilon();

  /* this is just to test the logic below and in LFC */
  lda = n + 10;
  ldb = n + 11;

  a  = LFC_MALLOC( float, lda * n );
  b  = LFC_MALLOC( float, ldb * nrhs );
  x  = LFC_MALLOC( float, ldb * nrhs );
  piv = LFC_MALLOC(   int, n );

  if (! a || ! b || ! x || ! piv) {
    msg( "Cannot allocate memory." );
    goto end;
  }

  sgenAxb( n, nrhs, a, lda, x, ldb );

  i = LFC_sgesv( LFC_ColMajor, n, nrhs, a, lda, piv, x, ldb, &info );
  
  if (LFC_FAILURE == i) {
    fprintf( stderr, "LFC_sgesv() -> %d\n", info );
    rv = 1;
    goto end;
  }

  sgenAxb( n, nrhs, a, lda, b, ldb );

  BLAS_sgemmNN( n, nrhs, n, -1.0f, a, lda, x, ldb, 1.0f, b, ldb );

  rnrm = 0.0f;
  for (j = 0; j < nrhs; j++)
    for (i = 0; i < n; i++)
      rnrm += fabs( b[i + j*ldb] );

  anrm = 0.0f;
  for (j = 0; j < n; j++)
    for (i = 0; i < n; i++)
      anrm += fabs( a[i + j*lda] );

  fprintf( stderr, "||A(%dx%d)||=%g\n", n, n, (double)anrm );
  fprintf( stderr, "||A(%dx%d)x-b||=%g\n", n, n, (double)rnrm );
  fprintf( stderr, "epsilon=%g\n", (double)seps );

  sres = rnrm / anrm / seps / n;

  fprintf( stderr, "||Ax-b||/(||A|| n epsilon)=%g\n", (double)sres );

  if (sres > 10.0) rv = 1;

  end:

  LFC_FREE( piv ); LFC_FREE( x ); LFC_FREE( b );  LFC_FREE( a );

  return rv;
}	/* sseqslv */

LFC_END_C_DECLS
