#include "kflops.h"
#include <stdio.h>
#include <math.h>
#include <sys/time.h>
#include <sys/resource.h>

Include dependency graph for kflops.c:

Functions
static int	idamax ()
static REAL	ddot ()
static void	daxpy (int n, da, dx, int incx, dy, int incy)
static void	matgen (a, int lda, int n, b,*norma)
static void	dgesl (a, int lda, int n, ipvt, b, int job)
static REAL	ddot (int n, dx, int incx, dy, int incy)
static void	dscal (int n, REAL da, dx, int incx)
static void	dgefa (a, int lda, int n, ipvt, int *info)
static int	idamax (int n, dx, int incx)
static void	dmxpy (int n1, y, int n2, int ldm, x, m)
static REAL	second ()
int	kflops ()
Variables
struct rusage	ru
static REAL	my_time [9][9]

Detailed Description

This file contains a kflops estimator inherited from earlier ICL projects

Definition in file kflops.c.

Function Documentation

static void daxpy	(	int	n,
			da,
		dx	,
		int	incx,
		dy	,
		int	incy
	)			`[static]`

Definition at line 59 of file kflops.c.

{
  int i, ix, iy;

  if(n <= 0)
    return;
  if(da == ZERO)
    return;

  if(incx != 1 || incy != 1) {

    /* code for unequal increments or equal increments not equal to 1 */

    ix = 0;
    iy = 0;
    if(incx < 0)
      ix = (-n + 1) * incx;
    if(incy < 0)
      iy = (-n + 1) * incy;
    for(i = 0; i < n; i++) {
      dy[iy] = dy[iy] + da * dx[ix];
      ix = ix + incx;
      iy = iy + incy;
    }
    return;
  }

  /* code for both increments equal to 1 */

#ifdef ROLL
  for(i = 0; i < n; i++) {
    dy[i] = dy[i] + da * dx[i];
  }
#endif
#ifdef UNROLL

  m = n % 4;
  if(m != 0) {
    for(i = 0; i < m; i++)
      dy[i] = dy[i] + da * dx[i];
    if(n < 4)
      return;
  }
  for(i = m; i < n; i = i + 4) {
    dy[i] = dy[i] + da * dx[i];
    dy[i + 1] = dy[i + 1] + da * dx[i + 1];
    dy[i + 2] = dy[i + 2] + da * dx[i + 2];
    dy[i + 3] = dy[i + 3] + da * dx[i + 3];
  }
#endif
}

Here is the caller graph for this function:

static REAL ddot ( ) [static]

Here is the caller graph for this function:

static REAL ddot	(	int	n,
		dx	,
		int	incx,
		dy	,
		int	incy
	)			`[static]`

Definition at line 273 of file kflops.c.

{
  REAL dtemp;
  int i, ix, iy;

  dtemp = ZERO;

  if(n <= 0)
    return (ZERO);

  if(incx != 1 || incy != 1) {

    /* code for unequal increments or equal increments not equal to 1 */

    ix = 0;
    iy = 0;
    if(incx < 0)
      ix = (-n + 1) * incx;
    if(incy < 0)
      iy = (-n + 1) * incy;
    for(i = 0; i < n; i++) {
      dtemp = dtemp + dx[ix] * dy[iy];
      ix = ix + incx;
      iy = iy + incy;
    }
    return (dtemp);
  }

  /* code for both increments equal to 1 */

#ifdef ROLL
  for(i = 0; i < n; i++)
    dtemp = dtemp + dx[i] * dy[i];
  return (dtemp);
#endif
#ifdef UNROLL

  m = n % 5;
  if(m != 0) {
    for(i = 0; i < m; i++)
      dtemp = dtemp + dx[i] * dy[i];
    if(n < 5)
      return (dtemp);
  }
  for(i = m; i < n; i = i + 5) {
    dtemp = dtemp + dx[i] * dy[i] +
        dx[i + 1] * dy[i + 1] + dx[i + 2] * dy[i + 2] +
        dx[i + 3] * dy[i + 3] + dx[i + 4] * dy[i + 4];
  }
  return (dtemp);
#endif
}

static void dgefa	(	a	,
		int	lda,
		int	n,
		ipvt	,
		int *	info
	)			`[static]`

Definition at line 390 of file kflops.c.

{
  /* internal variables */

  REAL t;
  int j, k, kp1, l, nm1;

  /* gaussian elimination with partial pivoting */

  *info = 0;
  nm1 = n - 1;
  if(nm1 >= 0) {
    for(k = 0; k < nm1; k++) {
      kp1 = k + 1;

      /* find l = pivot index */

      l = idamax(n - k, &a[lda * k + k], 1) + k;
      ipvt[k] = l;

      /* zero pivot implies this column already triangularized */

      if(a[lda * k + l] != ZERO) {

        /* interchange if necessary */

        if(l != k) {
          t = a[lda * k + l];
          a[lda * k + l] = a[lda * k + k];
          a[lda * k + k] = t;
        }

        /* compute multipliers */

        t = -ONE / a[lda * k + k];
        dscal(n - (k + 1), t, &a[lda * k + k + 1], 1);

        /* row elimination with column indexing */

        for(j = kp1; j < n; j++) {
          t = a[lda * j + l];
          if(l != k) {
            a[lda * j + l] = a[lda * j + k];
            a[lda * j + k] = t;
          }
          daxpy(n - (k + 1), t, &a[lda * k + k + 1], 1,
                &a[lda * j + k + 1], 1);
        }
      }
      else {
        *info = k;
      }
    }
  }
  ipvt[n - 1] = n - 1;
  if(a[lda * (n - 1) + (n - 1)] == ZERO)
    *info = n - 1;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void dgesl	(	a	,
		int	lda,
		int	n,
		ipvt	,
		b	,
		int	job
	)			`[static]`

Definition at line 159 of file kflops.c.

                                                       { for (j=0,j<p,j++)
   dgesl(a,lda,n,ipvt,c[j][0],0); }

   linpack. this version dated 08/14/78 . cleve moler, university of new
   mexico, argonne national lab.

   functions

   blas daxpy,ddot */
{
  /* internal variables */

  REAL t;
  int k, kb, l, nm1;

  nm1 = n - 1;
  if(job == 0) {

    /* job = 0 , solve a * x = b first solve l*y = b */

    if(nm1 >= 1) {
      for(k = 0; k < nm1; k++) {
        l = ipvt[k];
        t = b[l];
        if(l != k) {
          b[l] = b[k];
          b[k] = t;
        }
        daxpy(n - (k + 1), t, &a[lda * k + k + 1], 1, &b[k + 1], 1);
      }
    }

    /* now solve u*x = y */

    for(kb = 0; kb < n; kb++) {
      k = n - (kb + 1);
      b[k] = b[k] / a[lda * k + k];
      t = -b[k];
      daxpy(k, t, &a[lda * k + 0], 1, &b[0], 1);
    }
  }
  else {

    /* job = nonzero, solve trans(a) * x = b first solve trans(u)*y = b */

    for(k = 0; k < n; k++) {
      t = ddot(k, &a[lda * k + 0], 1, &b[0], 1);
      b[k] = (b[k] - t) / a[lda * k + k];
    }

    /* now solve trans(l)*x = y */

    if(nm1 >= 1) {
      for(kb = 1; kb < nm1; kb++) {
        k = n - (kb + 1);
        b[k] = b[k] + ddot(n - (k + 1), &a[lda * k + k + 1], 1, &b[k + 1], 1);
        l = ipvt[k];
        if(l != k) {
          t = b[l];
          b[l] = b[k];
          b[k] = t;
        }
      }
    }
  }
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void dmxpy	(	int	n1,
		y	,
		int	n2,
		int	ldm,
		x	,
		m
	)			`[static]`

Definition at line 549 of file kflops.c.

          : multiply matrix m times vector x and add the result to vector y.

   parameters:

   n1 integer, number of elements in vector y, and number of rows in matrix m

   y double [n1], vector of length n1 to which is added the product m*x

   n2 integer, number of elements in vector x, and number of columns in
   matrix m

   ldm integer, leading dimension of array m

   x double [n2], vector of length n2

   m double [ldm][n2], matrix of n1 rows and n2 columns

   ---------------------------------------------------------------------- */
{
  int j, i, jmin;
  /* cleanup odd vector */

  j = n2 % 2;
  if(j >= 1) {
    j = j - 1;
    for(i = 0; i < n1; i++)
      y[i] = (y[i]) + x[j] * m[ldm * j + i];
  }

  /* cleanup odd group of two vectors */

  j = n2 % 4;
  if(j >= 2) {
    j = j - 1;
    for(i = 0; i < n1; i++)
      y[i] = ((y[i])
              + x[j - 1] * m[ldm * (j - 1) + i]) + x[j] * m[ldm * j + i];
  }

  /* cleanup odd group of four vectors */

  j = n2 % 8;
  if(j >= 4) {
    j = j - 1;
    for(i = 0; i < n1; i++)
      y[i] = ((((y[i])
                + x[j - 3] * m[ldm * (j - 3) + i])
               + x[j - 2] * m[ldm * (j - 2) + i])
              + x[j - 1] * m[ldm * (j - 1) + i]) + x[j] * m[ldm * j + i];
  }

  /* cleanup odd group of eight vectors */

  j = n2 % 16;
  if(j >= 8) {
    j = j - 1;
    for(i = 0; i < n1; i++)
      y[i] = ((((((((y[i])
                    + x[j - 7] * m[ldm * (j - 7) + i]) + x[j -
                                                           6] * m[ldm * (j -
                                                                         6) +
                                                                  i])
                  + x[j - 5] * m[ldm * (j - 5) + i]) + x[j - 4] * m[ldm * (j -
                                                                           4)
                                                                    + i])
                + x[j - 3] * m[ldm * (j - 3) + i]) + x[j - 2] * m[ldm * (j -
                                                                         2) +
                                                                  i])
              + x[j - 1] * m[ldm * (j - 1) + i]) + x[j] * m[ldm * j + i];
  }

  /* main loop - groups of sixteen vectors */

  jmin = (n2 % 16) + 16;
  for(j = jmin - 1; j < n2; j = j + 16) {
    for(i = 0; i < n1; i++)
      y[i] = ((((((((((((((((y[i])
                            + x[j - 15] * m[ldm * (j - 15) + i])
                           + x[j - 14] * m[ldm * (j - 14) + i])
                          + x[j - 13] * m[ldm * (j - 13) + i])
                         + x[j - 12] * m[ldm * (j - 12) + i])
                        + x[j - 11] * m[ldm * (j - 11) + i])
                       + x[j - 10] * m[ldm * (j - 10) + i])
                      + x[j - 9] * m[ldm * (j - 9) + i])
                     + x[j - 8] * m[ldm * (j - 8) + i])
                    + x[j - 7] * m[ldm * (j - 7) + i])
                   + x[j - 6] * m[ldm * (j - 6) + i])
                  + x[j - 5] * m[ldm * (j - 5) + i])
                 + x[j - 4] * m[ldm * (j - 4) + i])
                + x[j - 3] * m[ldm * (j - 3) + i])
               + x[j - 2] * m[ldm * (j - 2) + i])
              + x[j - 1] * m[ldm * (j - 1) + i])
          + x[j] * m[ldm * j + i];
  }
}

Here is the caller graph for this function:

static void dscal	(	int	n,
		REAL	da,
		dx	,
		int	incx
	)			`[static]`

Definition at line 337 of file kflops.c.

{
  int i, nincx;

  if(n <= 0)
    return;
  if(incx != 1) {

    /* code for increment not equal to 1 */

    nincx = n * incx;
    for(i = 0; i < nincx; i = i + incx)
      dx[i] = da * dx[i];
    return;
  }

  /* code for increment equal to 1 */

#ifdef ROLL
  for(i = 0; i < n; i++)
    dx[i] = da * dx[i];
#endif
#ifdef UNROLL

  m = n % 5;
  if(m != 0) {
    for(i = 0; i < m; i++)
      dx[i] = da * dx[i];
    if(n < 5)
      return;
  }
  for(i = m; i < n; i = i + 5) {
    dx[i] = da * dx[i];
    dx[i + 1] = da * dx[i + 1];
    dx[i + 2] = da * dx[i + 2];
    dx[i + 3] = da * dx[i + 3];
    dx[i + 4] = da * dx[i + 4];
  }
#endif

}

Here is the caller graph for this function:

static int idamax ( ) [static]

Here is the caller graph for this function:

static int idamax	(	int	n,
		dx	,
		int	incx
	)			`[static]`

Definition at line 495 of file kflops.c.

{
  REAL dmax;
  int i, ix, itemp;

  if(n < 1)
    return (-1);
  if(n == 1)
    return (0);
  if(incx != 1) {

    /* code for increment not equal to 1 */

    ix = 1;
    itemp = 0;
    dmax = fabs((double) dx[0]);
    ix = ix + incx;
    for(i = 1; i < n; i++) {
      if(fabs((double) dx[ix]) > dmax) {
        itemp = i;
        dmax = fabs((double) dx[ix]);
      }
      ix = ix + incx;
    }
  }
  else {

    /* code for increment equal to 1 */

    itemp = 0;
    dmax = fabs((double) dx[0]);
    for(i = 1; i < n; i++) {
      if(fabs((double) dx[i]) > dmax) {
        itemp = i;
        dmax = fabs((double) dx[i]);
      }
    }
  }
  return (itemp);
}

int kflops ( )

Definition at line 692 of file kflops.c.

{
  static REAL aa[SIZE][SIZE], a[SIZE][SIZE + 1], b[SIZE], x[SIZE];
  REAL Newcray, ops, total, norma, normx;
  REAL resid, eps, t1, tm, tm2;
  REAL kflops_epslon(), second(), kf;
  static int ipvt[SIZE], n, i, ntimes, info, lda, ldaa, kflops;

  lda = SIZE + 1;
  ldaa = SIZE;
  Newcray = .056;
  n = SIZE/2;

  ops = (2.0e0 * (n * n * n)) / 3.0 + 2.0 * (n * n);

  matgen((REAL *) a, lda, n, b, &norma);
  t1 = second();
  dgefa((REAL *) a, lda, n, ipvt, &info);
  my_time[0][0] = second() - t1;
  t1 = second();
  dgesl((REAL *) a, lda, n, ipvt, b, 0);
  my_time[1][0] = second() - t1;
  total = my_time[0][0] + my_time[1][0];

  /* compute a residual to verify results.  */

  for(i = 0; i < n; i++) {
    x[i] = b[i];
  }
  matgen((REAL *) a, lda, n, b, &norma);
  for(i = 0; i < n; i++) {
    b[i] = -b[i];
  }
  dmxpy(n, b, n, lda, x, (REAL *) a);
  resid = 0.0;
  normx = 0.0;
  for(i = 0; i < n; i++) {
    resid = (resid > fabs((double) b[i]))
        ? resid : fabs((double) b[i]);
    normx = (normx > fabs((double) x[i]))
        ? normx : fabs((double) x[i]);
  }
  eps = kflops_epslon((REAL) ONE);
  /* residn = resid/( n*norma*normx*eps ); */

  my_time[2][0] = total;
  my_time[3][0] = ops / (1.0e3 * total);
  my_time[4][0] = 2.0e3 / my_time[3][0];
  my_time[5][0] = total / Newcray;

  matgen((REAL *) a, lda, n, b, &norma);
  t1 = second();
  dgefa((REAL *) a, lda, n, ipvt, &info);
  my_time[0][1] = second() - t1;
  t1 = second();
  dgesl((REAL *) a, lda, n, ipvt, b, 0);
  my_time[1][1] = second() - t1;
  total = my_time[0][1] + my_time[1][1];
  my_time[2][1] = total;
  my_time[3][1] = ops / (1.0e3 * total);
  my_time[4][1] = 2.0e3 / my_time[3][1];
  my_time[5][1] = total / Newcray;

  matgen((REAL *) a, lda, n, b, &norma);
  t1 = second();
  dgefa((REAL *) a, lda, n, ipvt, &info);
  my_time[0][2] = second() - t1;
  t1 = second();
  dgesl((REAL *) a, lda, n, ipvt, b, 0);
  my_time[1][2] = second() - t1;
  total = my_time[0][2] + my_time[1][2];
  my_time[2][2] = total;
  my_time[3][2] = ops / (1.0e3 * total);
  my_time[4][2] = 2.0e3 / my_time[3][2];
  my_time[5][2] = total / Newcray;

  ntimes = NTIMES;
  tm2 = 0.0;
  t1 = second();

  for(i = 0; i < ntimes; i++) {
    tm = second();
    matgen((REAL *) a, lda, n, b, &norma);
    tm2 = tm2 + second() - tm;
    dgefa((REAL *) a, lda, n, ipvt, &info);
  }

  my_time[0][3] = (second() - t1 - tm2) / ntimes;
  t1 = second();

  for(i = 0; i < ntimes; i++) {
    dgesl((REAL *) a, lda, n, ipvt, b, 0);
  }

  my_time[1][3] = (second() - t1) / ntimes;
  total = my_time[0][3] + my_time[1][3];
  my_time[2][3] = total;
  my_time[3][3] = ops / (1.0e3 * total);
  my_time[4][3] = 2.0e3 / my_time[3][3];
  my_time[5][3] = total / Newcray;

  matgen((REAL *) aa, ldaa, n, b, &norma);
  t1 = second();
  dgefa((REAL *) aa, ldaa, n, ipvt, &info);
  my_time[0][4] = second() - t1;
  t1 = second();
  dgesl((REAL *) aa, ldaa, n, ipvt, b, 0);
  my_time[1][4] = second() - t1;
  total = my_time[0][4] + my_time[1][4];
  my_time[2][4] = total;
  my_time[3][4] = ops / (1.0e3 * total);
  my_time[4][4] = 2.0e3 / my_time[3][4];
  my_time[5][4] = total / Newcray;

  matgen((REAL *) aa, ldaa, n, b, &norma);
  t1 = second();
  dgefa((REAL *) aa, ldaa, n, ipvt, &info);
  my_time[0][5] = second() - t1;
  t1 = second();
  dgesl((REAL *) aa, ldaa, n, ipvt, b, 0);
  my_time[1][5] = second() - t1;
  total = my_time[0][5] + my_time[1][5];
  my_time[2][5] = total;
  my_time[3][5] = ops / (1.0e3 * total);
  my_time[4][5] = 2.0e3 / my_time[3][5];
  my_time[5][5] = total / Newcray;

  matgen((REAL *) aa, ldaa, n, b, &norma);
  t1 = second();
  dgefa((REAL *) aa, ldaa, n, ipvt, &info);
  my_time[0][6] = second() - t1;
  t1 = second();
  dgesl((REAL *) aa, ldaa, n, ipvt, b, 0);
  my_time[1][6] = second() - t1;
  total = my_time[0][6] + my_time[1][6];
  my_time[2][6] = total;
  my_time[3][6] = ops / (1.0e3 * total);
  my_time[4][6] = 2.0e3 / my_time[3][6];
  my_time[5][6] = total / Newcray;

  ntimes = NTIMES;
  tm2 = 0;
  t1 = second();
  for(i = 0; i < ntimes; i++) {
    tm = second();
    matgen((REAL *) aa, ldaa, n, b, &norma);
    tm2 = tm2 + second() - tm;
    dgefa((REAL *) aa, ldaa, n, ipvt, &info);
  }
  my_time[0][7] = (second() - t1 - tm2) / ntimes;
  t1 = second();
  for(i = 0; i < ntimes; i++) {
    dgesl((REAL *) aa, ldaa, n, ipvt, b, 0);
  }
  my_time[1][7] = (second() - t1) / ntimes;
  total = my_time[0][7] + my_time[1][7];

  my_time[2][7] = total;
  my_time[3][7] = ops / (1.0e3 * total);
  my_time[4][7] = 2.0e3 / my_time[3][7];
  my_time[5][7] = total / Newcray;

  /* the following code sequence implements the semantics of the Fortran
     intrinsics "nint(min(my_time[3][3],my_time[3][7]))" */

  kf = (my_time[3][3] < my_time[3][7]) ? my_time[3][3] : my_time[3][7];
  kf = (kf > ZERO) ? (kf + .5) : (kf - .5);
  if(fabs((double) kf) < ONE)
    kflops = 0;
  else {
    kflops = (int) fabs((double) kf);
    if(kf < ZERO)
      kflops = -kflops;
  }

  return kflops;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void matgen	(	a	,
		int	lda,
		int	n,
		b	,
		*	norma
	)			`[static]`

Definition at line 122 of file kflops.c.

{
  int init, i, j;

  init = 1325;
  *norma = 0.0;
  for(j = 0; j < n; j++) {
    for(i = 0; i < n; i++) {
      init = 3125 * init % 65536;
      a[lda * j + i] = (init - 32768.0) / 16384.0;
      *norma = (a[lda * j + i] > *norma) ? a[lda * j + i] : *norma;
    }
  }
  for(i = 0; i < n; i++) {
    b[i] = 0.0;
  }
  for(j = 0; j < n; j++) {
    for(i = 0; i < n; i++) {
      b[i] = b[i] + a[lda * j + i];
    }
  }
}

Here is the caller graph for this function:

static REAL second ( ) [static]

Definition at line 659 of file kflops.c.

{
  REAL t;

#ifndef MPP                     /* Workstations */


#ifndef NORUSAGE
  getrusage(RUSAGE_SELF, &ru);
#else                           /* HPs */
  if(ClockTick == 0.0)
    ClockTick = (REAL) sysconf(_SC_CLK_TCK);
  if(times(&ru) == -1)
    fprintf(stderr, "second() : Oups\n");
#endif


#ifndef NORUSAGE
  t = (REAL) (ru.ru_utime.tv_sec) + ((REAL) (ru.ru_utime.tv_usec)) / 1.0e6;
#else
  t = (REAL) ((REAL) ru.tms_utime / ClockTick);
#endif
#else                           /* MPPs !! */

#endif                          /* MPP */

  return t;
}

Here is the call graph for this function:

Here is the caller graph for this function:

Variable Documentation

REAL my_time[9][9] [static]

Definition at line 48 of file kflops.c.

struct rusage ru

Definition at line 40 of file kflops.c.

kflops.c File Reference

Functions

Variables

Detailed Description

Function Documentation

Variable Documentation