#include "kflops.h"#include <stdio.h>#include <math.h>#include <sys/time.h>#include <sys/resource.h>
Go to the source code of this file.
Functions | |
| static int | idamax () |
| static REAL | ddot () |
| static void | daxpy (int n, da, dx, int incx, dy, int incy) |
| static void | matgen (a, int lda, int n, b,*norma) |
| static void | dgesl (a, int lda, int n, ipvt, b, int job) |
| static REAL | ddot (int n, dx, int incx, dy, int incy) |
| static void | dscal (int n, REAL da, dx, int incx) |
| static void | dgefa (a, int lda, int n, ipvt, int *info) |
| static int | idamax (int n, dx, int incx) |
| static void | dmxpy (int n1, y, int n2, int ldm, x, m) |
| static REAL | second () |
| int | kflops () |
Variables | |
| struct rusage | ru |
| static REAL | my_time [9][9] |
This file contains a kflops estimator inherited from earlier ICL projects
Definition in file kflops.c.
| static void daxpy | ( | int | n, | |
| da, | ||||
| dx | , | |||
| int | incx, | |||
| dy | , | |||
| int | incy | |||
| ) | [static] |
Definition at line 59 of file kflops.c.
{
    /*
     * Constant times a vector plus a vector:
     *     dy[k] = dy[k] + da * dx[k]   for n elements,
     * stepping through dx with stride incx and through dy with stride incy.
     * Returns without touching dy when n <= 0 or da == ZERO.
     */
    int i, ix, iy;
#ifndef ROLL
    int m;                      /* number of leading elements done before the unrolled loop */
#endif

    if(n <= 0)
        return;
    if(da == ZERO)
        return;

    if(incx != 1 || incy != 1) {
        /* code for unequal increments or equal increments not equal to 1 */
        ix = 0;
        iy = 0;
        /* a negative stride walks backwards, so start from the far end */
        if(incx < 0)
            ix = (-n + 1) * incx;
        if(incy < 0)
            iy = (-n + 1) * incy;
        for(i = 0; i < n; i++) {
            dy[iy] = dy[iy] + da * dx[ix];
            ix = ix + incx;
            iy = iy + incy;
        }
        return;
    }

    /*
     * code for both increments equal to 1.
     * Exactly one variant is compiled: the rolled loop when ROLL is defined,
     * the 4-way unrolled loop otherwise. This avoids applying the update
     * twice when both ROLL and UNROLL are defined, and avoids a silent no-op
     * when neither is.
     */
#ifdef ROLL
    for(i = 0; i < n; i++) {
        dy[i] = dy[i] + da * dx[i];
    }
#else
    /* peel off n % 4 elements so the main loop runs on a multiple of 4 */
    m = n % 4;
    for(i = 0; i < m; i++)
        dy[i] = dy[i] + da * dx[i];
    for(i = m; i < n; i = i + 4) {
        dy[i] = dy[i] + da * dx[i];
        dy[i + 1] = dy[i + 1] + da * dx[i + 1];
        dy[i + 2] = dy[i + 2] + da * dx[i + 2];
        dy[i + 3] = dy[i + 3] + da * dx[i + 3];
    }
#endif
}

| static REAL ddot | ( | ) | [static] |

| static REAL ddot | ( | int | n, | |
| dx | , | |||
| int | incx, | |||
| dy | , | |||
| int | incy | |||
| ) | [static] |
Definition at line 273 of file kflops.c.
{
    /*
     * Dot product of two vectors: returns the sum of dx[k] * dy[k] over n
     * elements, stepping through dx with stride incx and dy with stride incy.
     * Returns ZERO when n <= 0.
     */
    REAL dtemp;
    int i, ix, iy;
#ifndef ROLL
    int m;                      /* number of leading elements done before the unrolled loop */
#endif

    dtemp = ZERO;
    if(n <= 0)
        return (ZERO);

    if(incx != 1 || incy != 1) {
        /* code for unequal increments or equal increments not equal to 1 */
        ix = 0;
        iy = 0;
        /* a negative stride walks backwards, so start from the far end */
        if(incx < 0)
            ix = (-n + 1) * incx;
        if(incy < 0)
            iy = (-n + 1) * incy;
        for(i = 0; i < n; i++) {
            dtemp = dtemp + dx[ix] * dy[iy];
            ix = ix + incx;
            iy = iy + incy;
        }
        return (dtemp);
    }

    /*
     * code for both increments equal to 1.
     * Exactly one variant is compiled (rolled when ROLL is defined, 5-way
     * unrolled otherwise), so the function always reaches a return.
     */
#ifdef ROLL
    for(i = 0; i < n; i++)
        dtemp = dtemp + dx[i] * dy[i];
#else
    /* peel off n % 5 elements so the main loop runs on a multiple of 5 */
    m = n % 5;
    for(i = 0; i < m; i++)
        dtemp = dtemp + dx[i] * dy[i];
    for(i = m; i < n; i = i + 5) {
        dtemp = dtemp + dx[i] * dy[i] +
            dx[i + 1] * dy[i + 1] + dx[i + 2] * dy[i + 2] +
            dx[i + 3] * dy[i + 3] + dx[i + 4] * dy[i + 4];
    }
#endif
    return (dtemp);
}
| static void dgefa | ( | a | , | |
| int | lda, | |||
| int | n, | |||
| ipvt | , | |||
| int * | info | |||
| ) | [static] |
Definition at line 390 of file kflops.c.
{
    /*
     * Factors the n-by-n matrix stored column-wise in a (column j starts at
     * a[lda * j]) by gaussian elimination with partial pivoting.
     *
     * On return:
     *   a     holds the multipliers below the diagonal and the upper
     *         triangular factor on and above it (as consumed by dgesl),
     *   ipvt  holds the pivot row chosen for each column,
     *   *info is 0, or the index k of a column whose pivot was exactly ZERO.
     *         NOTE(review): a zero pivot in column 0 also yields *info == 0,
     *         so callers cannot distinguish that case from success.
     */
    /* internal variables */
    REAL t;
    int j, k, kp1, l, nm1;

    /* gaussian elimination with partial pivoting */
    *info = 0;
    /* nothing to factor; also keeps the n - 1 indexing at the end in bounds */
    if(n <= 0)
        return;
    nm1 = n - 1;

    for(k = 0; k < nm1; k++) {
        kp1 = k + 1;

        /* find l = pivot index */
        l = idamax(n - k, &a[lda * k + k], 1) + k;
        ipvt[k] = l;

        /* zero pivot implies this column already triangularized */
        if(a[lda * k + l] == ZERO) {
            *info = k;
            continue;
        }

        /* interchange if necessary */
        if(l != k) {
            t = a[lda * k + l];
            a[lda * k + l] = a[lda * k + k];
            a[lda * k + k] = t;
        }

        /* compute multipliers */
        t = -ONE / a[lda * k + k];
        dscal(n - kp1, t, &a[lda * k + kp1], 1);

        /* row elimination with column indexing */
        for(j = kp1; j < n; j++) {
            t = a[lda * j + l];
            if(l != k) {
                a[lda * j + l] = a[lda * j + k];
                a[lda * j + k] = t;
            }
            daxpy(n - kp1, t, &a[lda * k + kp1], 1, &a[lda * j + kp1], 1);
        }
    }

    /* the last column needs no elimination, only the zero-pivot check */
    ipvt[n - 1] = n - 1;
    if(a[lda * (n - 1) + (n - 1)] == ZERO)
        *info = n - 1;
}


| static void dgesl | ( | a | , | |
| int | lda, | |||
| int | n, | |||
| ipvt | , | |||
| b | , | |||
| int | job | |||
| ) | [static] |
Definition at line 159 of file kflops.c.
{ for (j=0; j<p; j++)
dgesl(a,lda,n,ipvt,c[j][0],0); }
linpack. this version dated 08/14/78 . cleve moler, university of new
mexico, argonne national lab.
functions
blas daxpy,ddot */
{
    /*
     * Solves a linear system using the factors and pivot vector produced by
     * dgefa. b holds the right-hand side on entry and is overwritten with
     * the solution.
     *   job == 0 : solve a * x = b
     *   job != 0 : solve trans(a) * x = b
     */
    /* internal variables */
    REAL t;
    int k, kb, l, nm1;

    nm1 = n - 1;
    if(job == 0) {
        /* job = 0 , solve a * x = b; first solve l*y = b */
        if(nm1 >= 1) {
            for(k = 0; k < nm1; k++) {
                l = ipvt[k];
                t = b[l];
                /* apply the same row interchange dgefa recorded */
                if(l != k) {
                    b[l] = b[k];
                    b[k] = t;
                }
                daxpy(n - (k + 1), t, &a[lda * k + k + 1], 1, &b[k + 1], 1);
            }
        }
        /* now solve u*x = y */
        for(kb = 0; kb < n; kb++) {
            k = n - (kb + 1);
            b[k] = b[k] / a[lda * k + k];
            t = -b[k];
            daxpy(k, t, &a[lda * k + 0], 1, &b[0], 1);
        }
    }
    else {
        /* job = nonzero, solve trans(a) * x = b; first solve trans(u)*y = b */
        for(k = 0; k < n; k++) {
            t = ddot(k, &a[lda * k + 0], 1, &b[0], 1);
            b[k] = (b[k] - t) / a[lda * k + k];
        }
        /* now solve trans(l)*x = y */
        if(nm1 >= 1) {
            /*
             * k must run from n - 2 down to 0 inclusive, i.e. kb = 1 .. n - 1.
             * The previous bound (kb < nm1) stopped one step early and never
             * processed k = 0.
             */
            for(kb = 1; kb < n; kb++) {
                k = n - (kb + 1);
                b[k] = b[k] + ddot(n - (k + 1), &a[lda * k + k + 1], 1, &b[k + 1], 1);
                l = ipvt[k];
                if(l != k) {
                    t = b[l];
                    b[l] = b[k];
                    b[k] = t;
                }
            }
        }
    }
}


| static void dmxpy | ( | int | n1, | |
| y | , | |||
| int | n2, | |||
| int | ldm, | |||
| x | , | |||
| m | ||||
| ) | [static] |
Definition at line 549 of file kflops.c.
: multiply matrix m times vector x and add the result to vector y. parameters: n1 integer, number of elements in vector y, and number of rows in matrix m y double [n1], vector of length n1 to which is added the product m*x n2 integer, number of elements in vector x, and number of columns in matrix m ldm integer, leading dimension of array m x double [n2], vector of length n2 m double [ldm][n2], matrix of n1 rows and n2 columns ---------------------------------------------------------------------- */ { int j, i, jmin; /* cleanup odd vector */ j = n2 % 2; if(j >= 1) { j = j - 1; for(i = 0; i < n1; i++) y[i] = (y[i]) + x[j] * m[ldm * j + i]; } /* cleanup odd group of two vectors */ j = n2 % 4; if(j >= 2) { j = j - 1; for(i = 0; i < n1; i++) y[i] = ((y[i]) + x[j - 1] * m[ldm * (j - 1) + i]) + x[j] * m[ldm * j + i]; } /* cleanup odd group of four vectors */ j = n2 % 8; if(j >= 4) { j = j - 1; for(i = 0; i < n1; i++) y[i] = ((((y[i]) + x[j - 3] * m[ldm * (j - 3) + i]) + x[j - 2] * m[ldm * (j - 2) + i]) + x[j - 1] * m[ldm * (j - 1) + i]) + x[j] * m[ldm * j + i]; } /* cleanup odd group of eight vectors */ j = n2 % 16; if(j >= 8) { j = j - 1; for(i = 0; i < n1; i++) y[i] = ((((((((y[i]) + x[j - 7] * m[ldm * (j - 7) + i]) + x[j - 6] * m[ldm * (j - 6) + i]) + x[j - 5] * m[ldm * (j - 5) + i]) + x[j - 4] * m[ldm * (j - 4) + i]) + x[j - 3] * m[ldm * (j - 3) + i]) + x[j - 2] * m[ldm * (j - 2) + i]) + x[j - 1] * m[ldm * (j - 1) + i]) + x[j] * m[ldm * j + i]; } /* main loop - groups of sixteen vectors */ jmin = (n2 % 16) + 16; for(j = jmin - 1; j < n2; j = j + 16) { for(i = 0; i < n1; i++) y[i] = ((((((((((((((((y[i]) + x[j - 15] * m[ldm * (j - 15) + i]) + x[j - 14] * m[ldm * (j - 14) + i]) + x[j - 13] * m[ldm * (j - 13) + i]) + x[j - 12] * m[ldm * (j - 12) + i]) + x[j - 11] * m[ldm * (j - 11) + i]) + x[j - 10] * m[ldm * (j - 10) + i]) + x[j - 9] * m[ldm * (j - 9) + i]) + x[j - 8] * m[ldm * (j - 8) + i]) + x[j - 7] * m[ldm * (j - 7) + i]) + x[j - 6] * m[ldm * (j - 6) + i]) + x[j - 
5] * m[ldm * (j - 5) + i]) + x[j - 4] * m[ldm * (j - 4) + i]) + x[j - 3] * m[ldm * (j - 3) + i]) + x[j - 2] * m[ldm * (j - 2) + i]) + x[j - 1] * m[ldm * (j - 1) + i]) + x[j] * m[ldm * j + i]; } }

| static void dscal | ( | int | n, | |
| REAL | da, | |||
| dx | , | |||
| int | incx | |||
| ) | [static] |
Definition at line 337 of file kflops.c.
{
    /*
     * Scales a vector by a constant: dx[k] = da * dx[k] for n elements
     * taken with stride incx. Does nothing when n <= 0. In the strided
     * path the loop bound n * incx is not positive for incx <= 0, so no
     * element is touched in that case either.
     */
    int i, nincx;
#ifndef ROLL
    int m;                      /* number of leading elements done before the unrolled loop */
#endif

    if(n <= 0)
        return;

    if(incx != 1) {
        /* code for increment not equal to 1 */
        nincx = n * incx;
        for(i = 0; i < nincx; i = i + incx)
            dx[i] = da * dx[i];
        return;
    }

    /*
     * code for increment equal to 1.
     * Exactly one variant is compiled: the rolled loop when ROLL is defined,
     * the 5-way unrolled loop otherwise. This avoids scaling twice when both
     * ROLL and UNROLL are defined, and a silent no-op when neither is.
     */
#ifdef ROLL
    for(i = 0; i < n; i++)
        dx[i] = da * dx[i];
#else
    /* peel off n % 5 elements so the main loop runs on a multiple of 5 */
    m = n % 5;
    for(i = 0; i < m; i++)
        dx[i] = da * dx[i];
    for(i = m; i < n; i = i + 5) {
        dx[i] = da * dx[i];
        dx[i + 1] = da * dx[i + 1];
        dx[i + 2] = da * dx[i + 2];
        dx[i + 3] = da * dx[i + 3];
        dx[i + 4] = da * dx[i + 4];
    }
#endif
}

| static int idamax | ( | ) | [static] |

| static int idamax | ( | int | n, | |
| dx | , | |||
| int | incx | |||
| ) | [static] |
Definition at line 495 of file kflops.c.
{
    /*
     * Returns the 0-based position (counted in elements visited, not in
     * array offsets) of the first element with the largest absolute value
     * among n elements of dx taken with stride incx.
     * Returns -1 when n < 1 and 0 when n == 1.
     */
    REAL dmax;
    int i, ix, itemp;

    if(n < 1)
        return (-1);
    if(n == 1)
        return (0);

    itemp = 0;
    dmax = fabs((double) dx[0]);

    if(incx != 1) {
        /* code for increment not equal to 1 */
        /*
         * Element i lives at offset i * incx, so the scan resumes at
         * dx[incx]. The previous code started ix at 1 (a 1-based leftover)
         * and therefore examined dx[1 + i * incx].
         */
        ix = incx;
        for(i = 1; i < n; i++) {
            if(fabs((double) dx[ix]) > dmax) {
                itemp = i;
                dmax = fabs((double) dx[ix]);
            }
            ix = ix + incx;
        }
    }
    else {
        /* code for increment equal to 1 */
        for(i = 1; i < n; i++) {
            if(fabs((double) dx[i]) > dmax) {
                itemp = i;
                dmax = fabs((double) dx[i]);
            }
        }
    }
    return (itemp);
}
| int kflops | ( | ) |
Definition at line 692 of file kflops.c.
{
    /*
     * Times the factor (dgefa) and solve (dgesl) steps on a generated
     * n-by-n system, n = SIZE / 2, and returns a kflops estimate.
     *
     * Eight runs are made; run r fills column r of my_time:
     *   my_time[0][r]  dgefa time
     *   my_time[1][r]  dgesl time
     *   my_time[2][r]  their sum (total)
     *   my_time[3][r]  ops / (1.0e3 * total)
     *   my_time[4][r]  2.0e3 / my_time[3][r]
     *   my_time[5][r]  total / Newcray
     * Runs 0-3 use the array with leading dimension SIZE + 1, runs 4-7 the
     * one with leading dimension SIZE. Runs 3 and 7 average over NTIMES
     * repetitions (with the matgen time subtracted); the others time a
     * single factor/solve.
     *
     * The return value is min(my_time[3][3], my_time[3][7]) rounded to the
     * nearest integer.
     *
     * NOTE(review): if a measured total is 0 (timer too coarse) the
     * divisions below are by zero and the final conversion to int is not
     * meaningful; this is unchanged from the previous code.
     */
    static REAL aa[SIZE][SIZE], a[SIZE][SIZE + 1], b[SIZE], x[SIZE];
    REAL Newcray, ops, total, norma, normx;
    REAL resid, eps, t1, tm, tm2;
    REAL kflops_epslon(), second(), kf;
    static int ipvt[SIZE], n, i, ntimes, info, lda, ldaa, kflops;
    REAL *mat;                  /* matrix used by the current run */
    int ld;                     /* its leading dimension */
    int run;

    lda = SIZE + 1;
    ldaa = SIZE;
    Newcray = .056;
    n = SIZE / 2;
    /* operation count; evaluated in double so n * n * n cannot overflow int */
    ops = (2.0e0 * ((double) n * n * n)) / 3.0 + 2.0 * ((double) n * n);

    for(run = 0; run < 8; run++) {
        if(run < 4) {
            mat = (REAL *) a;
            ld = lda;
        }
        else {
            mat = (REAL *) aa;
            ld = ldaa;
        }

        if(run % 4 != 3) {
            /* single timed factor + solve */
            matgen(mat, ld, n, b, &norma);
            t1 = second();
            dgefa(mat, ld, n, ipvt, &info);
            my_time[0][run] = second() - t1;
            t1 = second();
            dgesl(mat, ld, n, ipvt, b, 0);
            my_time[1][run] = second() - t1;
        }
        else {
            /* averaged over ntimes repetitions; tm2 accumulates matgen time */
            ntimes = NTIMES;
            tm2 = 0.0;
            t1 = second();
            for(i = 0; i < ntimes; i++) {
                tm = second();
                matgen(mat, ld, n, b, &norma);
                tm2 = tm2 + second() - tm;
                dgefa(mat, ld, n, ipvt, &info);
            }
            my_time[0][run] = (second() - t1 - tm2) / ntimes;
            t1 = second();
            for(i = 0; i < ntimes; i++) {
                dgesl(mat, ld, n, ipvt, b, 0);
            }
            my_time[1][run] = (second() - t1) / ntimes;
        }
        total = my_time[0][run] + my_time[1][run];

        if(run == 0) {
            /* compute a residual to verify results. */
            /* NOTE(review): resid, normx and eps are computed but not used
               afterwards (the residn formula below is commented out). */
            for(i = 0; i < n; i++) {
                x[i] = b[i];
            }
            matgen((REAL *) a, lda, n, b, &norma);
            for(i = 0; i < n; i++) {
                b[i] = -b[i];
            }
            dmxpy(n, b, n, lda, x, (REAL *) a);
            resid = 0.0;
            normx = 0.0;
            for(i = 0; i < n; i++) {
                resid = (resid > fabs((double) b[i]))
                    ? resid : fabs((double) b[i]);
                normx = (normx > fabs((double) x[i]))
                    ? normx : fabs((double) x[i]);
            }
            eps = kflops_epslon((REAL) ONE);
            /* residn = resid/( n*norma*normx*eps ); */
        }

        my_time[2][run] = total;
        my_time[3][run] = ops / (1.0e3 * total);
        my_time[4][run] = 2.0e3 / my_time[3][run];
        my_time[5][run] = total / Newcray;
    }

    /* the following code sequence implements the semantics of the Fortran
       intrinsics "nint(min(my_time[3][3],my_time[3][7]))" */
    kf = (my_time[3][3] < my_time[3][7]) ? my_time[3][3] : my_time[3][7];
    kf = (kf > ZERO) ? (kf + .5) : (kf - .5);
    if(fabs((double) kf) < ONE)
        kflops = 0;
    else {
        kflops = (int) fabs((double) kf);
        if(kf < ZERO)
            kflops = -kflops;
    }
    return kflops;
}


| static void matgen | ( | a | , | |
| int | lda, | |||
| int | n, | |||
| b | , | |||
| * | norma | |||
| ) | [static] |
Definition at line 122 of file kflops.c.
{
    /*
     * Fills the n-by-n matrix a (column j starts at a[lda * j]) with
     * values from a fixed integer recurrence, so every call produces the
     * same matrix. *norma receives the largest generated entry (or 0.0 if
     * none is positive), and b[i] receives the sum of row i.
     */
    int seed = 1325;
    int row, col, base;

    *norma = 0.0;
    for(col = 0; col < n; col++) {
        base = lda * col;       /* offset of this column's first entry */
        for(row = 0; row < n; row++) {
            seed = 3125 * seed % 65536;
            a[base + row] = (seed - 32768.0) / 16384.0;
            if(a[base + row] > *norma)
                *norma = a[base + row];
        }
    }

    /* right-hand side: clear, then accumulate one column at a time */
    for(row = 0; row < n; row++)
        b[row] = 0.0;
    for(col = 0; col < n; col++) {
        base = lda * col;
        for(row = 0; row < n; row++)
            b[row] += a[base + row];
    }
}

| static REAL second | ( | ) | [static] |
Definition at line 659 of file kflops.c.
{
    /*
     * Returns the user CPU time consumed so far, in seconds.
     *   default  : read via getrusage(RUSAGE_SELF) into the file-level ru
     *   NORUSAGE : read via times(), converted with the clock tick rate
     *   MPP      : no timer is compiled in; 0 is returned
     */
    REAL t = 0.0;               /* defined result even when no timer is compiled in */

#ifndef MPP                     /* Workstations */
#ifndef NORUSAGE
    getrusage(RUSAGE_SELF, &ru);
    t = (REAL) (ru.ru_utime.tv_sec) + ((REAL) (ru.ru_utime.tv_usec)) / 1.0e6;
#else                           /* HPs */
    /* look the tick rate up once and cache it */
    if(ClockTick == 0.0)
        ClockTick = (REAL) sysconf(_SC_CLK_TCK);
    if(times(&ru) == -1)
        fprintf(stderr, "second() : Oups\n");
    t = (REAL) ((REAL) ru.tms_utime / ClockTick);
#endif
#else                           /* MPPs !! */
#endif                          /* MPP */
    return t;
}


1.6.3-20100507