PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
timing.c
Go to the documentation of this file.
1 
17 #if defined( _WIN32 ) || defined( _WIN64 )
18 #define int64_t __int64
19 #endif
20 
21 #include <math.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 
26 #ifdef PLASMA_EZTRACE
27 #include <eztrace.h>
28 #endif
29 
30 #if defined( _WIN32 ) || defined( _WIN64 )
31 #include <windows.h>
32 #include <time.h>
33 #include <sys/timeb.h>
34 #if defined(_MSC_VER) || defined(_MSC_EXTENSIONS)
35 #define DELTA_EPOCH_IN_MICROSECS 11644473600000000Ui64
36 #else
37 #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
38 #endif
39 
/*
 * Stand-in for the POSIX struct timezone, declared only in the Windows
 * branch of this file; filled in by the local gettimeofday() below.
 */
40 struct timezone
41 {
42  int tz_minuteswest; /* minutes W of Greenwich */
43  int tz_dsttime; /* type of dst correction */
44 };
45 
46 int gettimeofday(struct timeval* tv, struct timezone* tz)
47 {
48  FILETIME ft;
49  unsigned __int64 tmpres = 0;
50  static int tzflag;
51 
52  if (NULL != tv)
53  {
54  GetSystemTimeAsFileTime(&ft);
55  tmpres |= ft.dwHighDateTime;
56  tmpres <<= 32;
57  tmpres |= ft.dwLowDateTime;
58 
59  /*converting file time to unix epoch*/
60  tmpres /= 10; /*convert into microseconds*/
61  tmpres -= DELTA_EPOCH_IN_MICROSECS;
62 
63  tv->tv_sec = (long)(tmpres / 1000000UL);
64  tv->tv_usec = (long)(tmpres % 1000000UL);
65  }
66  if (NULL != tz)
67  {
68  if (!tzflag)
69  {
70  _tzset();
71  tzflag++;
72  }
73  tz->tz_minuteswest = _timezone / 60;
74  tz->tz_dsttime = _daylight;
75  }
76  return 0;
77 }
78 
79 #else /* Non-Windows */
80 #include <unistd.h>
81 #include <sys/time.h>
82 #include <sys/resource.h>
83 #endif
84 
85 #include <cblas.h>
86 #include <lapacke.h>
87 #include <plasma.h>
88 #include <core_blas.h>
89 #include "flops.h"
90 #include "timing.h"
91 #include "auxiliary.h"
92 
/*
 * Kernel-specific timing routine; not defined in the visible part of this
 * file. Test() calls it once per iteration and reads the elapsed time back
 * through t_ (presumably in seconds -- TODO confirm against the definition).
 */
93 static int RunTest(int *iparam, _PREC *dparam, real_Double_t *t_);
94 
/* Wall-clock timer built on gettimeofday(). */
95 real_Double_t cWtime(void);
96 
97 int ISEED[4] = {0,0,0,1}; /* initial seed for zlarnv() */
98 
99 /*
100  * struct timeval {time_t tv_sec; suseconds_t tv_usec;};
101  */
103 {
104  struct timeval tp;
105  gettimeofday( &tp, NULL );
106  return tp.tv_sec + 1e-6 * tp.tv_usec;
107 }
108 
109 static int
110 Test(int64_t n, int *iparam) {
111  int i, j, iter;
112  int thrdnbr, niter;
113  int64_t M, N, K, NRHS;
114  real_Double_t *t, gflops;
115  _PREC eps = _LAMCH( 'e' );
116  _PREC dparam[IPARAM_DNBPARAM];
117  real_Double_t fmuls, fadds, fp_per_mul, fp_per_add;
118  double sumgf, sumgf2, sumt, sd;
119  char *s;
120  char *env[] = {
121  "OMP_NUM_THREADS",
122  "MKL_NUM_THREADS",
123  "GOTO_NUM_THREADS",
124  "ACML_NUM_THREADS",
125  "ATLAS_NUM_THREADS",
126  "BLAS_NUM_THREADS", ""
127  };
128  int gnuplot = 0;
129 
130  memset( &dparam, 0, IPARAM_DNBPARAM * sizeof(_PREC) );
131 
132  thrdnbr = iparam[IPARAM_THRDNBR];
133  niter = iparam[IPARAM_NITER];
134 
135  M = iparam[IPARAM_M];
136  N = iparam[IPARAM_N];
137  K = iparam[IPARAM_K];
138  NRHS = K;
139  (void)M;(void)N;(void)K;(void)NRHS;
140 
141  if (n < 0 || thrdnbr < 0) {
142  if (iparam[IPARAM_CHECK] )
143  printf( "# N NRHS threads seconds Gflop/s Deviation ||Ax-b|| ||A|| ||x|| ||b||"
144  " eps ||Ax-b||/N/eps/(||A||||x||+||b||)\n" );
145  else
146  printf( "# N NRHS threads seconds Gflop/s Deviation\n" );
147 
148  if (gnuplot) {
149  printf( "set title '%d_NUM_THREADS: ", thrdnbr );
150  for (i = 0; env[i][0]; ++i) {
151  s = getenv( env[i] );
152 
153  if (i) printf( " " ); /* separating space */
154 
155  for (j = 0; j < 5 && env[i][j] && env[i][j] != '_'; ++j)
156  printf( "%c", env[i][j] );
157 
158  if (s)
159  printf( "=%s", s );
160  else
161  printf( "->%s", "?" );
162  }
163  printf( "'\n" );
164  printf( "%s\n%s\n%s\n%s\n%s%s%s\n",
165  "set xlabel 'Matrix size'",
166  "set ylabel 'Gflop/s'",
167  "set key bottom",
168  gnuplot > 1 ? "set terminal png giant\nset output 'timeplot.png'" : "",
169  "plot '-' using 1:5 title '", _NAME, "' with linespoints" );
170  }
171 
172  return 0;
173  }
174 
175  printf( "%5d %4d %5d ", iparam[IPARAM_N], iparam[IPARAM_K], iparam[IPARAM_THRDNBR] );
176  fflush( stdout );
177 
178  t = (real_Double_t*)malloc(niter*sizeof(real_Double_t));
179 
180  if (sizeof(_TYPE) == sizeof(_PREC)) {
181  fp_per_mul = 1;
182  fp_per_add = 1;
183  } else {
184  fp_per_mul = 6;
185  fp_per_add = 2;
186  }
187 
188  fadds = (double)(_FADDS);
189  fmuls = (double)(_FMULS);
190  gflops = 0.0;
191 
192  if ( iparam[IPARAM_WARMUP] ) {
193  RunTest( iparam, dparam, &(t[0]));
194  }
195 
196  sumgf = 0.0;
197  sumgf2 = 0.0;
198  sumt = 0.0;
199 
200  for (iter = 0; iter < niter; iter++)
201  {
202  if( iter == 0 ) {
203  if ( iparam[IPARAM_TRACE] )
204  iparam[IPARAM_TRACE] = 2;
205  if ( iparam[IPARAM_DAG] )
206  iparam[IPARAM_DAG] = 2;
207 
208  RunTest( iparam, dparam, &(t[iter]));
209 
210  iparam[IPARAM_TRACE] = 0;
211  iparam[IPARAM_DAG] = 0;
212  }
213  else
214  RunTest( iparam, dparam, &(t[iter]));
215 
216  gflops = 1e-9 * (fmuls * fp_per_mul + fadds * fp_per_add) / t[iter];
217  sumt += t[iter];
218  sumgf += gflops;
219  sumgf2 += gflops*gflops;
220  }
221 
222  gflops = sumgf/niter;
223  sd = sqrt((sumgf2 - (sumgf*sumgf)/niter)/niter);
224 
225  if ( iparam[IPARAM_CHECK] )
226  printf( "%9.3f %9.2f %9.2f %8.5e %8.5e %8.5e %8.5e %8.5e %8.5e\n",
227  sumt/niter, gflops, sd, dparam[IPARAM_RES], dparam[IPARAM_ANORM], dparam[IPARAM_XNORM], dparam[IPARAM_BNORM], eps,
228  dparam[IPARAM_RES] / n / eps / (dparam[IPARAM_ANORM] * dparam[IPARAM_XNORM] + dparam[IPARAM_BNORM] ));
229  else
230  printf( "%9.3f %9.2f %9.2f\n", sumt/niter, gflops, sd );
231 
232  fflush( stdout );
233  free(t);
234 
235  return 0;
236 }
237 
/*
 * Prefix test: returns 1 when the first strlen(prefix) characters of s are
 * equal to prefix, and 0 otherwise. An empty prefix matches every string.
 */
static int
startswith(const char *s, const char *prefix) {
    return strncmp( s, prefix, strlen( prefix ) ) == 0;
}
245 
/*
 * Upper bound accepted for every field of a range. With a 32-bit int it
 * keeps start + 10*step, and a loop that advances by step up to stop,
 * free of signed overflow.
 */
enum { RANGE_VALUE_MAX = 1000000000 };

/*
 * Parse one decimal field of a range specification.
 *
 * Leading white space and a sign are accepted, and parsing stops at the
 * first character that is not a digit (a ':' or trailing text), as "%d"
 * would. Stores the value in *value and returns 0 when it lies in
 * [1, RANGE_VALUE_MAX]; returns -1 and leaves *value alone otherwise.
 */
static int
parse_range_field(const char *text, int *value) {
    char *end;
    long parsed = strtol( text, &end, 10 );

    if (end == text)
        return -1;                      /* empty field or no digits */
    if (parsed < 1 || parsed > RANGE_VALUE_MAX)
        return -1;                      /* also rejects strtol() saturation */

    *value = (int)parsed;
    return 0;
}

/*
 * Parse a range given as "Start", "Start:Stop" or "Start:Stop:Step".
 *
 *   "Start"            stop = start + 10*step with step = max(1, start/10)
 *   "Start:Stop"       step = max(1, (stop - start)/10), at most 10 steps
 *   "Start:Stop:Step"  all three values taken as given
 *
 * Every value must be at least 1 and at most RANGE_VALUE_MAX, and stop must
 * not be smaller than start.
 *
 * Returns 0 and fills *start_p, *stop_p and *step_p on success. Returns -1
 * on any malformed input, in which case the outputs are left untouched.
 */
static int
get_range(char *range, int *start_p, int *stop_p, int *step_p) {
    const char *first_colon, *second_colon;
    int start, stop, step;

    first_colon  = strchr( range, ':' );
    second_colon = first_colon ? strchr( first_colon + 1, ':' ) : NULL;

    /* More than two colons is never valid. */
    if (second_colon && strchr( second_colon + 1, ':' ))
        return -1;

    /* strtol() stops at the colon, so the first field needs no copy. */
    if (parse_range_field( range, &start ) != 0)
        return -1;

    if (first_colon == NULL) {
        /* No colon in range. */
        step = start / 10;
        if (step < 1)
            step = 1;
        stop = start + 10 * step;
    } else {
        if (parse_range_field( first_colon + 1, &stop ) != 0 || stop < start)
            return -1;

        if (second_colon == NULL) {
            /* One colon: let's have 10 steps or less. */
            step = (stop - start) / 10;
            if (step < 1)
                step = 1;
        } else if (parse_range_field( second_colon + 1, &step ) != 0) {
            /* Two colons: the third field is the step. */
            return -1;
        }
    }

    *start_p = start;
    *stop_p  = stop;
    *step_p  = step;

    return 0;
}
313 
/*
 * Print the command-line usage summary to stdout.
 *
 * prog_name - program name shown on the usage line (argv[0]).
 *
 * The listed options and defaults mirror what main() parses and
 * initialises: the thread count defaults to the detected processor count,
 * the output format defaults to 0, and every value option needs "=value".
 * The gnuplot output mode is disabled in this driver, so it is not listed.
 */
static void
show_help(char *prog_name) {
    static const char *const option_lines[] = {
        " --help Show this message and exit\n",
        " --threads=C Number of threads (default: number of online processors)\n",
        " --n_range=R Range of N values: Start:Stop:Step (default: 500:5000:500)\n",
        " --m=N Value of M (default: same as the current range value)\n",
        " --nrhs=K Value of K / NRHS (default: 1)\n",
        " --mx=X Range value is N, and M = max(1, N/X)\n",
        " --nx=X Range value is M, and N = max(1, M/X); takes precedence over --mx\n",
        " --[no]check Check result (default: nocheck)\n",
        " --[no]warmup Perform a warmup run to pre-load libraries (default: warmup)\n",
        " --niter=N Number of iterations (default: 1)\n",
        " --nb=N Nb size. Not used if autotuning is activated (default: 128)\n",
        " --ib=N IB size. Not used if autotuning is activated (default: 32)\n",
        " --rhblk=N Householder mode parameter (default: 0)\n",
        " --[no]dyn Activate Dynamic scheduling (default: nodyn)\n",
        " --[no]atun Activate autotuning (default: noatun)\n",
        " --[no]trace Activate tracing (default: notrace)\n",
        " --[no]dag Activate DAG output (default: nodag)\n",
        " --sync, --async Select synchronous or asynchronous mode (default: async)\n",
        " --ifmt=N Input format. 0: CM, 1: CCRB, 2: CRRB, 3: RCRB, 4: RRRB, 5: RM (default: 0)\n",
        " --ofmt=N Output format. 0: CM, 1: CCRB, 2: CRRB, 3: RCRB, 4: RRRB, 5: RM (default: 0)\n",
        " --thrdbypb=N Number of threads per subproblem for inplace transformation (default: 1)\n"
    };
    size_t line;

    printf( "Usage:\n%s [options]\n\n", prog_name );
    printf( "Options are:\n" );

    for (line = 0; line < sizeof option_lines / sizeof option_lines[0]; ++line)
        fputs( option_lines[line], stdout );
}
/*
 * Store the number of available processors in *thrdnbr.
 *
 * Windows reads the NUMBER_OF_PROCESSORS environment variable; every other
 * platform asks sysconf(_SC_NPROCESSORS_ONLN). If the count cannot be
 * determined (variable unset or malformed, sysconf() failure), 1 is stored
 * so the caller always receives a usable thread count.
 *
 * The platform test uses the same _WIN32/_WIN64 macros as the include
 * section of this file, so the branch taken here always matches the headers
 * that were actually included.
 */
static void
get_thread_count(int *thrdnbr) {
    int count = 0;

#if defined( _WIN32 ) || defined( _WIN64 ) || defined( WIN32 ) || defined( WIN64 )
    const char *text = getenv( "NUMBER_OF_PROCESSORS" );

    if (text == NULL || sscanf( text, "%d", &count ) < 1)
        count = 0;
#else
    long online = sysconf( _SC_NPROCESSORS_ONLN );

    if (online > 0)
        count = (int)online;
#endif

    *thrdnbr = count > 0 ? count : 1;
}
340 
341 int
342 main(int argc, char *argv[]) {
343  int i, m, mx, nx;
344  int start = 500;
345  int stop = 5000;
346  int step = 500;
347  int iparam[IPARAM_SIZEOF];
348 
349  iparam[IPARAM_THRDNBR ] = 1;
350  iparam[IPARAM_THRDNBR_SUBGRP] = 1;
351  iparam[IPARAM_SCHEDULER ] = 0;
352  iparam[IPARAM_M ] = -1;
353  iparam[IPARAM_N ] = 500;
354  iparam[IPARAM_K ] = 1;
355  iparam[IPARAM_LDA ] = 500;
356  iparam[IPARAM_LDB ] = 500;
357  iparam[IPARAM_LDC ] = 500;
358  iparam[IPARAM_MB ] = 128;
359  iparam[IPARAM_NB ] = 128;
360  iparam[IPARAM_IB ] = 32;
361  iparam[IPARAM_NITER ] = 1;
362  iparam[IPARAM_WARMUP ] = 1;
363  iparam[IPARAM_CHECK ] = 0;
364  iparam[IPARAM_VERBOSE ] = 0;
365  iparam[IPARAM_AUTOTUNING ] = 0;
366  iparam[IPARAM_INPUTFMT ] = 0;
367  iparam[IPARAM_OUTPUTFMT ] = 0;
368  iparam[IPARAM_TRACE ] = 0;
369  iparam[IPARAM_DAG ] = 0;
370  iparam[IPARAM_ASYNC ] = 1;
371  iparam[IPARAM_MX ] = -1;
372  iparam[IPARAM_NX ] = -1;
373  iparam[IPARAM_RHBLK ] = 0;
374  iparam[IPARAM_MX ] = -1;
375  iparam[IPARAM_NX ] = -1;
376  iparam[IPARAM_RHBLK ] = 0;
377 
378  get_thread_count( &(iparam[IPARAM_THRDNBR]) );
379 
380  for (i = 1; i < argc && argv[i]; ++i) {
381  if (startswith( argv[i], "--help" )) {
382  show_help( argv[0] );
383  return EXIT_SUCCESS;
384  } else if (startswith( argv[i], "--n_range=" )) {
385  get_range( strchr( argv[i], '=' ) + 1, &start, &stop, &step );
386  } else if (startswith( argv[i], "--threads=" )) {
387  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_THRDNBR]) );
388  /* } else if (startswith( argv[i], "--gnuplot-png" )) { */
389  /* gnuplot = 2; */
390  /* } else if (startswith( argv[i], "--gnuplot" )) { */
391  /* gnuplot = 1; */
392  } else if (startswith( argv[i], "--check" )) {
393  iparam[IPARAM_CHECK] = 1;
394  } else if (startswith( argv[i], "--nocheck" )) {
395  iparam[IPARAM_CHECK] = 0;
396  } else if (startswith( argv[i], "--warmup" )) {
397  iparam[IPARAM_WARMUP] = 1;
398  } else if (startswith( argv[i], "--nowarmup" )) {
399  iparam[IPARAM_WARMUP] = 0;
400  } else if (startswith( argv[i], "--dyn" )) {
401  iparam[IPARAM_SCHEDULER] = 1;
402  } else if (startswith( argv[i], "--nodyn" )) {
403  iparam[IPARAM_SCHEDULER] = 0;
404  } else if (startswith( argv[i], "--atun" )) {
405  iparam[IPARAM_AUTOTUNING] = 1;
406  } else if (startswith( argv[i], "--noatun" )) {
407  iparam[IPARAM_AUTOTUNING] = 0;
408  } else if (startswith( argv[i], "--trace" )) {
409  iparam[IPARAM_TRACE] = 1;
410  } else if (startswith( argv[i], "--notrace" )) {
411  iparam[IPARAM_TRACE] = 0;
412  } else if (startswith( argv[i], "--dag" )) {
413  iparam[IPARAM_DAG] = 1;
414  } else if (startswith( argv[i], "--nodag" )) {
415  iparam[IPARAM_DAG] = 0;
416  } else if (startswith( argv[i], "--sync" )) {
417  iparam[IPARAM_ASYNC] = 0;
418  } else if (startswith( argv[i], "--async" )) {
419  iparam[IPARAM_ASYNC] = 1;
420  } else if (startswith( argv[i], "--m=" )) {
421  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_M]) );
422  } else if (startswith( argv[i], "--nb=" )) {
423  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_NB]) );
424  iparam[IPARAM_MB] = iparam[IPARAM_NB];
425  } else if (startswith( argv[i], "--nrhs=" )) {
426  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_K]) );
427  } else if (startswith( argv[i], "--ib=" )) {
428  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_IB]) );
429  } else if (startswith( argv[i], "--ifmt=" )) {
430  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_INPUTFMT]) );
431  } else if (startswith( argv[i], "--ofmt=" )) {
432  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_OUTPUTFMT]) );
433  } else if (startswith( argv[i], "--thrdbypb=" )) {
434  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_THRDNBR_SUBGRP]) );
435  } else if (startswith( argv[i], "--niter=" )) {
436  sscanf( strchr( argv[i], '=' ) + 1, "%d", &iparam[IPARAM_NITER] );
437  } else if (startswith( argv[i], "--mx=" )) {
438  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_MX]) );
439  } else if (startswith( argv[i], "--nx=" )) {
440  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_NX]) );
441  } else if (startswith( argv[i], "--rhblk=" )) {
442  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_RHBLK]) );
443  } else if (startswith( argv[i], "--mx=" )) {
444  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_MX]) );
445  } else if (startswith( argv[i], "--nx=" )) {
446  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_NX]) );
447  } else if (startswith( argv[i], "--rhblk=" )) {
448  sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_RHBLK]) );
449  } else {
450  fprintf( stderr, "Unknown option: %s\n", argv[i] );
451  }
452  }
453 
454  m = iparam[IPARAM_M];
455  mx = iparam[IPARAM_MX];
456  nx = iparam[IPARAM_NX];
457 
458  /* Initialize Plasma */
459  PLASMA_Init( iparam[IPARAM_THRDNBR] );
460 
461  if ( iparam[IPARAM_SCHEDULER] )
463  else
465 
466  /* if ( !iparam[IPARAM_AUTOTUNING] ) { */
470  /* } else { */
471  /* PLASMA_Get(PLASMA_TILE_SIZE, &iparam[IPARAM_NB] ); */
472  /* PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &iparam[IPARAM_IB] ); */
473  /* } */
474 
475  /* Householder mode */
476  if (iparam[IPARAM_RHBLK] < 1) {
478  } else {
481  }
482 
483  if (step < 1) step = 1;
484 
485  Test( -1, iparam ); /* print header */
486  for (i = start; i <= stop; i += step)
487  {
488  if ( nx > 0 ) {
489  iparam[IPARAM_M] = i;
490  iparam[IPARAM_N] = max(1, i/nx);
491  } else if ( mx > 0 ) {
492  iparam[IPARAM_M] = max(1, i/mx);
493  iparam[IPARAM_N] = i;
494  } else {
495  if ( m == -1 )
496  iparam[IPARAM_M] = i;
497  iparam[IPARAM_N] = i;
498  }
499  Test( iparam[IPARAM_N], iparam );
500  }
501 
502  PLASMA_Finalize();
503 
504  /* if (gnuplot) { */
505  /* printf( "%s\n%s\n", */
506  /* "e", */
507  /* gnuplot > 1 ? "" : "pause 10" ); */
508  /* } */
509 
510  return EXIT_SUCCESS;
511 }