/* Windows-only prelude. NOTE(review): the matching #else/#endif lines are not visible in this view. */
17 #if defined( _WIN32 ) || defined( _WIN64 )
/* Maps the name int64_t onto the MSVC-specific __int64 type. */
18 #define int64_t __int64
30 #if defined( _WIN32 ) || defined( _WIN64 )
33 #include <sys/timeb.h>
/*
 * Offset subtracted from the FILETIME-derived value in gettimeofday() below.
 * 11644473600 seconds separate 1601-01-01 (the FILETIME epoch) from
 * 1970-01-01 (the Unix epoch); the constant is that gap in microseconds.
 * The two definitions differ only in the integer-literal suffix
 * (Ui64 for MSVC-style compilers, ULL otherwise).
 */
34 #if defined(_MSC_VER) || defined(_MSC_EXTENSIONS)
35 #define DELTA_EPOCH_IN_MICROSECS 11644473600000000Ui64
37 #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
/*
 * Windows stand-in for gettimeofday(), built on GetSystemTimeAsFileTime().
 *
 * tv receives the time split into whole seconds (tv_sec) and the remaining
 * microseconds (tv_usec); tz receives values derived from the CRT globals
 * _timezone and _daylight.
 *
 * NOTE(review): only part of the body is visible in this view. The shift
 * between the two |= steps, any scaling of the FILETIME units, the NULL
 * checks on tv/tz and the return statement are not shown; confirm them
 * against the full file.
 */
46 int gettimeofday(
struct timeval* tv,
struct timezone* tz)
49 unsigned __int64 tmpres = 0;
/* Fold the high and low 32-bit halves of the FILETIME into one 64-bit value. */
54 GetSystemTimeAsFileTime(&ft);
55 tmpres |= ft.dwHighDateTime;
57 tmpres |= ft.dwLowDateTime;
/* Rebase from the 1601 FILETIME epoch to the 1970 Unix epoch. */
61 tmpres -= DELTA_EPOCH_IN_MICROSECS;
/* Split the microsecond count into seconds and leftover microseconds. */
63 tv->tv_sec = (long)(tmpres / 1000000UL);
64 tv->tv_usec = (long)(tmpres % 1000000UL);
/* _timezone is divided by 60 to fill the minutes-west field. */
73 tz->tz_minuteswest = _timezone / 60;
74 tz->tz_dsttime = _daylight;
82 #include <sys/resource.h>
/*
 * Reads the current time via gettimeofday() and returns it as a single
 * floating-point number of seconds (whole seconds plus microseconds * 1e-6).
 * NOTE(review): the enclosing function header and the declaration of tp
 * are not visible in this view.
 */
105 gettimeofday( &tp, NULL );
106 return tp.tv_sec + 1e-6 * tp.tv_usec;
/*
 * Benchmark driver for one problem size.
 *
 * When n or thrdnbr is negative it prints the column header (and, judging by
 * the strings below, a gnuplot preamble) instead of timing anything.
 * Otherwise it calls RunTest() for each iteration, accumulates Gflop/s
 * statistics and prints one result row.
 *
 * NOTE(review): large parts of the body (declarations, the branch structure
 * around the printf calls, accumulator initialisation, closing braces and
 * the return) are not visible in this view; the comments below describe
 * only the statements that are shown.
 */
110 Test(int64_t n,
int *iparam) {
113 int64_t M, N, K, NRHS;
118 double sumgf, sumgf2, sumt, sd;
/* Tail of the env[] name table; the empty string is the end-of-list sentinel tested by the loop below. */
126 "BLAS_NUM_THREADS",
""
/* Silence unused-variable warnings. */
139 (void)M;(void)N;(void)K;(void)NRHS;
/* Header mode: a negative n or thrdnbr requests the column titles rather than a run. */
141 if (n < 0 || thrdnbr < 0) {
/* Column header that includes the residual/norm columns. */
143 printf(
"# N NRHS threads seconds Gflop/s Deviation ||Ax-b|| ||A|| ||x|| ||b||"
144 " eps ||Ax-b||/N/eps/(||A||||x||+||b||)\n" );
/* Column header without the residual/norm columns. */
146 printf(
"# N NRHS threads seconds Gflop/s Deviation\n" );
/* Plot title: starts with the thread count, then one entry per env[] variable. */
149 printf(
"set title '%d_NUM_THREADS: ", thrdnbr );
150 for (i = 0; env[i][0]; ++i) {
151 s = getenv( env[i] );
/* Separate entries with a single space. */
153 if (i) printf(
" " );
/* Print an abbreviated variable name: at most 5 characters, stopping at the first '_'. */
155 for (j = 0; j < 5 && env[i][j] && env[i][j] !=
'_'; ++j)
156 printf(
"%c", env[i][j] );
/* Placeholder value "?"; presumably used when the variable is unset (the guarding condition is not visible). */
161 printf(
"->%s",
"?" );
/* Axis labels, optional PNG terminal setup when gnuplot > 1, and the plot command using columns 1 and 5. */
164 printf(
"%s\n%s\n%s\n%s\n%s%s%s\n",
165 "set xlabel 'Matrix size'",
166 "set ylabel 'Gflop/s'",
168 gnuplot > 1 ?
"set terminal png giant\nset output 'timeplot.png'" :
"",
169 "plot '-' using 1:5 title '",
_NAME,
"' with linespoints" );
/* A RunTest() call that writes its time into t[0]; presumably the warmup run (its guard is not visible). */
193 RunTest( iparam, dparam, &(t[0]));
/* Timed iterations: each RunTest() call stores its elapsed time in t[iter]. */
200 for (iter = 0; iter < niter; iter++)
208 RunTest( iparam, dparam, &(t[iter]));
214 RunTest( iparam, dparam, &(t[iter]));
/* Gflop/s for this iteration: weighted multiply/add counts divided by elapsed time, scaled by 1e-9. */
216 gflops = 1e-9 * (fmuls * fp_per_mul + fadds * fp_per_add) / t[iter];
219 sumgf2 += gflops*gflops;
/* Mean Gflop/s over all iterations. */
222 gflops = sumgf/niter;
/*
 * Standard deviation of Gflop/s using the sum / sum-of-squares formula,
 * dividing by niter.
 * NOTE(review): floating-point rounding can make the argument of sqrt()
 * slightly negative when all samples are nearly equal, which would yield NaN.
 */
223 sd = sqrt((sumgf2 - (sumgf*sumgf)/niter)/niter);
/* With checking enabled the row also carries norms and the scaled residual. */
225 if ( iparam[IPARAM_CHECK] )
226 printf(
"%9.3f %9.2f %9.2f %8.5e %8.5e %8.5e %8.5e %8.5e %8.5e\n",
/* Scaled residual, matching the last column title: res / n / eps / (anorm * xnorm + bnorm). */
228 dparam[IPARAM_RES] / n / eps / (dparam[IPARAM_ANORM] * dparam[IPARAM_XNORM] + dparam[IPARAM_BNORM] ));
/* Without checking: average time, mean Gflop/s and its deviation only. */
230 printf(
"%9.3f %9.2f %9.2f\n", sumt/niter, gflops, sd );
/*
 * Prefix test: compares the first strlen(prefix) characters of s with prefix.
 * strncmp() returns non-zero on a mismatch, so the branch below is the
 * "does not start with prefix" case.
 * NOTE(review): the return type and both return statements are not visible
 * in this view.
 */
239 startswith(
const char *s,
const char *prefix) {
240 size_t n = strlen( prefix );
241 if (strncmp( s, prefix, n ))
/*
 * Parses a size range given as "Start", "Start:Stop" or "Start:Stop:Step"
 * (the form is chosen by counting ':' characters in range) and reports the
 * result through start_p, stop_p and step_p.
 *
 * NOTE(review): error returns, the writes through the output pointers, the
 * computation of n and any NUL-termination of buf are on lines not visible
 * in this view.
 * NOTE(review): the local defaults here (1000, 10000, 1000) differ from the
 * "500:5000:500" default advertised by show_help(); confirm which one is
 * actually in effect.
 */
247 get_range(
char *range,
int *start_p,
int *stop_p,
int *step_p) {
/* buf holds one numeric field; nbuf (20) is one less than sizeof buf (21), leaving room for a terminator. */
248 char *s, *s1, buf[21];
249 int colon_count, copy_len, nbuf=20, n;
250 int start=1000, stop=10000, step=1000;
/* Walk over every ':' in range (the loop body, presumably the counter increment, is not visible). */
253 for (s = strchr( range,
':'); s; s = strchr( s+1,
':'))
/* Form "Start": only the start value is given. */
256 if (colon_count == 0) {
257 if (sscanf( range,
"%d", &start ) < 1 || start < 1)
/* Clamp step to at least 1, then set stop ten steps above start. */
260 if (step < 1) step = 1;
261 stop = start + 10 * step;
263 }
/* Form "Start:Stop": stop is read from the text after the colon. */
else if (colon_count == 1) {
265 s = strchr( range,
':' );
266 if (sscanf( s+1,
"%d", &stop ) < 1 || stop < 1)
/*
 * Copy at most nbuf characters of the start field into buf.
 * NOTE(review): strncpy() does not NUL-terminate when it copies exactly
 * copy_len characters; the terminator is presumably written on a line
 * not visible here. Confirm.
 */
271 copy_len = n > nbuf ? nbuf : n;
272 strncpy( buf, range, copy_len );
274 if (sscanf( buf,
"%d", &start ) < 1 || start > stop || start < 1)
/* Derive the step as one tenth of the span. */
278 step = (stop - start) / 10;
281 }
/* Form "Start:Stop:Step": all three fields are given. */
else if (colon_count == 2) {
283 s = strchr( range,
':' );
/* First field: start. */
285 copy_len = n > nbuf ? nbuf : n;
286 strncpy( buf, range, copy_len );
288 if (sscanf( buf,
"%d", &start ) < 1 || start < 1)
/* Second field: stop, located between the first colon (s) and the second colon (s1). */
292 s1 = strchr( s+1,
':' );
294 copy_len = n > nbuf ? nbuf : n;
295 strncpy( buf, s+1, copy_len );
297 if (sscanf( buf,
"%d", &stop ) < 1 || stop < start)
/* Third field: step, read from the text after the second colon. */
301 if (sscanf( s1+1,
"%d", &step ) < 1 || step < 1)
/*
 * Prints the usage banner and the list of command-line options to stdout.
 * prog_name is inserted into the "Usage:" line.
 * NOTE(review): some lines of this function are not visible in this view,
 * so the option list shown here may be incomplete.
 */
315 show_help(
char *prog_name) {
316 printf(
"Usage:\n%s [options]\n\n", prog_name );
317 printf(
"Options are:\n" );
318 printf(
" --threads=C Number of threads (default: 1)\n" );
319 printf(
" --n_range=R Range of N values: Start:Stop:Step (default: 500:5000:500)\n" );
321 printf(
" --[no]check Check result (default: nocheck)\n" );
322 printf(
" --[no]warmup Perform a warmup run to pre-load libraries (default: warmup)\n");
323 printf(
" --niter=N Number of iterations (default: 1)\n");
324 printf(
" --nb=N Nb size. Not used if autotuning is activated (default: 128)\n");
325 printf(
" --ib=N IB size. Not used if autotuning is activated (default: 32)\n");
326 printf(
" --[no]dyn Activate Dynamic scheduling (default: nodyn)\n");
327 printf(
" --[no]atun Activate autotuning (default: noatun)\n");
328 printf(
" --ifmt Input format. 0: CM, 1: CCRB, 2: CRRB, 3: RCRB, 4: RRRB, 5: RM (default: 0)\n");
329 printf(
" --ofmt Output format. 0: CM, 1: CCRB, 2: CRRB, 3: RCRB, 4: RRRB, 5: RM (default: 1)\n");
330 printf(
" --thrdbypb Number of threads per subproblem for inplace transformation (default: 1)\n");
/*
 * Stores the number of processors in *thrdnbr.
 * On Windows the value is parsed from the NUMBER_OF_PROCESSORS environment
 * variable; elsewhere it comes from sysconf(_SC_NPROCESSORS_ONLN).
 *
 * NOTE(review): getenv() returns NULL when the variable is unset, and that
 * pointer is passed straight to sscanf(), which is undefined behaviour.
 * The sscanf() result is also ignored, so *thrdnbr may be left unmodified.
 * NOTE(review): this guard tests WIN32/WIN64, whereas the top of the file
 * tests _WIN32/_WIN64; confirm the unprefixed macros are defined by the build.
 * NOTE(review): the return type, #else, #endif and closing brace are not
 * visible in this view.
 */
333 get_thread_count(
int *thrdnbr) {
334 #if defined WIN32 || defined WIN64
335 sscanf( getenv(
"NUMBER_OF_PROCESSORS" ),
"%d", thrdnbr );
337 *thrdnbr = sysconf(_SC_NPROCESSORS_ONLN);
/*
 * Command-line parsing loop followed by the start of the problem-size sweep.
 *
 * Each argument is matched by prefix with startswith(), so any argument that
 * merely begins with an option name selects that option. Values after '='
 * are read with sscanf("%d") straight into iparam[]; the sscanf() results
 * are not checked, so a malformed number leaves the previous value in place.
 *
 * NOTE(review): the enclosing function header, the bodies of most flag
 * branches, and the code after the sweep loop are not visible in this view.
 */
380 for (i = 1; i < argc && argv[i]; ++i) {
381 if (startswith( argv[i],
"--help" )) {
382 show_help( argv[0] );
384 }
else if (startswith( argv[i],
"--n_range=" )) {
/* Hand the text after '=' to get_range(), which fills start, stop and step. */
385 get_range( strchr( argv[i],
'=' ) + 1, &start, &stop, &step );
386 }
else if (startswith( argv[i],
"--threads=" )) {
387 sscanf( strchr( argv[i],
'=' ) + 1,
"%d", &(iparam[IPARAM_THRDNBR]) );
392 }
/* Boolean flag pairs follow; the statements inside these branches are not visible in this view. */
else if (startswith( argv[i],
"--check" )) {
394 }
else if (startswith( argv[i],
"--nocheck" )) {
396 }
else if (startswith( argv[i],
"--warmup" )) {
398 }
else if (startswith( argv[i],
"--nowarmup" )) {
400 }
else if (startswith( argv[i],
"--dyn" )) {
402 }
else if (startswith( argv[i],
"--nodyn" )) {
404 }
else if (startswith( argv[i],
"--atun" )) {
406 }
else if (startswith( argv[i],
"--noatun" )) {
408 }
else if (startswith( argv[i],
"--trace" )) {
410 }
else if (startswith( argv[i],
"--notrace" )) {
412 }
else if (startswith( argv[i],
"--dag" )) {
414 }
else if (startswith( argv[i],
"--nodag" )) {
416 }
else if (startswith( argv[i],
"--sync" )) {
418 }
else if (startswith( argv[i],
"--async" )) {
420 }
/* Integer-valued options: the number after '=' is stored in the matching iparam slot. */
else if (startswith( argv[i],
"--m=" )) {
421 sscanf( strchr( argv[i],
'=' ) + 1,
"%d", &(iparam[
IPARAM_M]) );
422 }
else if (startswith( argv[i],
"--nb=" )) {
423 sscanf( strchr( argv[i],
'=' ) + 1,
"%d", &(iparam[
IPARAM_NB]) );
425 }
/* Note that --nrhs= is stored in the IPARAM_K slot. */
else if (startswith( argv[i],
"--nrhs=" )) {
426 sscanf( strchr( argv[i],
'=' ) + 1,
"%d", &(iparam[
IPARAM_K]) );
427 }
else if (startswith( argv[i],
"--ib=" )) {
428 sscanf( strchr( argv[i],
'=' ) + 1,
"%d", &(iparam[
IPARAM_IB]) );
429 }
else if (startswith( argv[i],
"--ifmt=" )) {
430 sscanf( strchr( argv[i],
'=' ) + 1,
"%d", &(iparam[
IPARAM_INPUTFMT]) );
431 }
else if (startswith( argv[i],
"--ofmt=" )) {
433 }
else if (startswith( argv[i],
"--thrdbypb=" )) {
435 }
else if (startswith( argv[i],
"--niter=" )) {
436 sscanf( strchr( argv[i],
'=' ) + 1,
"%d", &iparam[
IPARAM_NITER] );
437 }
else if (startswith( argv[i],
"--mx=" )) {
438 sscanf( strchr( argv[i],
'=' ) + 1,
"%d", &(iparam[
IPARAM_MX]) );
439 }
else if (startswith( argv[i],
"--nx=" )) {
440 sscanf( strchr( argv[i],
'=' ) + 1,
"%d", &(iparam[
IPARAM_NX]) );
441 }
else if (startswith( argv[i],
"--rhblk=" )) {
442 sscanf( strchr( argv[i],
'=' ) + 1,
"%d", &(iparam[
IPARAM_RHBLK]) );
443 }
/*
 * NOTE(review): the next three branches repeat the --mx=, --nx= and
 * --rhblk= tests handled just above, so they can never be reached.
 * They look like a copy/paste leftover and are candidates for removal.
 */
else if (startswith( argv[i],
"--mx=" )) {
444 sscanf( strchr( argv[i],
'=' ) + 1,
"%d", &(iparam[
IPARAM_MX]) );
445 }
else if (startswith( argv[i],
"--nx=" )) {
446 sscanf( strchr( argv[i],
'=' ) + 1,
"%d", &(iparam[
IPARAM_NX]) );
447 }
else if (startswith( argv[i],
"--rhblk=" )) {
448 sscanf( strchr( argv[i],
'=' ) + 1,
"%d", &(iparam[
IPARAM_RHBLK]) );
/* Anything unrecognised is reported on stderr. */
450 fprintf( stderr,
"Unknown option: %s\n", argv[i] );
/* Guard against a non-positive step so the sweep below always advances. */
483 if (step < 1) step = 1;
/* Sweep the problem size from start to stop (inclusive) in increments of step. */
486 for (i = start; i <= stop; i += step)
491 }
else if ( mx > 0 ) {