Go to the source code of this file.
|
| void | print_header (FILE *fp, char *prec, char *kernel) |
| |
| void | resultline (int i, int kernel, int EventSet, FILE *fp) |
| |
| void | exec_flops (int precision, int EventSet, FILE *fp) |
| |
| double | normalize_double (int n, double *xd) |
| |
| void | cholesky_double (int n, double *ld, double *ad) |
| |
| void | exec_double_norm (int EventSet, FILE *fp) |
| |
| void | exec_double_cholesky (int EventSet, FILE *fp) |
| |
| void | exec_double_gemm (int EventSet, FILE *fp) |
| |
| void | keep_double_vec_res (int n, double *xd) |
| |
| void | keep_double_mat_res (int n, double *ld) |
| |
| float | normalize_single (int n, float *xs) |
| |
| void | cholesky_single (int n, float *ls, float *as) |
| |
| void | exec_single_norm (int EventSet, FILE *fp) |
| |
| void | exec_single_cholesky (int EventSet, FILE *fp) |
| |
| void | exec_single_gemm (int EventSet, FILE *fp) |
| |
| void | keep_single_vec_res (int n, float *xs) |
| |
| void | keep_single_mat_res (int n, float *ls) |
| |
| void | gemm_single (int n, float *cs, float *as, float *bs) |
| |
| void | gemm_double (int n, double *cd, double *ad, double *bd) |
| |
| void | flops_driver (char *papi_event_name, hw_desc_t *hw_desc, char *outdir) |
| |
◆ _GNU_SOURCE
◆ CHOLESKY
◆ DOUBLE
◆ FMA
◆ GEMM
◆ HALF
◆ MAXDIM
◆ NORMALIZE
◆ SINGLE
◆ cholesky_double()
| void cholesky_double |
( |
int |
n, |
|
|
double * |
ld, |
|
|
double * |
ad |
|
) |
| |
Definition at line 276 of file flops.c.
276 {
277
278     int i, j, k;
279     double sum = 0.0;
280
281     for (i = 0; i < n; i++) {
282         for (j = 0; j <= i; j++) {
283             sum = 0.0;
284             for (k = 0; k < j; k++) {
285                 sum += ld[i * n + k] * ld[j * n + k];
286             }
287
288             if (i == j) {
289                 ld[i * n + j] = sqrt(ad[i * n + i] - sum);
290             } else {
291                 ld[i * n + j] = ((double)1.0)/ld[j * n + j] * (ad[i * n + j] - sum);
292             }
293         }
294     }
295 }
◆ cholesky_single()
| void cholesky_single |
( |
int |
n, |
|
|
float * |
ls, |
|
|
float * |
as |
|
) |
| |
Definition at line 219 of file flops.c.
219 {
220
221     int i, j, k;
222     float sum = 0.0;
223
224     for (i = 0; i < n; i++) {
225         for (j = 0; j <= i; j++) {
226             sum = 0.0;
227             for (k = 0; k < j; k++) {
228                 sum += ls[i * n + k] * ls[j * n + k];
229             }
230
231             if (i == j) {
232                 ls[i * n + j] = sqrtf(as[i * n + i] - sum);
233             } else {
234                 ls[i * n + j] = ((float)1.0)/ls[j * n + j] * (as[i * n + j] - sum);
235             }
236         }
237     }
238 }
◆ exec_double_cholesky()
| void exec_double_cholesky |
( |
int |
EventSet, |
|
|
FILE * |
fp |
|
) |
| |
Definition at line 353 of file flops.c.
353 {
354
356 double *ad=NULL, *ld=NULL;
357 double sumd = 0.0;
358
359
361
362
365
366
367 for ( n = 0; n <
MAXDIM; n++ ) {
368
369 for (
i = 0;
i < n;
i++ ) {
370 for ( j = 0; j <
i; j++ ) {
373
374 ad[
i * n + j] = ((double)random())/((
double)RAND_MAX) * (
double)1.1;
375 ad[j * n +
i] = ad[
i * n + j];
376 }
379 }
380
381
382 for (
i = 0;
i < n;
i++ ) {
383 sumd = 0.0;
384 for ( j = 0; j < n; j++ ) {
385 sumd += fabs(ad[
i * n + j]);
386 }
387 ad[
i * n +
i] = sumd + (double)1.1;
388 }
389
390
392 return;
393 }
394
395
397 usleep(1);
398
399
401
403 }
404
405 free( ad );
406 free( ld );
407}
Start counting hardware events in an event set.
void resultline(int i, int kernel, int EventSet, FILE *fp)
void cholesky_double(int n, double *ld, double *ad)
void print_header(FILE *fp, char *prec, char *kernel)
void keep_double_mat_res(int n, double *ld)
◆ exec_double_gemm()
| void exec_double_gemm |
( |
int |
EventSet, |
|
|
FILE * |
fp |
|
) |
| |
Definition at line 409 of file flops.c.
409 {
410
412 double *ad=NULL, *bd=NULL, *cd=NULL;
413
414
416
417
421
422
423 for ( n = 0; n <
MAXDIM; n++ ) {
424
425 for (
i = 0;
i < n;
i++ ) {
426 for ( j = 0; j < n; j++ ) {
428 ad[
i * n + j] = ((double)random())/((
double)RAND_MAX) * (
double)1.1;
429 bd[
i * n + j] = ((double)random())/((
double)RAND_MAX) * (
double)1.1;
430 }
431 }
432
433
435 return;
436 }
437
438
440 usleep(1);
441
442
444
446 }
447
448 free( ad );
449 free( bd );
450 free( cd );
451}
void gemm_double(int n, double *cd, double *ad, double *bd)
◆ exec_double_norm()
| void exec_double_norm |
( |
int |
EventSet, |
|
|
FILE * |
fp |
|
) |
| |
Definition at line 316 of file flops.c.
316 {
317
319 double *xd=NULL;
320
321
323
324
325 xd = malloc(
MAXDIM *
sizeof(
double) );
326
327
328 for ( n = 0; n <
MAXDIM; n++ ) {
329
330 for (
i = 0;
i < n;
i++ ) {
331 xd[
i] = ((double)random())/((
double)RAND_MAX) * (
double)1.1;
332 }
333
334
336 return;
337 }
338
339
341 usleep(1);
342
343
345
347 }
348
349
350 free( xd );
351}
double normalize_double(int n, double *xd)
void keep_double_vec_res(int n, double *xd)
◆ exec_flops()
| void exec_flops |
( |
int |
precision, |
|
|
int |
EventSet, |
|
|
FILE * |
fp |
|
) |
| |
Definition at line 813 of file flops.c.
813 {
814
815
816 switch(precision) {
821 break;
826 break;
828#if defined(ARM)
832#endif
833 break;
834 default:
835 ;
836 }
837
838 return;
839}
void exec_double_gemm(int EventSet, FILE *fp)
void exec_double_norm(int EventSet, FILE *fp)
void exec_single_norm(int EventSet, FILE *fp)
void exec_single_cholesky(int EventSet, FILE *fp)
void exec_double_cholesky(int EventSet, FILE *fp)
void exec_single_gemm(int EventSet, FILE *fp)
◆ exec_single_cholesky()
| void exec_single_cholesky |
( |
int |
EventSet, |
|
|
FILE * |
fp |
|
) |
| |
Definition at line 518 of file flops.c.
518 {
519
521 float *as=NULL, *ls=NULL;
522 float sums = 0.0;
523
524
526
527
530
531
532 for ( n = 0; n <
MAXDIM; n++ ) {
533
534 for (
i = 0;
i < n;
i++ ) {
535 for ( j = 0; j <
i; j++ ) {
538
539 as[
i * n + j] = ((float)random())/((
float)RAND_MAX) * (
float)1.1;
540 as[j * n +
i] = as[
i * n + j];
541 }
544 }
545
546
547 for (
i = 0;
i < n;
i++ ) {
548 sums = 0.0;
549 for ( j = 0; j < n; j++ ) {
550 sums += fabs(as[
i * n + j]);
551 }
552 as[
i * n +
i] = sums + (float)1.1;
553 }
554
555
557 return;
558 }
559
560
562 usleep(1);
563
564
566
568 }
569
570 free( as );
571 free( ls );
572}
void cholesky_single(int n, float *ls, float *as)
void keep_single_mat_res(int n, float *ls)
◆ exec_single_gemm()
| void exec_single_gemm |
( |
int |
EventSet, |
|
|
FILE * |
fp |
|
) |
| |
Definition at line 574 of file flops.c.
574 {
575
577 float *as=NULL, *bs=NULL, *cs=NULL;
578
579
581
582
586
587
588 for ( n = 0; n <
MAXDIM; n++ ) {
589
590 for (
i = 0;
i < n;
i++ ) {
591 for ( j = 0; j < n; j++ ) {
593 as[
i * n + j] = ((float)random())/((
float)RAND_MAX) * (
float)1.1;
594 bs[
i * n + j] = ((float)random())/((
float)RAND_MAX) * (
float)1.1;
595 }
596 }
597
598
600 return;
601 }
602
603
605 usleep(1);
606
607
609
611 }
612
613 free( as );
614 free( bs );
615 free( cs );
616}
void gemm_single(int n, float *cs, float *as, float *bs)
◆ exec_single_norm()
| void exec_single_norm |
( |
int |
EventSet, |
|
|
FILE * |
fp |
|
) |
| |
Definition at line 481 of file flops.c.
481 {
482
484 float *xs=NULL;
485
486
488
489
490 xs = malloc(
MAXDIM *
sizeof(
float) );
491
492
493 for ( n = 0; n <
MAXDIM; n++ ) {
494
495 for (
i = 0;
i < n;
i++ ) {
496 xs[
i] = ((float)random())/((
float)RAND_MAX) * (
float)1.1;
497 }
498
499
501 return;
502 }
503
504
506 usleep(1);
507
508
510
512 }
513
514
515 free( xs );
516}
void keep_single_vec_res(int n, float *xs)
float normalize_single(int n, float *xs)
◆ flops_driver()
| void flops_driver |
( |
char * |
papi_event_name, |
|
|
hw_desc_t * |
hw_desc, |
|
|
char * |
outdir |
|
) |
| |
Definition at line 841 of file flops.c.
841 {
844 FILE* ofp_papi;
845 const char *sufx = ".flops";
846 char *papiFileName;
847
848 (void)hw_desc;
849
850 int l = strlen(outdir)+strlen(papi_event_name)+strlen(sufx);
851 if (NULL == (papiFileName = (char *)calloc( 1+l, sizeof(char)))) {
852 return;
853 }
854 if (l != (sprintf(papiFileName, "%s%s%s", outdir, papi_event_name, sufx))) {
855 goto error0;
856 }
857 if (NULL == (ofp_papi = fopen(papiFileName,"w"))) {
858 fprintf(
stderr,
"Failed to open file %s.\n", papiFileName);
859 goto error0;
860 }
861
864 goto error1;
865 }
866
869 goto error1;
870 }
871
875
878 goto error1;
879 }
882 goto error1;
883 }
884
885error1:
887error0:
888 free(papiFileName);
889 return;
890}
add PAPI preset or native hardware event by name to an EventSet
Empty and destroy an EventSet.
Create a new empty PAPI EventSet.
Empty and destroy an EventSet.
void exec_flops(int precision, int EventSet, FILE *fp)
int fclose(FILE *__stream)
◆ gemm_double()
| void gemm_double |
( |
int |
n, |
|
|
double * |
cd, |
|
|
double * |
ad, |
|
|
double * |
bd |
|
) |
| |
Definition at line 298 of file flops.c.
298 {
299
300     int i, j, k;
301     DP_SCALAR_TYPE argI, argJ, argK;
302
303     for (i = 0; i < n; i++) {
304         for (j = 0; j < n; j++) {
305             argK = SET_VEC_SD(0.0);
306             for (k = 0; k < n; k++) {
307                 argI = SET_VEC_SD(ad[i * n + k]);
308                 argJ = SET_VEC_SD(bd[k * n + j]);
309                 FMA_VEC_SD(argK, argI, argJ, argK);
310             }
311             cd[i * n + j] = ((double*)&argK)[0];
312         }
313     }
314 }
◆ gemm_single()
| void gemm_single |
( |
int |
n, |
|
|
float * |
cs, |
|
|
float * |
as, |
|
|
float * |
bs |
|
) |
| |
Definition at line 240 of file flops.c.
240 {
241
242     int i, j, k;
243     SP_SCALAR_TYPE argI, argJ, argK;
244
245     for (i = 0; i < n; i++) {
246         for (j = 0; j < n; j++) {
247             argK = SET_VEC_SS(0.0);
248             for (k = 0; k < n; k++) {
249                 argI = SET_VEC_SS(as[i * n + k]);
250                 argJ = SET_VEC_SS(bs[k * n + j]);
251                 FMA_VEC_SS(argK, argI, argJ, argK);
252             }
253             cs[i * n + j] = ((float*)&argK)[0];
254         }
255     }
256 }
◆ keep_double_mat_res()
| void keep_double_mat_res |
( |
int |
n, |
|
|
double * |
ld |
|
) |
| |
Definition at line 466 of file flops.c.
466 {
467
468     int i, j;
469     double sum = 0.0;
470     for( i = 0; i < n; ++i ) {
471         for( j = 0; j < n; ++j ) {
472             sum += ld[i * n + j];
473         }
474     }
475
476     if( 1.2345 == sum ) {
477         fprintf(stderr, "Side-effect to disable dead code elimination by the compiler. Please ignore.\n");
478     }
479 }
◆ keep_double_vec_res()
| void keep_double_vec_res |
( |
int |
n, |
|
|
double * |
xd |
|
) |
| |
Definition at line 453 of file flops.c.
453 {
454
455     int i;
456     double sum = 0.0;
457     for( i = 0; i < n; ++i ) {
458         sum += xd[i];
459     }
460
461     if( 1.2345 == sum ) {
462         fprintf(stderr, "Side-effect to disable dead code elimination by the compiler. Please ignore.\n");
463     }
464 }
◆ keep_single_mat_res()
| void keep_single_mat_res |
( |
int |
n, |
|
|
float * |
ls |
|
) |
| |
Definition at line 631 of file flops.c.
631 {
632
633     int i, j;
634     float sum = 0.0;
635     for( i = 0; i < n; ++i ) {
636         for( j = 0; j < n; ++j ) {
637             sum += ls[i * n + j];
638         }
639     }
640
641     if( 1.2345 == sum ) {
642         fprintf(stderr, "Side-effect to disable dead code elimination by the compiler. Please ignore.\n");
643     }
644 }
◆ keep_single_vec_res()
| void keep_single_vec_res |
( |
int |
n, |
|
|
float * |
xs |
|
) |
| |
Definition at line 618 of file flops.c.
618 {
619
620     int i;
621     float sum = 0.0;
622     for( i = 0; i < n; ++i ) {
623         sum += xs[i];
624     }
625
626     if( 1.2345 == sum ) {
627         fprintf(stderr, "Side-effect to disable dead code elimination by the compiler. Please ignore.\n");
628     }
629 }
◆ normalize_double()
| double normalize_double |
( |
int |
n, |
|
|
double * |
xd |
|
) |
| |
Definition at line 258 of file flops.c.
258 {
259
260 if ( 0 == n )
261 return 0.0;
262
265
266 for (
i = 0;
i < n;
i++ )
268
270 for (
i = 0;
i < n;
i++ )
272
274}
◆ normalize_single()
| float normalize_single |
( |
int |
n, |
|
|
float * |
xs |
|
) |
| |
Definition at line 201 of file flops.c.
201 {
202
203 if ( 0 == n )
204 return 0.0;
205
208
209 for (
i = 0;
i < n;
i++ )
211
213 for (
i = 0;
i < n;
i++ )
215
217}
◆ print_header()
| void print_header |
( |
FILE * |
fp, |
|
|
char * |
prec, |
|
|
char * |
kernel |
|
) |
| |
Definition at line 59 of file flops.c.
59 {
60
61     fprintf(fp, "#%s %s\n", prec, kernel);
62     fprintf(fp, "#N RawEvtCnt NormdEvtCnt ExpectedAdd ExpectedSub ExpectedMul ExpectedDiv ExpectedSqrt ExpectedFMA ExpectedTotal\n");
63 }
◆ resultline()
| void resultline |
( |
int |
i, |
|
|
int |
kernel, |
|
|
int |
EventSet, |
|
|
FILE * |
fp |
|
) |
| |
Definition at line 65 of file flops.c.
65 {
66
67 long long flpins = 0, denom;
68 long long papi, all, add, sub, mul, div, sqrt, fma;
70
72 return;
73 }
74
75 switch(kernel) {
78 denom = all;
80 sub = 0;
84 sqrt = 0;
85 } else {
86 sqrt = 1;
87 }
88 fma = 0;
89 break;
93 denom = 1;
94 } else {
95 denom = all;
96 }
97 add = 0;
98 sub = 0;
99 mul = 0;
100 div = 0;
101 sqrt = 0;
103 break;
105 all =
i*(2*
i*
i+9*
i+1)/6.0;
107 denom = 1;
108 } else {
109 denom = all;
110 }
111 add =
i*(
i-1)*(
i+1)/6.0;
113 mul =
i*(
i-1)*(
i+4)/6.0;
116 fma = 0;
117 break;
118 default:
119 all = -1;
120 denom = -1;
121 add = -1;
122 sub = -1;
123 mul = -1;
124 div = -1;
125 sqrt = -1;
126 fma = -1;
127 }
128
129 papi = flpins <<
FMA;
130
131 fprintf(
fp,
"%d %lld %.17g %lld %lld %lld %lld %lld %lld %lld\n",
i, papi, ((
double)papi)/((
double)denom), add, sub, mul, div, sqrt, fma, all);
132}
Stop counting hardware events in an event set.