PAPI 7.1.0.0
flops.c
#define _GNU_SOURCE
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <papi.h>
#include "flops.h"

#define DOUBLE 2
#define SINGLE 1
#define HALF 0

#define CHOLESKY 3
#define GEMM 2
#define NORMALIZE 1

#define MAXDIM 51

#if defined(mips)
#define FMA 1
#elif (defined(sparc) && defined(sun))
#define FMA 1
#else
#define FMA 0
#endif
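/* Note: on architectures where FMA is defined to 1, resultline() doubles the
 * raw event count via "flpins << FMA", presumably because a fused multiply-add
 * retires as a single instruction but performs two floating-point operations. */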

/* Function prototypes. */
void print_header( FILE *fp, char *prec, char *kernel );
void resultline( int i, int kernel, int EventSet, FILE *fp );
void exec_flops( int precision, int EventSet, FILE *fp );

double normalize_double( int n, double *xd );
void cholesky_double( int n, double *ld, double *ad );
void exec_double_norm( int EventSet, FILE *fp );
void exec_double_cholesky( int EventSet, FILE *fp );
void exec_double_gemm( int EventSet, FILE *fp );
void keep_double_vec_res( int n, double *xd );
void keep_double_mat_res( int n, double *ld );

float normalize_single( int n, float *xs );
void cholesky_single( int n, float *ls, float *as );
void exec_single_norm( int EventSet, FILE *fp );
void exec_single_cholesky( int EventSet, FILE *fp );
void exec_single_gemm( int EventSet, FILE *fp );
void keep_single_vec_res( int n, float *xs );
void keep_single_mat_res( int n, float *ls );

#if defined(ARM)
half normalize_half( int n, half *xh );
void cholesky_half( int n, half *lh, half *ah );
void exec_half_norm( int EventSet, FILE *fp );
void exec_half_cholesky( int EventSet, FILE *fp );
void exec_half_gemm( int EventSet, FILE *fp );
void keep_half_vec_res( int n, half *xh );
void keep_half_mat_res( int n, half *lh );
#endif

void print_header( FILE *fp, char *prec, char *kernel ) {

    fprintf(fp, "#%s %s\n", prec, kernel);
    fprintf(fp, "#N RawEvtCnt NormdEvtCnt ExpectedAdd ExpectedSub ExpectedMul ExpectedDiv ExpectedSqrt ExpectedFMA ExpectedTotal\n");
}
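/* Each data line written by resultline() reports the problem size N, the raw
 * (FMA-adjusted) PAPI event count, that count normalized by the expected total
 * operation count, and the analytically expected add/sub/mul/div/sqrt/FMA mix. */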

void resultline( int i, int kernel, int EventSet, FILE *fp ) {

    long long flpins = 0, denom;
    long long papi, all, add, sub, mul, div, sqrt, fma;
    int retval;

    if ( (retval=PAPI_stop(EventSet, &flpins)) != PAPI_OK ) {
        return;
    }

    switch(kernel) {
        case NORMALIZE:
            all = 3*i+1;
            denom = all;
            add = i;
            sub = 0;
            mul = i;
            div = i;
            if ( 0 == i ) {
                sqrt = 0;
            } else {
                sqrt = 1;
            }
            fma = 0;
            break;
        case GEMM:
            all = 2*i*i*i;
            if ( 0 == i ) {
                denom = 1;
            } else {
                denom = all;
            }
            add = 0;
            sub = 0;
            mul = 0;
            div = 0;
            sqrt = 0;
            fma = i*i*i; // Need to derive.
            break;
        case CHOLESKY:
            all = i*(2*i*i+9*i+1)/6.0;
            if ( 0 == i ) {
                denom = 1;
            } else {
                denom = all;
            }
            add = i*(i-1)*(i+1)/6.0;
            sub = i*(i+1)/2.0;
            mul = i*(i-1)*(i+4)/6.0;
            div = i*(i-1)/2.0;
            sqrt = i;
            fma = 0;
            break;
        default:
            all = -1;
            denom = -1;
            add = -1;
            sub = -1;
            mul = -1;
            div = -1;
            sqrt = -1;
            fma = -1;
    }

    papi = flpins << FMA;

    fprintf(fp, "%d %lld %.17g %lld %lld %lld %lld %lld %lld %lld\n", i, papi, ((double)papi)/((double)denom), add, sub, mul, div, sqrt, fma, all);
}
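/* Worked example (NORMALIZE, n = 4): the normalization kernels perform 4
 * multiplies and 4 adds for the dot product, 1 square root, and 4 divides for
 * the scaling, i.e. 3*4+1 = 13 expected operations, matching the formulas above. */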

#if defined(ARM)

half normalize_half( int n, half *xh ) {

    if ( 0 == n )
        return 0.0;

    half aa = 0.0;
    half buff = 0.0;
    int i;

    for ( i = 0; i < n; i++ ) {
        buff = xh[i] * xh[i];
        aa += buff;
    }

    aa = SQRT_VEC_SH(aa);
    for ( i = 0; i < n; i++ )
        xh[i] = xh[i]/aa;

    return ( aa );
}

void cholesky_half( int n, half *lh, half *ah ) {

    int i, j, k;
    half sum = 0.0;
    half buff = 0.0;

    for (i = 0; i < n; i++) {
        for (j = 0; j <= i; j++) {
            sum = 0.0;
            for (k = 0; k < j; k++) {
                buff = lh[i * n + k] * lh[j * n + k];
                sum += buff;
            }

            if( i == j ) {
                buff = ah[i * n + i] - sum;
                lh[i * n + j] = SQRT_VEC_SH(buff);
            } else {
                buff = ah[i * n + j] - sum;
                sum = ((half)1.0);
                sum = sum/lh[j * n + j];
                lh[i * n + j] = sum * buff;
            }
        }
    }
}

void gemm_half( int n, half *ch, half *ah, half *bh ) {

    int i, j, k;
    half sum = 0.0;

    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            sum = 0.0;
            for (k = 0; k < n; k++) {
                FMA_VEC_SH(sum, ah[i * n + k], bh[k * n + j], sum);
            }
            ch[i * n + j] = sum;
        }
    }
}
#endif

float normalize_single( int n, float *xs ) {

    if ( 0 == n )
        return 0.0;

    float aa = 0.0;
    int i;

    for ( i = 0; i < n; i++ )
        aa = aa + xs[i] * xs[i];

    aa = sqrtf(aa);
    for ( i = 0; i < n; i++ )
        xs[i] = xs[i]/aa;

    return ( aa );
}

void cholesky_single( int n, float *ls, float *as ) {

    int i, j, k;
    float sum = 0.0;

    for (i = 0; i < n; i++) {
        for (j = 0; j <= i; j++) {
            sum = 0.0;
            for (k = 0; k < j; k++) {
                sum += ls[i * n + k] * ls[j * n + k];
            }

            if( i == j ) {
                ls[i * n + j] = sqrtf(as[i * n + i] - sum);
            } else {
                ls[i * n + j] = ((float)1.0)/ls[j * n + j] * (as[i * n + j] - sum);
            }
        }
    }
}

void gemm_single( int n, float *cs, float *as, float *bs ) {

    int i, j, k;
    SP_SCALAR_TYPE argI, argJ, argK;

    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            argK = SET_VEC_SS(0.0);
            for (k = 0; k < n; k++) {
                argI = SET_VEC_SS(as[i * n + k]);
                argJ = SET_VEC_SS(bs[k * n + j]);
                FMA_VEC_SS(argK, argI, argJ, argK);
            }
            cs[i * n + j] = ((float*)&argK)[0];
        }
    }
}
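/* Note: SP_SCALAR_TYPE, SET_VEC_SS, FMA_VEC_SS, and the corresponding half- and
 * double-precision macros are presumably provided by "flops.h" and wrap scalar
 * or vector intrinsics, so the multiply-add stays fused and the measured event
 * counts reflect FMA instructions rather than separate mul/add pairs. */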

double normalize_double( int n, double *xd ) {

    if ( 0 == n )
        return 0.0;

    double aa = 0.0;
    int i;

    for ( i = 0; i < n; i++ )
        aa = aa + xd[i] * xd[i];

    aa = sqrt(aa);
    for ( i = 0; i < n; i++ )
        xd[i] = xd[i]/aa;

    return ( aa );
}

void cholesky_double( int n, double *ld, double *ad ) {

    int i, j, k;
    double sum = 0.0;

    for (i = 0; i < n; i++) {
        for (j = 0; j <= i; j++) {
            sum = 0.0;
            for (k = 0; k < j; k++) {
                sum += ld[i * n + k] * ld[j * n + k];
            }

            if( i == j ) {
                ld[i * n + j] = sqrt(ad[i * n + i] - sum);
            } else {
                ld[i * n + j] = ((double)1.0)/ld[j * n + j] * (ad[i * n + j] - sum);
            }
        }
    }
}
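/* Worked example (CHOLESKY, n = 2): the loops above perform 1 add, 3 subtractions,
 * 2 multiplies, 1 divide, and 2 square roots (9 operations in total), which agrees
 * with the expected-count formulas used by resultline(). */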


void gemm_double( int n, double *cd, double *ad, double *bd ) {

    int i, j, k;
    DP_SCALAR_TYPE argI, argJ, argK;

    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            argK = SET_VEC_SD(0.0);
            for (k = 0; k < n; k++) {
                argI = SET_VEC_SD(ad[i * n + k]);
                argJ = SET_VEC_SD(bd[k * n + j]);
                FMA_VEC_SD(argK, argI, argJ, argK);
            }
            cd[i * n + j] = ((double*)&argK)[0];
        }
    }
}
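/* Note: the GEMM kernels issue one fused multiply-add per inner iteration, i.e.
 * n*n*n FMAs, which is the 2*n^3 floating-point operations that resultline()
 * expects for the GEMM case. */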

void exec_double_norm( int EventSet, FILE *fp ) {

    int i, n, retval;
    double *xd=NULL;

    /* Print info about the computational kernel. */
    print_header( fp, "Double-Precision", "Vector Normalization" );

    /* Allocate the linear arrays. */
    xd = malloc( MAXDIM * sizeof(double) );

    /* Step through the different array sizes. */
    for ( n = 0; n < MAXDIM; n++ ) {
        /* Initialize the needed arrays at this size. */
        for ( i = 0; i < n; i++ ) {
            xd[i] = ((double)random())/((double)RAND_MAX) * (double)1.1;
        }

        /* Reset PAPI count. */
        if ( (retval = PAPI_start( EventSet )) != PAPI_OK ) {
            return;
        }

        /* Run the kernel. */
        normalize_double( n, xd );
        usleep(1);

        /* Stop and print count. */
        resultline( n, NORMALIZE, EventSet, fp );

        keep_double_vec_res( n, xd );
    }

    /* Free dynamically allocated memory. */
    free( xd );
}

void exec_double_cholesky( int EventSet, FILE *fp ) {

    int i, j, n, retval;
    double *ad=NULL, *ld=NULL;
    double sumd = 0.0;

    /* Print info about the computational kernel. */
    print_header( fp, "Double-Precision", "Cholesky Decomposition" );

    /* Allocate the matrices. */
    ad = malloc( MAXDIM * MAXDIM * sizeof(double) );
    ld = malloc( MAXDIM * MAXDIM * sizeof(double) );

    /* Step through the different array sizes. */
    for ( n = 0; n < MAXDIM; n++ ) {
        /* Initialize the needed arrays at this size. */
        for ( i = 0; i < n; i++ ) {
            for ( j = 0; j < i; j++ ) {
                ld[i * n + j] = 0.0;
                ld[j * n + i] = 0.0;

                ad[i * n + j] = ((double)random())/((double)RAND_MAX) * (double)1.1;
                ad[j * n + i] = ad[i * n + j];
            }
            ad[i * n + i] = 0.0;
            ld[i * n + i] = 0.0;
        }

        /* Guarantee diagonal dominance for successful Cholesky. */
        for ( i = 0; i < n; i++ ) {
            sumd = 0.0;
            for ( j = 0; j < n; j++ ) {
                sumd += fabs(ad[i * n + j]);
            }
            ad[i * n + i] = sumd + (double)1.1;
        }

        /* Reset PAPI count. */
        if ( (retval = PAPI_start( EventSet )) != PAPI_OK ) {
            return;
        }

        /* Run the kernel. */
        cholesky_double( n, ld, ad );
        usleep(1);

        /* Stop and print count. */
        resultline( n, CHOLESKY, EventSet, fp );

        keep_double_mat_res( n, ld );
    }

    free( ad );
    free( ld );
}

void exec_double_gemm( int EventSet, FILE *fp ) {

    int i, j, n, retval;
    double *ad=NULL, *bd=NULL, *cd=NULL;

    /* Print info about the computational kernel. */
    print_header( fp, "Double-Precision", "GEMM" );

    /* Allocate the matrices. */
    ad = malloc( MAXDIM * MAXDIM * sizeof(double) );
    bd = malloc( MAXDIM * MAXDIM * sizeof(double) );
    cd = malloc( MAXDIM * MAXDIM * sizeof(double) );

    /* Step through the different array sizes. */
    for ( n = 0; n < MAXDIM; n++ ) {
        /* Initialize the needed arrays at this size. */
        for ( i = 0; i < n; i++ ) {
            for ( j = 0; j < n; j++ ) {
                cd[i * n + j] = 0.0;
                ad[i * n + j] = ((double)random())/((double)RAND_MAX) * (double)1.1;
                bd[i * n + j] = ((double)random())/((double)RAND_MAX) * (double)1.1;
            }
        }

        /* Reset PAPI count. */
        if ( (retval = PAPI_start( EventSet )) != PAPI_OK ) {
            return;
        }

        /* Run the kernel. */
        gemm_double( n, cd, ad, bd );
        usleep(1);

        /* Stop and print count. */
        resultline( n, GEMM, EventSet, fp );

        keep_double_mat_res( n, cd );
    }

    free( ad );
    free( bd );
    free( cd );
}

void keep_double_vec_res( int n, double *xd ) {

    int i;
    double sum = 0.0;
    for( i = 0; i < n; ++i ) {
        sum += xd[i];
    }

    if( 1.2345 == sum ) {
        fprintf(stderr, "Side-effect to disable dead code elimination by the compiler. Please ignore.\n");
    }
}

void keep_double_mat_res( int n, double *ld ) {

    int i, j;
    double sum = 0.0;
    for( i = 0; i < n; ++i ) {
        for( j = 0; j < n; ++j ) {
            sum += ld[i * n + j];
        }
    }

    if( 1.2345 == sum ) {
        fprintf(stderr, "Side-effect to disable dead code elimination by the compiler. Please ignore.\n");
    }
}

void exec_single_norm( int EventSet, FILE *fp ) {

    int i, n, retval;
    float *xs=NULL;

    /* Print info about the computational kernel. */
    print_header( fp, "Single-Precision", "Vector Normalization" );

    /* Allocate the linear arrays. */
    xs = malloc( MAXDIM * sizeof(float) );

    /* Step through the different array sizes. */
    for ( n = 0; n < MAXDIM; n++ ) {
        /* Initialize the needed arrays at this size. */
        for ( i = 0; i < n; i++ ) {
            xs[i] = ((float)random())/((float)RAND_MAX) * (float)1.1;
        }

        /* Reset PAPI count. */
        if ( (retval = PAPI_start( EventSet )) != PAPI_OK ) {
            return;
        }

        /* Run the kernel. */
        normalize_single( n, xs );
        usleep(1);

        /* Stop and print count. */
        resultline( n, NORMALIZE, EventSet, fp );

        keep_single_vec_res( n, xs );
    }

    /* Free dynamically allocated memory. */
    free( xs );
}

void exec_single_cholesky( int EventSet, FILE *fp ) {

    int i, j, n, retval;
    float *as=NULL, *ls=NULL;
    float sums = 0.0;

    /* Print info about the computational kernel. */
    print_header( fp, "Single-Precision", "Cholesky Decomposition" );

    /* Allocate the matrices. */
    as = malloc( MAXDIM * MAXDIM * sizeof(float) );
    ls = malloc( MAXDIM * MAXDIM * sizeof(float) );

    /* Step through the different array sizes. */
    for ( n = 0; n < MAXDIM; n++ ) {
        /* Initialize the needed arrays at this size. */
        for ( i = 0; i < n; i++ ) {
            for ( j = 0; j < i; j++ ) {
                ls[i * n + j] = 0.0;
                ls[j * n + i] = 0.0;

                as[i * n + j] = ((float)random())/((float)RAND_MAX) * (float)1.1;
                as[j * n + i] = as[i * n + j];
            }
            as[i * n + i] = 0.0;
            ls[i * n + i] = 0.0;
        }

        /* Guarantee diagonal dominance for successful Cholesky. */
        for ( i = 0; i < n; i++ ) {
            sums = 0.0;
            for ( j = 0; j < n; j++ ) {
                sums += fabs(as[i * n + j]);
            }
            as[i * n + i] = sums + (float)1.1;
        }

        /* Reset PAPI count. */
        if ( (retval = PAPI_start( EventSet )) != PAPI_OK ) {
            return;
        }

        /* Run the kernel. */
        cholesky_single( n, ls, as );
        usleep(1);

        /* Stop and print count. */
        resultline( n, CHOLESKY, EventSet, fp );

        keep_single_mat_res( n, ls );
    }

    free( as );
    free( ls );
}

void exec_single_gemm( int EventSet, FILE *fp ) {

    int i, j, n, retval;
    float *as=NULL, *bs=NULL, *cs=NULL;

    /* Print info about the computational kernel. */
    print_header( fp, "Single-Precision", "GEMM" );

    /* Allocate the matrices. */
    as = malloc( MAXDIM * MAXDIM * sizeof(float) );
    bs = malloc( MAXDIM * MAXDIM * sizeof(float) );
    cs = malloc( MAXDIM * MAXDIM * sizeof(float) );

    /* Step through the different array sizes. */
    for ( n = 0; n < MAXDIM; n++ ) {
        /* Initialize the needed arrays at this size. */
        for ( i = 0; i < n; i++ ) {
            for ( j = 0; j < n; j++ ) {
                cs[i * n + j] = 0.0;
                as[i * n + j] = ((float)random())/((float)RAND_MAX) * (float)1.1;
                bs[i * n + j] = ((float)random())/((float)RAND_MAX) * (float)1.1;
            }
        }

        /* Reset PAPI count. */
        if ( (retval = PAPI_start( EventSet )) != PAPI_OK ) {
            return;
        }

        /* Run the kernel. */
        gemm_single( n, cs, as, bs );
        usleep(1);

        /* Stop and print count. */
        resultline( n, GEMM, EventSet, fp );

        keep_single_mat_res( n, cs );
    }

    free( as );
    free( bs );
    free( cs );
}

void keep_single_vec_res( int n, float *xs ) {

    int i;
    float sum = 0.0;
    for( i = 0; i < n; ++i ) {
        sum += xs[i];
    }

    if( 1.2345 == sum ) {
        fprintf(stderr, "Side-effect to disable dead code elimination by the compiler. Please ignore.\n");
    }
}

void keep_single_mat_res( int n, float *ls ) {

    int i, j;
    float sum = 0.0;
    for( i = 0; i < n; ++i ) {
        for( j = 0; j < n; ++j ) {
            sum += ls[i * n + j];
        }
    }

    if( 1.2345 == sum ) {
        fprintf(stderr, "Side-effect to disable dead code elimination by the compiler. Please ignore.\n");
    }
}

#if defined(ARM)
void exec_half_norm( int EventSet, FILE *fp ) {

    int i, n, retval;
    half *xh=NULL;

    /* Print info about the computational kernel. */
    print_header( fp, "Half-Precision", "Vector Normalization" );

    /* Allocate the linear arrays. */
    xh = malloc( MAXDIM * sizeof(half) );

    /* Step through the different array sizes. */
    for ( n = 0; n < MAXDIM; n++ ) {
        /* Initialize the needed arrays at this size. */
        for ( i = 0; i < n; i++ ) {
            xh[i] = ((half)random())/((half)RAND_MAX) * (half)1.1;
        }

        /* Reset PAPI count. */
        if ( (retval = PAPI_start( EventSet )) != PAPI_OK ) {
            return;
        }

        /* Run the kernel. */
        normalize_half( n, xh );
        usleep(1);

        /* Stop and print count. */
        resultline( n, NORMALIZE, EventSet, fp );

        keep_half_vec_res( n, xh );
    }

    /* Free dynamically allocated memory. */
    free( xh );
}

void exec_half_cholesky( int EventSet, FILE *fp ) {

    int i, j, n, retval;
    half *ah=NULL, *lh=NULL;
    half sumh = 0.0;

    /* Print info about the computational kernel. */
    print_header( fp, "Half-Precision", "Cholesky Decomposition" );

    /* Allocate the matrices. */
    ah = malloc( MAXDIM * MAXDIM * sizeof(half) );
    lh = malloc( MAXDIM * MAXDIM * sizeof(half) );

    /* Step through the different array sizes. */
    for ( n = 0; n < MAXDIM; n++ ) {
        /* Initialize the needed arrays at this size. */
        for ( i = 0; i < n; i++ ) {
            for ( j = 0; j < i; j++ ) {
                lh[i * n + j] = 0.0;
                lh[j * n + i] = 0.0;

                ah[i * n + j] = ((half)random())/((half)RAND_MAX) * (half)1.1;
                ah[j * n + i] = ah[i * n + j];
            }
            ah[i * n + i] = 0.0;
            lh[i * n + i] = 0.0;
        }

        /* Guarantee diagonal dominance for successful Cholesky. */
        for ( i = 0; i < n; i++ ) {
            sumh = 0.0;
            for ( j = 0; j < n; j++ ) {
                sumh += fabs(ah[i * n + j]);
            }
            ah[i * n + i] = sumh + (half)1.1;
        }

        /* Reset PAPI count. */
        if ( (retval = PAPI_start( EventSet )) != PAPI_OK ) {
            return;
        }

        /* Run the kernel. */
        cholesky_half( n, lh, ah );
        usleep(1);

        /* Stop and print count. */
        resultline( n, CHOLESKY, EventSet, fp );

        keep_half_mat_res( n, lh );
    }

    free( ah );
    free( lh );
}

void exec_half_gemm( int EventSet, FILE *fp ) {

    int i, j, n, retval;
    half *ah=NULL, *bh=NULL, *ch=NULL;

    /* Print info about the computational kernel. */
    print_header( fp, "Half-Precision", "GEMM" );

    /* Allocate the matrices. */
    ah = malloc( MAXDIM * MAXDIM * sizeof(half) );
    bh = malloc( MAXDIM * MAXDIM * sizeof(half) );
    ch = malloc( MAXDIM * MAXDIM * sizeof(half) );

    /* Step through the different array sizes. */
    for ( n = 0; n < MAXDIM; n++ ) {
        /* Initialize the needed arrays at this size. */
        for ( i = 0; i < n; i++ ) {
            for ( j = 0; j < n; j++ ) {
                ch[i * n + j] = 0.0;
                ah[i * n + j] = ((half)random())/((half)RAND_MAX) * (half)1.1;
                bh[i * n + j] = ((half)random())/((half)RAND_MAX) * (half)1.1;
            }
        }

        /* Reset PAPI count. */
        if ( (retval = PAPI_start( EventSet )) != PAPI_OK ) {
            return;
        }

        /* Run the kernel. */
        gemm_half( n, ch, ah, bh );
        usleep(1);

        /* Stop and print count. */
        resultline( n, GEMM, EventSet, fp );

        keep_half_mat_res( n, ch );
    }

    free( ah );
    free( bh );
    free( ch );
}

void keep_half_vec_res( int n, half *xh ) {

    int i;
    half sum = 0.0;
    for( i = 0; i < n; ++i ) {
        sum += xh[i];
    }

    if( 1.2345 == sum ) {
        fprintf(stderr, "Side-effect to disable dead code elimination by the compiler. Please ignore.\n");
    }
}

void keep_half_mat_res( int n, half *lh ) {

    int i, j;
    half sum = 0.0;
    for( i = 0; i < n; ++i ) {
        for( j = 0; j < n; ++j ) {
            sum += lh[i * n + j];
        }
    }

    if( 1.2345 == sum ) {
        fprintf(stderr, "Side-effect to disable dead code elimination by the compiler. Please ignore.\n");
    }
}
#endif

void exec_flops( int precision, int EventSet, FILE *fp ) {

    /* Vector Normalization, Cholesky Decomposition, and GEMM tests. */
    switch(precision) {
        case DOUBLE:
            exec_double_norm(EventSet, fp);
            exec_double_cholesky(EventSet, fp);
            exec_double_gemm(EventSet, fp);
            break;
        case SINGLE:
            exec_single_norm(EventSet, fp);
            exec_single_cholesky(EventSet, fp);
            exec_single_gemm(EventSet, fp);
            break;
        case HALF:
#if defined(ARM)
            exec_half_norm(EventSet, fp);
            exec_half_cholesky(EventSet, fp);
            exec_half_gemm(EventSet, fp);
#endif
            break;
        default:
            ;
    }

    return;
}
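/* Note: the half-precision path is compiled only when ARM is defined (see the
 * half kernels above); on other architectures exec_flops(HALF, ...) is a no-op. */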

void flops_driver( char* papi_event_name, hw_desc_t *hw_desc, char* outdir ) {
    int retval = PAPI_OK;
    int EventSet = PAPI_NULL;
    FILE* ofp_papi;
    const char *sufx = ".flops";
    char *papiFileName;

    (void)hw_desc;

    int l = strlen(outdir)+strlen(papi_event_name)+strlen(sufx);
    if (NULL == (papiFileName = (char *)calloc( 1+l, sizeof(char)))) {
        return;
    }
    if (l != (sprintf(papiFileName, "%s%s%s", outdir, papi_event_name, sufx))) {
        goto error0;
    }
    if (NULL == (ofp_papi = fopen(papiFileName,"w"))) {
        fprintf(stderr, "Failed to open file %s.\n", papiFileName);
        goto error0;
    }

    retval = PAPI_create_eventset( &EventSet );
    if (retval != PAPI_OK ){
        goto error1;
    }

    retval = PAPI_add_named_event( EventSet, papi_event_name );
    if (retval != PAPI_OK ){
        goto error1;
    }

    exec_flops(HALF, EventSet, ofp_papi);
    exec_flops(SINGLE, EventSet, ofp_papi);
    exec_flops(DOUBLE, EventSet, ofp_papi);

    retval = PAPI_cleanup_eventset( EventSet );
    if (retval != PAPI_OK ){
        goto error1;
    }
    retval = PAPI_destroy_eventset( &EventSet );
    if (retval != PAPI_OK ){
        goto error1;
    }

error1:
    fclose(ofp_papi);
error0:
    free(papiFileName);
    return;
}
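/*
 * Usage sketch (illustrative only, not part of this file): one plausible way to
 * invoke flops_driver() from a standalone program, assuming the caller initializes
 * PAPI first and passes an output directory string that already ends in a path
 * separator (the output name is built as outdir + event name + ".flops" with no
 * separator added). The event name "PAPI_DP_OPS" and the NULL hw_desc argument
 * are assumptions for the example, not requirements of the driver.
 *
 *     #include <papi.h>
 *
 *     int main( void ) {
 *         if ( PAPI_library_init( PAPI_VER_CURRENT ) != PAPI_VER_CURRENT )
 *             return 1;
 *         flops_driver( "PAPI_DP_OPS", NULL, "./" );
 *         PAPI_shutdown();
 *         return 0;
 *     }
 */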