22#elif (defined(sparc) && defined(sun))
50half normalize_half(
int n, half *xh );
51void cholesky_half(
int n, half *lh, half *ah );
53void exec_half_cholesky(
int EventSet, FILE *
fp );
55void keep_half_vec_res(
int n, half *xh );
56void keep_half_mat_res(
int n, half *lh );
61 fprintf(
fp,
"#%s %s\n", prec, kernel);
62 fprintf(
fp,
"#N RawEvtCnt NormdEvtCnt ExpectedAdd ExpectedSub ExpectedMul ExpectedDiv ExpectedSqrt ExpectedFMA ExpectedTotal\n");
67 long long flpins = 0, denom;
68 long long papi, all, add, sub, mul, div, sqrt, fma;
105 all =
i*(2*
i*
i+9*
i+1)/6.0;
111 add =
i*(
i-1)*(
i+1)/6.0;
113 mul =
i*(
i-1)*(
i+4)/6.0;
129 papi = flpins <<
FMA;
131 fprintf(
fp,
"%d %lld %.17g %lld %lld %lld %lld %lld %lld %lld\n",
i, papi, ((
double)papi)/((
double)denom), add, sub, mul, div, sqrt, fma, all);
136half normalize_half(
int n, half *xh ) {
145 for (
i = 0;
i < n;
i++ ) {
146 buff = xh[
i] * xh[
i];
150 aa = SQRT_VEC_SH(
aa);
151 for (
i = 0;
i < n;
i++ )
157void cholesky_half(
int n, half *lh, half *ah ) {
163 for (
i = 0;
i < n;
i++) {
164 for (j = 0; j <=
i; j++) {
166 for (k = 0; k < j; k++) {
167 buff = lh[
i * n + k] * lh[j * n + k];
172 buff = ah[
i * n +
i] - sum;
173 lh[
i * n + j] = SQRT_VEC_SH(buff);
175 buff = ah[
i * n +
i] - sum;
177 sum = sum/lh[j * n + j];
178 lh[
i * n + j] = sum * buff;
184void gemm_half(
int n, half *ch, half *ah, half *bh ) {
189 for (
i = 0;
i < n;
i++) {
190 for (j = 0; j < n; j++) {
192 for (k = 0; k < n; k++) {
193 FMA_VEC_SH(sum, ah[
i * n + k], bh[k * n + j], sum);
209 for (
i = 0;
i < n;
i++ )
213 for (
i = 0;
i < n;
i++ )
224 for (
i = 0;
i < n;
i++) {
225 for (j = 0; j <=
i; j++) {
227 for (k = 0; k < j; k++) {
228 sum += ls[
i * n + k] * ls[j * n + k];
232 ls[
i * n + j] = sqrtf(as[
i * n +
i] - sum);
234 ls[
i * n + j] = ((float)1.0)/ls[j * n + j] * (as[
i * n + j] - sum);
243 SP_SCALAR_TYPE argI, argJ, argK;
245 for (
i = 0;
i < n;
i++) {
246 for (j = 0; j < n; j++) {
247 argK = SET_VEC_SS(0.0);
248 for (k = 0; k < n; k++) {
249 argI = SET_VEC_SS(as[
i * n + k]);
250 argJ = SET_VEC_SS(bs[k * n + j]);
251 FMA_VEC_SS(argK, argI, argJ, argK);
253 cs[
i * n + j] = ((
float*)&argK)[0];
266 for (
i = 0;
i < n;
i++ )
270 for (
i = 0;
i < n;
i++ )
281 for (
i = 0;
i < n;
i++) {
282 for (j = 0; j <=
i; j++) {
284 for (k = 0; k < j; k++) {
285 sum += ld[
i * n + k] * ld[j * n + k];
289 ld[
i * n + j] = sqrt(ad[
i * n +
i] - sum);
291 ld[
i * n + j] = ((double)1.0)/ld[j * n + j] * (ad[
i * n + j] - sum);
301 DP_SCALAR_TYPE argI, argJ, argK;
303 for (
i = 0;
i < n;
i++) {
304 for (j = 0; j < n; j++) {
305 argK = SET_VEC_SD(0.0);
306 for (k = 0; k < n; k++) {
307 argI = SET_VEC_SD(ad[
i * n + k]);
308 argJ = SET_VEC_SD(bd[k * n + j]);
309 FMA_VEC_SD(argK, argI, argJ, argK);
311 cd[
i * n + j] = ((
double*)&argK)[0];
325 xd = malloc(
MAXDIM *
sizeof(
double) );
328 for ( n = 0; n <
MAXDIM; n++ ) {
330 for (
i = 0;
i < n;
i++ ) {
331 xd[
i] = ((double)random())/((
double)RAND_MAX) * (
double)1.1;
356 double *ad=NULL, *ld=NULL;
367 for ( n = 0; n <
MAXDIM; n++ ) {
369 for (
i = 0;
i < n;
i++ ) {
370 for ( j = 0; j <
i; j++ ) {
374 ad[
i * n + j] = ((double)random())/((
double)RAND_MAX) * (
double)1.1;
375 ad[j * n +
i] = ad[
i * n + j];
382 for (
i = 0;
i < n;
i++ ) {
384 for ( j = 0; j < n; j++ ) {
385 sumd += fabs(ad[
i * n + j]);
387 ad[
i * n +
i] = sumd + (double)1.1;
412 double *ad=NULL, *bd=NULL, *cd=NULL;
423 for ( n = 0; n <
MAXDIM; n++ ) {
425 for (
i = 0;
i < n;
i++ ) {
426 for ( j = 0; j < n; j++ ) {
428 ad[
i * n + j] = ((double)random())/((
double)RAND_MAX) * (
double)1.1;
429 bd[
i * n + j] = ((double)random())/((
double)RAND_MAX) * (
double)1.1;
457 for(
i = 0;
i < n; ++
i ) {
461 if( 1.2345 == sum ) {
462 fprintf(
stderr,
"Side-effect to disable dead code elimination by the compiler. Please ignore.\n");
470 for(
i = 0;
i < n; ++
i ) {
471 for( j = 0; j < n; ++j ) {
472 sum += ld[
i * n + j];
476 if( 1.2345 == sum ) {
477 fprintf(
stderr,
"Side-effect to disable dead code elimination by the compiler. Please ignore.\n");
490 xs = malloc(
MAXDIM *
sizeof(
float) );
493 for ( n = 0; n <
MAXDIM; n++ ) {
495 for (
i = 0;
i < n;
i++ ) {
496 xs[
i] = ((float)random())/((
float)RAND_MAX) * (
float)1.1;
521 float *as=NULL, *ls=NULL;
532 for ( n = 0; n <
MAXDIM; n++ ) {
534 for (
i = 0;
i < n;
i++ ) {
535 for ( j = 0; j <
i; j++ ) {
539 as[
i * n + j] = ((float)random())/((
float)RAND_MAX) * (
float)1.1;
540 as[j * n +
i] = as[
i * n + j];
547 for (
i = 0;
i < n;
i++ ) {
549 for ( j = 0; j < n; j++ ) {
550 sums += fabs(as[
i * n + j]);
552 as[
i * n +
i] = sums + (float)1.1;
577 float *as=NULL, *bs=NULL, *cs=NULL;
588 for ( n = 0; n <
MAXDIM; n++ ) {
590 for (
i = 0;
i < n;
i++ ) {
591 for ( j = 0; j < n; j++ ) {
593 as[
i * n + j] = ((float)random())/((
float)RAND_MAX) * (
float)1.1;
594 bs[
i * n + j] = ((float)random())/((
float)RAND_MAX) * (
float)1.1;
622 for(
i = 0;
i < n; ++
i ) {
626 if( 1.2345 == sum ) {
627 fprintf(
stderr,
"Side-effect to disable dead code elimination by the compiler. Please ignore.\n");
635 for(
i = 0;
i < n; ++
i ) {
636 for( j = 0; j < n; ++j ) {
637 sum += ls[
i * n + j];
641 if( 1.2345 == sum ) {
642 fprintf(
stderr,
"Side-effect to disable dead code elimination by the compiler. Please ignore.\n");
647void exec_half_norm(
int EventSet, FILE *
fp ) {
656 xh = malloc(
MAXDIM *
sizeof(half) );
659 for ( n = 0; n <
MAXDIM; n++ ) {
661 for (
i = 0;
i < n;
i++ ) {
662 xh[
i] = ((half)random())/((half)RAND_MAX) * (half)1.1;
671 normalize_half( n, xh );
677 keep_half_vec_res( n, xh );
684void exec_half_cholesky(
int EventSet, FILE *
fp ) {
687 half *ah=NULL, *lh=NULL;
698 for ( n = 0; n <
MAXDIM; n++ ) {
700 for (
i = 0;
i < n;
i++ ) {
701 for ( j = 0; j <
i; j++ ) {
705 ah[
i * n + j] = ((half)random())/((half)RAND_MAX) * (half)1.1;
706 ah[j * n +
i] = ah[
i * n + j];
713 for (
i = 0;
i < n;
i++ ) {
715 for ( j = 0; j < n; j++ ) {
716 sumh += fabs(ah[
i * n + j]);
718 ah[
i * n +
i] = sumh + (half)1.1;
727 cholesky_half( n, lh, ah );
733 keep_half_mat_res( n, lh );
740void exec_half_gemm(
int EventSet, FILE *
fp ) {
743 half *ah=NULL, *bh=NULL, *ch=NULL;
754 for ( n = 0; n <
MAXDIM; n++ ) {
756 for (
i = 0;
i < n;
i++ ) {
757 for ( j = 0; j < n; j++ ) {
759 ah[
i * n + j] = ((half)random())/((half)RAND_MAX) * (half)1.1;
760 bh[
i * n + j] = ((half)random())/((half)RAND_MAX) * (half)1.1;
770 gemm_half( n, ch, ah, bh );
776 keep_half_mat_res( n, ch );
784void keep_half_vec_res(
int n, half *xh ) {
788 for(
i = 0;
i < n; ++
i ) {
792 if( 1.2345 == sum ) {
793 fprintf(
stderr,
"Side-effect to disable dead code elimination by the compiler. Please ignore.\n");
797void keep_half_mat_res(
int n, half *lh ) {
801 for(
i = 0;
i < n; ++
i ) {
802 for( j = 0; j < n; ++j ) {
803 sum += lh[
i * n + j];
807 if( 1.2345 == sum ) {
808 fprintf(
stderr,
"Side-effect to disable dead code elimination by the compiler. Please ignore.\n");
845 const char *sufx =
".flops";
850 int l = strlen(outdir)+strlen(papi_event_name)+strlen(sufx);
851 if (NULL == (papiFileName = (
char *)calloc( 1+l,
sizeof(
char)))) {
854 if (l != (sprintf(papiFileName,
"%s%s%s", outdir, papi_event_name, sufx))) {
857 if (NULL == (ofp_papi = fopen(papiFileName,
"w"))) {
858 fprintf(
stderr,
"Failed to open file %s.\n", papiFileName);
add PAPI preset or native hardware event by name to an EventSet
Empty and destroy an EventSet.
Create a new empty PAPI EventSet.
Empty and destroy an EventSet.
Start counting hardware events in an event set.
Stop counting hardware events in an event set.
void keep_single_vec_res(int n, float *xs)
void resultline(int i, int kernel, int EventSet, FILE *fp)
void cholesky_single(int n, float *ls, float *as)
void cholesky_double(int n, double *ld, double *ad)
void exec_double_gemm(int EventSet, FILE *fp)
void exec_double_norm(int EventSet, FILE *fp)
void flops_driver(char *papi_event_name, hw_desc_t *hw_desc, char *outdir)
void gemm_single(int n, float *cs, float *as, float *bs)
void keep_single_mat_res(int n, float *ls)
void exec_single_norm(int EventSet, FILE *fp)
float normalize_single(int n, float *xs)
void gemm_double(int n, double *cd, double *ad, double *bd)
void exec_single_cholesky(int EventSet, FILE *fp)
double normalize_double(int n, double *xd)
void print_header(FILE *fp, char *prec, char *kernel)
void keep_double_vec_res(int n, double *xd)
void keep_double_mat_res(int n, double *ld)
void exec_double_cholesky(int EventSet, FILE *fp)
void exec_flops(int precision, int EventSet, FILE *fp)
void exec_single_gemm(int EventSet, FILE *fp)
Return codes and api definitions.
int fclose(FILE *__stream)