Include dependency graph for flops.c:

Macros
#define	_GNU_SOURCE

#define	DOUBLE 2

#define	SINGLE 1

#define	HALF 0

#define	CHOLESKY 3

#define	GEMM 2

#define	NORMALIZE 1

#define	MAXDIM 51

#define	FMA 0

Functions
void	print_header (FILE *fp, char *prec, char *kernel)

void	resultline (int i, int kernel, int EventSet, FILE *fp)

void	exec_flops (int precision, int EventSet, FILE *fp)

double	normalize_double (int n, double *xd)

void	cholesky_double (int n, double *ld, double *ad)

void	exec_double_norm (int EventSet, FILE *fp)

void	exec_double_cholesky (int EventSet, FILE *fp)

void	exec_double_gemm (int EventSet, FILE *fp)

void	keep_double_vec_res (int n, double *xd)

void	keep_double_mat_res (int n, double *ld)

float	normalize_single (int n, float *xs)

void	cholesky_single (int n, float *ls, float *as)

void	exec_single_norm (int EventSet, FILE *fp)

void	exec_single_cholesky (int EventSet, FILE *fp)

void	exec_single_gemm (int EventSet, FILE *fp)

void	keep_single_vec_res (int n, float *xs)

void	keep_single_mat_res (int n, float *ls)

void	gemm_single (int n, float *cs, float *as, float *bs)

void	gemm_double (int n, double *cd, double *ad, double *bd)

void	flops_driver (char *papi_event_name, hw_desc_t *hw_desc, char *outdir)

Macro Definition Documentation

◆ _GNU_SOURCE

#define _GNU_SOURCE

Definition at line 1 of file flops.c.

◆ CHOLESKY

#define CHOLESKY 3

Definition at line 14 of file flops.c.

◆ DOUBLE

#define DOUBLE 2

Definition at line 10 of file flops.c.

◆ FMA

#define FMA 0

Definition at line 25 of file flops.c.

◆ GEMM

#define GEMM 2

Definition at line 15 of file flops.c.

◆ HALF

#define HALF 0

Definition at line 12 of file flops.c.

◆ MAXDIM

#define MAXDIM 51

Definition at line 18 of file flops.c.

◆ NORMALIZE

#define NORMALIZE 1

Definition at line 16 of file flops.c.

◆ SINGLE

#define SINGLE 1

Definition at line 11 of file flops.c.

Function Documentation

◆ cholesky_double()

void cholesky_double	(	int	n,
		double *	ld,
		double *	ad
	)

Definition at line 276 of file flops.c.

                                                      {
    /*
     * Double-precision Cholesky kernel: fills the lower triangle
     * (j <= i) of ld from the matrix ad.  Both arrays are indexed as
     * n x n, row-major (element (i,j) lives at [i * n + j]).
     *
     * n:  matrix dimension; nothing is done when n <= 0.
     * ld: output; only entries on or below the diagonal are written.
     * ad: input; only entries on or below the diagonal are read.
     *
     * The arithmetic is deliberately left in this exact form: the
     * caller compares hardware counts against per-operation totals,
     * so restructuring expressions would change what is measured.
     */
 
    int i, j, k;
    double sum = 0.0;
 
    for (i = 0; i < n; i++) {
        for (j = 0; j <= i; j++) {
            /* Dot product of the already computed parts of rows i and j. */
            sum = 0.0;
            for (k = 0; k < j; k++) {
                sum += ld[i * n + k] * ld[j * n + k];
            }
 
            if( i == j ) {
                /* Diagonal entry: one subtraction and one square root. */
                ld[i * n + j] = sqrt(ad[i * n + i] - sum);
            } else {
                /* Off-diagonal entry: a reciprocal (one divide), one
                 * subtraction and one multiply; ld[j * n + j] was written
                 * earlier because j < i. */
                ld[i * n + j] = ((double)1.0)/ld[j * n + j] * (ad[i * n + j] - sum);
            }
        }
    }
}

Here is the caller graph for this function:

◆ cholesky_single()

void cholesky_single	(	int	n,
		float *	ls,
		float *	as
	)

Definition at line 219 of file flops.c.

                                                    {
    /*
     * Single-precision Cholesky kernel: fills the lower triangle
     * (j <= i) of ls from the matrix as.  Both arrays are indexed as
     * n x n, row-major (element (i,j) lives at [i * n + j]).
     *
     * n:  matrix dimension; nothing is done when n <= 0.
     * ls: output; only entries on or below the diagonal are written.
     * as: input; only entries on or below the diagonal are read.
     *
     * The arithmetic is deliberately left in this exact form: the
     * caller compares hardware counts against per-operation totals,
     * so restructuring expressions would change what is measured.
     */
 
    int i, j, k;
    float sum = 0.0;
 
    for (i = 0; i < n; i++) {
        for (j = 0; j <= i; j++) {
            /* Dot product of the already computed parts of rows i and j. */
            sum = 0.0;
            for (k = 0; k < j; k++) {
                sum += ls[i * n + k] * ls[j * n + k];
            }
 
            if( i == j ) {
                /* Diagonal entry: one subtraction and one square root. */
                ls[i * n + j] = sqrtf(as[i * n + i] - sum);
            } else {
                /* Off-diagonal entry: a reciprocal (one divide), one
                 * subtraction and one multiply; ls[j * n + j] was written
                 * earlier because j < i. */
                ls[i * n + j] = ((float)1.0)/ls[j * n + j] * (as[i * n + j] - sum);
            }
        }
    }
}

Here is the caller graph for this function:

◆ exec_double_cholesky()

void exec_double_cholesky	(	int	EventSet,
		FILE *	fp
	)

Definition at line 353 of file flops.c.

                                                    {
    /*
     * Benchmark driver for cholesky_double().  For every size n in
     * [0, MAXDIM) it builds a symmetric, diagonally dominant n x n
     * matrix, starts counting on EventSet, runs the factorization once
     * and lets resultline() stop the counter and append one row to fp.
     *
     * EventSet: PAPI event set passed to PAPI_start() and resultline().
     * fp:       stream receiving the header and the per-size rows.
     *
     * Stops early if an allocation or PAPI_start() fails; both matrices
     * are released on every exit path.
     */
    int i, j, n, retval;
    double *ad=NULL, *ld=NULL;
    double sumd = 0.0;

    /* Print info about the computational kernel. */
    print_header( fp, "Double-Precision", "Cholesky Decomposition" );

    /* Allocate the matrices. */
    ad = malloc( MAXDIM * MAXDIM * sizeof(double) );
    ld = malloc( MAXDIM * MAXDIM * sizeof(double) );
    if ( NULL == ad || NULL == ld ) {
        fprintf(stderr, "Failed to allocate the double-precision Cholesky matrices.\n");
        goto cleanup;
    }

    /* Step through the different array sizes. */
    for ( n = 0; n < MAXDIM; n++ ) {
        /* Initialize the needed arrays at this size: random symmetric
         * off-diagonal entries in ad, zeros everywhere in ld. */
        for ( i = 0; i < n; i++ ) {
            for ( j = 0; j < i; j++ ) {
                ld[i * n + j] = 0.0;
                ld[j * n + i] = 0.0;

                ad[i * n + j] = ((double)random())/((double)RAND_MAX) * (double)1.1;
                ad[j * n + i] = ad[i * n + j];
            }
            ad[i * n + i] = 0.0;
            ld[i * n + i] = 0.0;
        }

        /* Guarantee diagonal dominance for successful Cholesky. */
        for ( i = 0; i < n; i++ ) {
            sumd = 0.0;
            for ( j = 0; j < n; j++ ) {
                sumd += fabs(ad[i * n + j]);
            }
            ad[i * n + i] = sumd + (double)1.1;
        }

        /* Reset PAPI count.  On failure leave through the cleanup path
         * so the matrices are not leaked. */
        if ( (retval = PAPI_start( EventSet )) != PAPI_OK ) {
            goto cleanup;
        }

        /* Run the kernel. */
        cholesky_double( n, ld, ad );
        usleep(1);

        /* Stop and print count. */
        resultline( n, CHOLESKY, EventSet, fp );

        /* Consume the result so the kernel cannot be optimized away. */
        keep_double_mat_res( n, ld );
    }

cleanup:
    /* free(NULL) is a no-op, so this is safe after a failed malloc. */
    free( ad );
    free( ld );
}

Here is the call graph for this function:

Here is the caller graph for this function:

◆ exec_double_gemm()

void exec_double_gemm	(	int	EventSet,
		FILE *	fp
	)

Definition at line 409 of file flops.c.

                                                {
    /*
     * Benchmark driver for gemm_double().  For every size n in
     * [0, MAXDIM) it fills two n x n operands with random values, zeroes
     * the result matrix, starts counting on EventSet, runs the product
     * once and lets resultline() stop the counter and append one row
     * to fp.
     *
     * EventSet: PAPI event set passed to PAPI_start() and resultline().
     * fp:       stream receiving the header and the per-size rows.
     *
     * Stops early if an allocation or PAPI_start() fails; all three
     * matrices are released on every exit path.
     */
    int i, j, n, retval;
    double *ad=NULL, *bd=NULL, *cd=NULL;

    /* Print info about the computational kernel. */
    print_header( fp, "Double-Precision", "GEMM" );

    /* Allocate the matrices. */
    ad = malloc( MAXDIM * MAXDIM * sizeof(double) );
    bd = malloc( MAXDIM * MAXDIM * sizeof(double) );
    cd = malloc( MAXDIM * MAXDIM * sizeof(double) );
    if ( NULL == ad || NULL == bd || NULL == cd ) {
        fprintf(stderr, "Failed to allocate the double-precision GEMM matrices.\n");
        goto cleanup;
    }

    /* Step through the different array sizes. */
    for ( n = 0; n < MAXDIM; n++ ) {
        /* Initialize the needed arrays at this size. */
        for ( i = 0; i < n; i++ ) {
            for ( j = 0; j < n; j++ ) {
                cd[i * n + j] = 0.0;
                ad[i * n + j] = ((double)random())/((double)RAND_MAX) * (double)1.1;
                bd[i * n + j] = ((double)random())/((double)RAND_MAX) * (double)1.1;
            }
        }

        /* Reset PAPI count.  On failure leave through the cleanup path
         * so the matrices are not leaked. */
        if ( (retval = PAPI_start( EventSet )) != PAPI_OK ) {
            goto cleanup;
        }

        /* Run the kernel. */
        gemm_double( n, cd, ad, bd );
        usleep(1);

        /* Stop and print count. */
        resultline( n, GEMM, EventSet, fp );

        /* Consume the result so the kernel cannot be optimized away. */
        keep_double_mat_res( n, cd );
    }

cleanup:
    /* free(NULL) is a no-op, so this is safe after a failed malloc. */
    free( ad );
    free( bd );
    free( cd );
}

Here is the call graph for this function:

Here is the caller graph for this function:

◆ exec_double_norm()

void exec_double_norm	(	int	EventSet,
		FILE *	fp
	)

Definition at line 316 of file flops.c.

                                                {
    /*
     * Benchmark driver for normalize_double().  For every length n in
     * [0, MAXDIM) it fills the first n entries of a vector with random
     * values, starts counting on EventSet, runs the normalization once
     * and lets resultline() stop the counter and append one row to fp.
     *
     * EventSet: PAPI event set passed to PAPI_start() and resultline().
     * fp:       stream receiving the header and the per-size rows.
     *
     * Stops early if the allocation or PAPI_start() fails; the vector
     * is released on every exit path.
     */
    int i, n, retval;
    double *xd=NULL;

    /* Print info about the computational kernel. */
    print_header( fp, "Double-Precision", "Vector Normalization" );

    /* Allocate the linear arrays. */
    xd = malloc( MAXDIM * sizeof(double) );
    if ( NULL == xd ) {
        fprintf(stderr, "Failed to allocate the double-precision vector.\n");
        return;
    }

    /* Step through the different array sizes. */
    for ( n = 0; n < MAXDIM; n++ ) {
        /* Initialize the needed arrays at this size. */
        for ( i = 0; i < n; i++ ) {
            xd[i] = ((double)random())/((double)RAND_MAX) * (double)1.1;
        }

        /* Reset PAPI count.  Break instead of returning so the vector
         * is still freed below. */
        if ( (retval = PAPI_start( EventSet )) != PAPI_OK ) {
            break;
        }

        /* Run the kernel. */
        normalize_double( n, xd );
        usleep(1);

        /* Stop and print count. */
        resultline( n, NORMALIZE, EventSet, fp );

        /* Consume the result so the kernel cannot be optimized away. */
        keep_double_vec_res( n, xd );
    }

    /* Free dynamically allocated memory. */
    free( xd );
}

Here is the call graph for this function:

Here is the caller graph for this function:

◆ exec_flops()

void exec_flops	(	int	precision,
		int	EventSet,
		FILE *	fp
	)

Definition at line 813 of file flops.c.

                                                         {
    /*
     * Runs the three benchmarks (vector normalization, Cholesky
     * decomposition, GEMM) for the requested precision, in that order.
     *
     * precision: one of DOUBLE, SINGLE or HALF; any other value is
     *            ignored.  The HALF benchmarks are only compiled in
     *            when ARM is defined; otherwise HALF does nothing.
     * EventSet:  PAPI event set forwarded to each benchmark.
     * fp:        output stream forwarded to each benchmark.
     */
    if ( DOUBLE == precision ) {
        exec_double_norm(EventSet, fp);
        exec_double_cholesky(EventSet, fp);
        exec_double_gemm(EventSet, fp);
    } else if ( SINGLE == precision ) {
        exec_single_norm(EventSet, fp);
        exec_single_cholesky(EventSet, fp);
        exec_single_gemm(EventSet, fp);
    } else if ( HALF == precision ) {
#if defined(ARM)
        exec_half_norm(EventSet, fp);
        exec_half_cholesky(EventSet, fp);
        exec_half_gemm(EventSet, fp);
#endif
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

◆ exec_single_cholesky()

void exec_single_cholesky	(	int	EventSet,
		FILE *	fp
	)

Definition at line 518 of file flops.c.

                                                    {
    /*
     * Benchmark driver for cholesky_single().  For every size n in
     * [0, MAXDIM) it builds a symmetric, diagonally dominant n x n
     * matrix, starts counting on EventSet, runs the factorization once
     * and lets resultline() stop the counter and append one row to fp.
     *
     * EventSet: PAPI event set passed to PAPI_start() and resultline().
     * fp:       stream receiving the header and the per-size rows.
     *
     * Stops early if an allocation or PAPI_start() fails; both matrices
     * are released on every exit path.
     */
    int i, j, n, retval;
    float *as=NULL, *ls=NULL;
    float sums = 0.0;

    /* Print info about the computational kernel. */
    print_header( fp, "Single-Precision", "Cholesky Decomposition" );

    /* Allocate the matrices. */
    as = malloc( MAXDIM * MAXDIM * sizeof(float) );
    ls = malloc( MAXDIM * MAXDIM * sizeof(float) );
    if ( NULL == as || NULL == ls ) {
        fprintf(stderr, "Failed to allocate the single-precision Cholesky matrices.\n");
        goto cleanup;
    }

    /* Step through the different array sizes. */
    for ( n = 0; n < MAXDIM; n++ ) {
        /* Initialize the needed arrays at this size: random symmetric
         * off-diagonal entries in as, zeros everywhere in ls. */
        for ( i = 0; i < n; i++ ) {
            for ( j = 0; j < i; j++ ) {
                ls[i * n + j] = 0.0;
                ls[j * n + i] = 0.0;

                as[i * n + j] = ((float)random())/((float)RAND_MAX) * (float)1.1;
                as[j * n + i] = as[i * n + j];
            }
            as[i * n + i] = 0.0;
            ls[i * n + i] = 0.0;
        }

        /* Guarantee diagonal dominance for successful Cholesky. */
        for ( i = 0; i < n; i++ ) {
            sums = 0.0;
            for ( j = 0; j < n; j++ ) {
                sums += fabs(as[i * n + j]);
            }
            as[i * n + i] = sums + (float)1.1;
        }

        /* Reset PAPI count.  On failure leave through the cleanup path
         * so the matrices are not leaked. */
        if ( (retval = PAPI_start( EventSet )) != PAPI_OK ) {
            goto cleanup;
        }

        /* Run the kernel. */
        cholesky_single( n, ls, as );
        usleep(1);

        /* Stop and print count. */
        resultline( n, CHOLESKY, EventSet, fp );

        /* Consume the result so the kernel cannot be optimized away. */
        keep_single_mat_res( n, ls );
    }

cleanup:
    /* free(NULL) is a no-op, so this is safe after a failed malloc. */
    free( as );
    free( ls );
}

Here is the call graph for this function:

Here is the caller graph for this function:

◆ exec_single_gemm()

void exec_single_gemm	(	int	EventSet,
		FILE *	fp
	)

Definition at line 574 of file flops.c.

                                                {
    /*
     * Benchmark driver for gemm_single().  For every size n in
     * [0, MAXDIM) it fills two n x n operands with random values, zeroes
     * the result matrix, starts counting on EventSet, runs the product
     * once and lets resultline() stop the counter and append one row
     * to fp.
     *
     * EventSet: PAPI event set passed to PAPI_start() and resultline().
     * fp:       stream receiving the header and the per-size rows.
     *
     * Stops early if an allocation or PAPI_start() fails; all three
     * matrices are released on every exit path.
     */
    int i, j, n, retval;
    float *as=NULL, *bs=NULL, *cs=NULL;

    /* Print info about the computational kernel. */
    print_header( fp, "Single-Precision", "GEMM" );

    /* Allocate the matrices. */
    as = malloc( MAXDIM * MAXDIM * sizeof(float) );
    bs = malloc( MAXDIM * MAXDIM * sizeof(float) );
    cs = malloc( MAXDIM * MAXDIM * sizeof(float) );
    if ( NULL == as || NULL == bs || NULL == cs ) {
        fprintf(stderr, "Failed to allocate the single-precision GEMM matrices.\n");
        goto cleanup;
    }

    /* Step through the different array sizes. */
    for ( n = 0; n < MAXDIM; n++ ) {
        /* Initialize the needed arrays at this size. */
        for ( i = 0; i < n; i++ ) {
            for ( j = 0; j < n; j++ ) {
                cs[i * n + j] = 0.0;
                as[i * n + j] = ((float)random())/((float)RAND_MAX) * (float)1.1;
                bs[i * n + j] = ((float)random())/((float)RAND_MAX) * (float)1.1;
            }
        }

        /* Reset PAPI count.  On failure leave through the cleanup path
         * so the matrices are not leaked. */
        if ( (retval = PAPI_start( EventSet )) != PAPI_OK ) {
            goto cleanup;
        }

        /* Run the kernel. */
        gemm_single( n, cs, as, bs );
        usleep(1);

        /* Stop and print count. */
        resultline( n, GEMM, EventSet, fp );

        /* Consume the result so the kernel cannot be optimized away. */
        keep_single_mat_res( n, cs );
    }

cleanup:
    /* free(NULL) is a no-op, so this is safe after a failed malloc. */
    free( as );
    free( bs );
    free( cs );
}

Here is the call graph for this function:

Here is the caller graph for this function:

◆ exec_single_norm()

void exec_single_norm	(	int	EventSet,
		FILE *	fp
	)

Definition at line 481 of file flops.c.

                                                {
    /*
     * Benchmark driver for normalize_single().  For every length n in
     * [0, MAXDIM) it fills the first n entries of a vector with random
     * values, starts counting on EventSet, runs the normalization once
     * and lets resultline() stop the counter and append one row to fp.
     *
     * EventSet: PAPI event set passed to PAPI_start() and resultline().
     * fp:       stream receiving the header and the per-size rows.
     *
     * Stops early if the allocation or PAPI_start() fails; the vector
     * is released on every exit path.
     */
    int i, n, retval;
    float *xs=NULL;

    /* Print info about the computational kernel. */
    print_header( fp, "Single-Precision", "Vector Normalization" );

    /* Allocate the linear arrays. */
    xs = malloc( MAXDIM * sizeof(float) );
    if ( NULL == xs ) {
        fprintf(stderr, "Failed to allocate the single-precision vector.\n");
        return;
    }

    /* Step through the different array sizes. */
    for ( n = 0; n < MAXDIM; n++ ) {
        /* Initialize the needed arrays at this size. */
        for ( i = 0; i < n; i++ ) {
            xs[i] = ((float)random())/((float)RAND_MAX) * (float)1.1;
        }

        /* Reset PAPI count.  Break instead of returning so the vector
         * is still freed below. */
        if ( (retval = PAPI_start( EventSet )) != PAPI_OK ) {
            break;
        }

        /* Run the kernel. */
        normalize_single( n, xs );
        usleep(1);

        /* Stop and print count. */
        resultline( n, NORMALIZE, EventSet, fp );

        /* Consume the result so the kernel cannot be optimized away. */
        keep_single_vec_res( n, xs );
    }

    /* Free dynamically allocated memory. */
    free( xs );
}

Here is the call graph for this function:

Here is the caller graph for this function:

◆ flops_driver()

void flops_driver	(	char *	papi_event_name,
		hw_desc_t *	hw_desc,
		char *	outdir
	)

Definition at line 841 of file flops.c.

                                                                             {
    /*
     * Entry point of the FLOPs benchmark for a single PAPI event.
     *
     * Opens "<outdir><papi_event_name>.flops" for writing (the three
     * parts are concatenated as-is, no path separator is inserted),
     * creates an event set containing only papi_event_name, and runs
     * the half-, single- and double-precision benchmarks against it.
     *
     * papi_event_name: name of the event to measure.
     * hw_desc:         unused here.
     * outdir:          prefix of the output file name.
     *
     * Failures are not reported to the caller; the function simply
     * releases whatever it had acquired and returns.
     */
    int retval = PAPI_OK;
    int EventSet = PAPI_NULL;
    FILE* ofp_papi;
    const char *sufx = ".flops";
    char *papiFileName;

    (void)hw_desc;

    int l = strlen(outdir)+strlen(papi_event_name)+strlen(sufx);
    if (NULL == (papiFileName = (char *)calloc( 1+l, sizeof(char)))) {
        return;
    }
    /* Bounded by the allocation size; a return value other than l means
     * the name was not written completely. */
    if (l != (snprintf(papiFileName, (size_t)l + 1, "%s%s%s", outdir, papi_event_name, sufx))) {
        goto error0;
    }
    if (NULL == (ofp_papi = fopen(papiFileName,"w"))) {
        fprintf(stderr, "Failed to open file %s.\n", papiFileName);
        goto error0;
    }

    retval = PAPI_create_eventset( &EventSet );
    if (retval != PAPI_OK ){
        goto error1;
    }

    retval = PAPI_add_named_event( EventSet, papi_event_name );
    if (retval != PAPI_OK ){
        /* The set exists but is still empty: destroy it instead of
         * leaking it. */
        goto error2;
    }

    exec_flops(HALF,   EventSet, ofp_papi);
    exec_flops(SINGLE, EventSet, ofp_papi);
    exec_flops(DOUBLE, EventSet, ofp_papi);

    /* The set has to be emptied before it can be destroyed. */
    retval = PAPI_cleanup_eventset( EventSet );
    if (retval != PAPI_OK ){
        goto error1;
    }

error2:
    /* Nothing further can be done if this fails, so the result is
     * intentionally ignored. */
    (void)PAPI_destroy_eventset( &EventSet );
error1:
    fclose(ofp_papi);
error0:
    free(papiFileName);
    return;
}

Here is the call graph for this function:

Here is the caller graph for this function:

◆ gemm_double()

void gemm_double	(	int	n,
		double *	cd,
		double *	ad,
		double *	bd
	)

Definition at line 298 of file flops.c.

                                                              {
    /*
     * Double-precision matrix-product kernel: stores into cd the
     * product of ad and bd, all three indexed as n x n row-major
     * arrays.  Each output element is accumulated over k through
     * FMA_VEC_SD.
     *
     * n:  matrix dimension; nothing is done when n <= 0.
     * cd: output, every entry is overwritten.
     * ad: left operand, read only.
     * bd: right operand, read only.
     *
     * NOTE(review): DP_SCALAR_TYPE, SET_VEC_SD and FMA_VEC_SD are
     * defined outside this listing.  Presumably
     * FMA_VEC_SD(dst, a, b, c) stores a*b + c into dst - confirm
     * against their definitions.
     */
 
    int i, j, k;
    DP_SCALAR_TYPE argI, argJ, argK;
 
    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            /* argK is the accumulator for element (i,j). */
            argK = SET_VEC_SD(0.0);
            for (k = 0; k < n; k++) {
                argI = SET_VEC_SD(ad[i * n + k]);
                argJ = SET_VEC_SD(bd[k * n + j]);
                FMA_VEC_SD(argK, argI, argJ, argK);
            }
            /* Read back the first double stored in the accumulator. */
            cd[i * n + j] = ((double*)&argK)[0];
        }
    }
}

Here is the caller graph for this function:

◆ gemm_single()

void gemm_single	(	int	n,
		float *	cs,
		float *	as,
		float *	bs
	)

Definition at line 240 of file flops.c.

                                                           {
    /*
     * Single-precision matrix-product kernel: stores into cs the
     * product of as and bs, all three indexed as n x n row-major
     * arrays.  Each output element is accumulated over k through
     * FMA_VEC_SS.
     *
     * n:  matrix dimension; nothing is done when n <= 0.
     * cs: output, every entry is overwritten.
     * as: left operand, read only.
     * bs: right operand, read only.
     *
     * NOTE(review): SP_SCALAR_TYPE, SET_VEC_SS and FMA_VEC_SS are
     * defined outside this listing.  Presumably
     * FMA_VEC_SS(dst, a, b, c) stores a*b + c into dst - confirm
     * against their definitions.
     */
 
    int i, j, k;
    SP_SCALAR_TYPE argI, argJ, argK;
 
    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            /* argK is the accumulator for element (i,j). */
            argK = SET_VEC_SS(0.0);
            for (k = 0; k < n; k++) {
                argI = SET_VEC_SS(as[i * n + k]);
                argJ = SET_VEC_SS(bs[k * n + j]);
                FMA_VEC_SS(argK, argI, argJ, argK);
            }
            /* Read back the first float stored in the accumulator. */
            cs[i * n + j] = ((float*)&argK)[0];
        }
    }
}

Here is the caller graph for this function:

◆ keep_double_mat_res()

void keep_double_mat_res	(	int	n,
		double *	ld
	)

Definition at line 466 of file flops.c.

                                              {
    /*
     * Reads every element of the n x n matrix ld so that the compiler
     * cannot discard the computation that produced it.  The elements
     * are summed in row-major order and a message is printed to stderr
     * only in the unlikely case that the sum equals 1.2345.
     *
     * n:  matrix dimension; nothing is read when n <= 0.
     * ld: matrix to consume, read only.
     */
    double total = 0.0;
    int row, col;

    for ( row = 0; row < n; ++row ) {
        const double *cur = ld + row * n;

        for ( col = 0; col < n; ++col ) {
            total += cur[col];
        }
    }

    if ( 1.2345 != total ) {
        return;
    }
    fprintf(stderr, "Side-effect to disable dead code elimination by the compiler. Please ignore.\n");
}

Here is the caller graph for this function:

◆ keep_double_vec_res()

void keep_double_vec_res	(	int	n,
		double *	xd
	)

Definition at line 453 of file flops.c.

                                              {
    /*
     * Reads the first n elements of xd so that the compiler cannot
     * discard the computation that produced them.  The elements are
     * summed front to back and a message is printed to stderr only in
     * the unlikely case that the sum equals 1.2345.
     *
     * n:  number of elements; nothing is read when n <= 0.
     * xd: vector to consume, read only.
     */
    double total = 0.0;
    int idx = 0;

    while ( idx < n ) {
        total += xd[idx];
        ++idx;
    }

    if ( 1.2345 != total ) {
        return;
    }
    fprintf(stderr, "Side-effect to disable dead code elimination by the compiler. Please ignore.\n");
}

Here is the caller graph for this function:

◆ keep_single_mat_res()

void keep_single_mat_res	(	int	n,
		float *	ls
	)

Definition at line 631 of file flops.c.

                                             {
    /*
     * Reads every element of the n x n matrix ls so that the compiler
     * cannot discard the computation that produced it.  The elements
     * are summed in row-major order and a message is printed to stderr
     * only in the unlikely case that the sum equals 1.2345.
     *
     * n:  matrix dimension; nothing is read when n <= 0.
     * ls: matrix to consume, read only.
     */
    float total = 0.0;
    int row, col;

    for ( row = 0; row < n; ++row ) {
        const float *cur = ls + row * n;

        for ( col = 0; col < n; ++col ) {
            total += cur[col];
        }
    }

    if ( 1.2345 != total ) {
        return;
    }
    fprintf(stderr, "Side-effect to disable dead code elimination by the compiler. Please ignore.\n");
}

Here is the caller graph for this function:

◆ keep_single_vec_res()

void keep_single_vec_res	(	int	n,
		float *	xs
	)

Definition at line 618 of file flops.c.

                                             {
    /*
     * Reads the first n elements of xs so that the compiler cannot
     * discard the computation that produced them.  The elements are
     * summed front to back and a message is printed to stderr only in
     * the unlikely case that the sum equals 1.2345.
     *
     * n:  number of elements; nothing is read when n <= 0.
     * xs: vector to consume, read only.
     */
    float total = 0.0;
    int idx = 0;

    while ( idx < n ) {
        total += xs[idx];
        ++idx;
    }

    if ( 1.2345 != total ) {
        return;
    }
    fprintf(stderr, "Side-effect to disable dead code elimination by the compiler. Please ignore.\n");
}

Here is the caller graph for this function:

◆ normalize_double()

double normalize_double	(	int	n,
		double *	xd
	)

Definition at line 258 of file flops.c.

                                             {
    /*
     * Double-precision normalization kernel: divides the first n
     * elements of xd, in place, by the square root of the sum of their
     * squares.
     *
     * n:  number of elements.
     * xd: vector, overwritten with the scaled values.
     *
     * Returns the square root that was used as divisor, or 0.0 without
     * touching xd when n is 0.
     *
     * For n > 0 this performs n multiplies, n additions, one square
     * root and n divisions.  There is no guard for a zero divisor
     * (all elements zero).
     */
 
    if ( 0 == n )
        return 0.0;
 
    double aa = 0.0;
    int i;
 
    /* Accumulate the sum of squares. */
    for ( i = 0; i < n; i++ )
        aa = aa + xd[i] * xd[i];
 
    aa = sqrt(aa);
    /* Scale every element by the value just computed. */
    for ( i = 0; i < n; i++ )
        xd[i] = xd[i]/aa;
 
    return ( aa );
}

Here is the caller graph for this function:

◆ normalize_single()

float normalize_single	(	int	n,
		float *	xs
	)

Definition at line 201 of file flops.c.

                                           {
    /*
     * Single-precision normalization kernel: divides the first n
     * elements of xs, in place, by the square root of the sum of their
     * squares.
     *
     * n:  number of elements.
     * xs: vector, overwritten with the scaled values.
     *
     * Returns the square root that was used as divisor, or 0.0 without
     * touching xs when n is 0.
     *
     * For n > 0 this performs n multiplies, n additions, one square
     * root and n divisions.  There is no guard for a zero divisor
     * (all elements zero).
     */
 
    if ( 0 == n )
        return 0.0;
 
    float aa = 0.0;
    int i;
 
    /* Accumulate the sum of squares. */
    for ( i = 0; i < n; i++ )
        aa = aa + xs[i] * xs[i];
 
    aa = sqrtf(aa);
    /* Scale every element by the value just computed. */
    for ( i = 0; i < n; i++ )
        xs[i] = xs[i]/aa;
 
    return ( aa );
}

Here is the caller graph for this function:

◆ print_header()

void print_header	(	FILE *	fp,
		char *	prec,
		char *	kernel
	)

Definition at line 59 of file flops.c.

                                                        {
    /*
     * Writes the two comment lines that precede a benchmark's results:
     * "#<prec> <kernel>" followed by the list of column names.
     *
     * fp:     output stream.
     * prec:   precision label.
     * kernel: kernel label.
     */
    static const char columns[] =
        "#N RawEvtCnt NormdEvtCnt ExpectedAdd ExpectedSub ExpectedMul ExpectedDiv ExpectedSqrt ExpectedFMA ExpectedTotal\n";

    fprintf(fp, "#%s %s\n", prec, kernel);
    fputs(columns, fp);
}

Here is the caller graph for this function:

◆ resultline()

void resultline	(	int	i,
		int	kernel,
		int	EventSet,
		FILE *	fp
	)

Definition at line 65 of file flops.c.

                                                             {
    /*
     * Stops counting on EventSet and appends one result row to fp:
     *
     *   N  raw-count  raw-count/expected-total
     *   expected add, sub, mul, div, sqrt, fma and total
     *
     * i:        problem size the kernel was run with.
     * kernel:   NORMALIZE, GEMM or CHOLESKY; any other value prints -1
     *           for every expected count.
     * EventSet: running PAPI event set to stop and read.
     * fp:       output stream.
     *
     * Nothing is written if PAPI_stop() fails.
     */
    long long flpins = 0, denom;
    long long papi, all, add, sub, mul, n_div, n_sqrt, fma;
    int retval;

    if ( (retval=PAPI_stop(EventSet, &flpins)) != PAPI_OK ) {
        return;
    }

    switch(kernel) {
      case NORMALIZE:
          add    = i;
          sub    = 0;
          mul    = i;
          n_div  = i;
          /* The kernel returns before any arithmetic when the size is
           * 0, so not even the square root is expected then. */
          if ( 0 == i ) {
              n_sqrt = 0;
          } else {
              n_sqrt = 1;
          }
          fma    = 0;
          /* Equals 3*i+1 for i > 0 and 0 for i == 0, which keeps the
           * total equal to the sum of the columns printed beside it. */
          all    = add + mul + n_div + n_sqrt;
          /* Same guard as the other kernels: never divide by zero. */
          if ( 0 == i ) {
              denom = 1;
          } else {
              denom = all;
          }
          break;
      case GEMM:
          all  = 2*i*i*i;
          if ( 0 == i ) {
              denom = 1;
          } else {
              denom = all;
          }
          add    = 0;
          sub    = 0;
          mul    = 0;
          n_div  = 0;
          n_sqrt = 0;
          fma    = i*i*i; // Need to derive.
          break;
      case CHOLESKY:
          all  = i*(2*i*i+9*i+1)/6.0;
          if ( 0 == i ) {
              denom = 1;
          } else {
              denom = all;
          }
          add    = i*(i-1)*(i+1)/6.0;
          sub    = i*(i+1)/2.0;
          mul    = i*(i-1)*(i+4)/6.0;
          n_div  = i*(i-1)/2.0;
          n_sqrt = i;
          fma    = 0;
          break;
      default:
          all    = -1;
          denom  = -1;
          add    = -1;
          sub    = -1;
          mul    = -1;
          n_div  = -1;
          n_sqrt = -1;
          fma    = -1;
    }

    /* Scale the raw count by 2^FMA; with FMA defined as 0 this leaves
     * the count unchanged. */
    papi = flpins << FMA;

    fprintf(fp, "%d %lld %.17g %lld %lld %lld %lld %lld %lld %lld\n", i, papi, ((double)papi)/((double)denom), add, sub, mul, n_div, n_sqrt, fma, all);
}

Here is the caller graph for this function:

Macros

Functions

Macro Definition Documentation

◆ _GNU_SOURCE

◆ CHOLESKY

◆ DOUBLE

◆ FMA

◆ GEMM

◆ HALF

◆ MAXDIM

◆ NORMALIZE

◆ SINGLE

Function Documentation

◆ cholesky_double()

◆ cholesky_single()

◆ exec_double_cholesky()

◆ exec_double_gemm()

◆ exec_double_norm()

◆ exec_flops()

◆ exec_single_cholesky()

◆ exec_single_gemm()

◆ exec_single_norm()

◆ flops_driver()

◆ gemm_double()

◆ gemm_single()

◆ keep_double_mat_res()

◆ keep_double_vec_res()

◆ keep_single_mat_res()

◆ keep_single_vec_res()

◆ normalize_double()

◆ normalize_single()

◆ print_header()

◆ resultline()