16int main(
int argc,
char*argv[])
18 int cmbtotal = 0, ct = 0, track = 0, ret = 0;
19 int i, nevts = 0, status;
20 int *cards = NULL, *indexmemo = NULL;
21 char **allevts = NULL, **basenames = NULL;
24 int nprocs = 1,
myid = 0;
27 MPI_Init(&argc, &argv);
28 MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
29 MPI_Comm_rank(MPI_COMM_WORLD, &
myid);
62 fprintf(
stderr,
"Could not initialize event stock. Exiting...\n");
88 for(
i = 0;
i < ct; ++
i)
95 fprintf(
stderr,
"Could not populate event stock. Exiting...\n");
104 cmbtotal =
check_cards(params, &indexmemo, basenames, cards, ct, nevts, data);
111 for(
i = 0;
i < ct; ++
i)
123 if (NULL == (allevts = (
char**)malloc(cmbtotal*
sizeof(
char*)))) {
124 fprintf(
stderr,
"Failed to allocate memory.\n");
132 char *conf_file_name =
".cat_cfg";
141 int numSetThreads = 1;
142 char* envVarDefined = getenv(
"OMP_NUM_THREADS");
143 if (NULL == envVarDefined) {
144 omp_set_num_threads(hw_desc->
numcpus);
146 #pragma omp parallel default(shared)
148 if(!omp_get_thread_num()) {
149 numSetThreads = omp_get_num_threads();
153 if (numSetThreads != hw_desc->
numcpus) {
154 fprintf(
stderr,
"Warning! Failed to set default number of threads to number of CPUs in a single socket.\n");
166 for(
i = 0;
i < ct; ++
i)
174 for(
i = 0;
i < cmbtotal; ++
i)
184 MPI_Barrier(MPI_COMM_WORLD);
192 return omp_get_thread_num();
198 int i, j, minim, n, cmbtotal = 0;
200 int mode = params.
mode;
207 if (NULL == ((*indexmemo) = (
int*)malloc(ct*
sizeof(
int)))) {
208 fprintf(
stderr,
"Failed to allocate memory.\n");
214 for(
i = 0;
i < ct; ++
i)
216 if(NULL == basenames[
i])
218 (*indexmemo)[
i] = -1;
224 for(j = 0; j < nevts; ++j)
227 if(strcmp(basenames[
i],
name) == 0)
236 if(cards[
i] != 0 && j == nevts)
238 fprintf(
stderr,
"The provided event '%s' is either not in the architecture or contains qualifiers.\n" \
239 "If the latter, use '0' in place of the provided '%d'.\n", basenames[
i], cards[
i]);
246 fprintf(
stderr,
"The qualifier count (provided for event '%s') cannot be negative.\n", basenames[
i]);
254 for(
i = 0;
i < ct; ++
i)
264 if((*indexmemo)[
i] != -1)
277 if(cards[
i] > n || cards[
i] < 0)
281 cmbtotal +=
comb(n, minim);
287 for(
i = 0;
i < nevts; ++
i)
300 cmbtotal +=
comb(n, minim);
330 if( NULL != meminfo ) {
335 for ( j = 0; j < 2; j++ ) {
361 size_t linelen=0, len;
366 int ret_val = (
int)getline(&line, &linelen, input);
371 pos = strchr(line,
'#');
377 pos = strchr(line,
'=');
382 len = strcspn(line,
" =");
383 *
key = (
char *)calloc((1+len),
sizeof(char));
384 strncpy(*
key, line, len);
387 status = sscanf(pos,
"= %lld", value);
389 fprintf(
stderr,
"Malformed line in conf file: '%s'\n", line);
409 input = fopen(conf_file_name,
"r");
422 }
else if( ret_val > 0 ){
428 if( !strcmp(
key,
"AUTO_DISCOVERY_MODE") && (value == 1) ){
431 }
else if( !strcmp(
key,
"L1_DCACHE_LINE_SIZE") || !strcmp(
key,
"L1_UCACHE_LINE_SIZE") ){
433 }
else if( !strcmp(
key,
"L2_DCACHE_LINE_SIZE") || !strcmp(
key,
"L2_UCACHE_LINE_SIZE") ){
435 }
else if( !strcmp(
key,
"L3_DCACHE_LINE_SIZE") || !strcmp(
key,
"L3_UCACHE_LINE_SIZE") ){
437 }
else if( !strcmp(
key,
"L4_DCACHE_LINE_SIZE") || !strcmp(
key,
"L4_UCACHE_LINE_SIZE") ){
439 }
else if( !strcmp(
key,
"L1_DCACHE_SIZE") || !strcmp(
key,
"L1_UCACHE_SIZE") ){
442 }
else if( !strcmp(
key,
"L2_DCACHE_SIZE") || !strcmp(
key,
"L2_UCACHE_SIZE") ){
445 }
else if( !strcmp(
key,
"L3_DCACHE_SIZE") || !strcmp(
key,
"L3_UCACHE_SIZE") ){
448 }
else if( !strcmp(
key,
"L4_DCACHE_SIZE") || !strcmp(
key,
"L4_UCACHE_SIZE") ){
452 }
else if( !strcmp(
key,
"L1_ICACHE_LINE_SIZE") || !strcmp(
key,
"L1_UCACHE_LINE_SIZE") ){
454 }
else if( !strcmp(
key,
"L2_ICACHE_LINE_SIZE") || !strcmp(
key,
"L2_UCACHE_LINE_SIZE") ){
456 }
else if( !strcmp(
key,
"L3_ICACHE_LINE_SIZE") || !strcmp(
key,
"L3_UCACHE_LINE_SIZE") ){
458 }
else if( !strcmp(
key,
"L4_ICACHE_LINE_SIZE") || !strcmp(
key,
"L4_UCACHE_LINE_SIZE") ){
460 }
else if( !strcmp(
key,
"L1_ICACHE_SIZE") || !strcmp(
key,
"L1_UCACHE_SIZE") ){
462 }
else if( !strcmp(
key,
"L2_ICACHE_SIZE") || !strcmp(
key,
"L2_UCACHE_SIZE") ){
464 }
else if( !strcmp(
key,
"L3_ICACHE_SIZE") || !strcmp(
key,
"L3_UCACHE_SIZE") ){
466 }
else if( !strcmp(
key,
"L4_ICACHE_SIZE") || !strcmp(
key,
"L4_UCACHE_SIZE") ){
468 }
else if( !strcmp(
key,
"L1_SPLIT") ){
469 hw_desc->
split[0] = value;
470 }
else if( !strcmp(
key,
"L2_SPLIT") ){
471 hw_desc->
split[1] = value;
472 }
else if( !strcmp(
key,
"L3_SPLIT") ){
473 hw_desc->
split[2] = value;
474 }
else if( !strcmp(
key,
"L4_SPLIT") ){
475 hw_desc->
split[3] = value;
476 }
else if( !strcmp(
key,
"MM_SPLIT") ){
478 }
else if( !strcmp(
key,
"PTS_PER_L1") ){
480 }
else if( !strcmp(
key,
"PTS_PER_L2") ){
482 }
else if( !strcmp(
key,
"PTS_PER_L3") ){
484 }
else if( !strcmp(
key,
"PTS_PER_L4") ){
486 }
else if( !strcmp(
key,
"PTS_PER_MM") ){
488 }
else if( !strcmp(
key,
"MAX_PPB") ){
500int setup_evts(
char* inputfile,
char*** basenames,
int** evnt_cards)
503 int cnt = 0, status = 0;
504 char *line = NULL, *place;
506 int evnt_count = 256;
508 char **
names = (
char **)calloc(evnt_count,
sizeof(
char *));
509 int *cards = (
int *)calloc(evnt_count,
sizeof(
int));
511 if (NULL ==
names || NULL == cards) {
512 fprintf(
stderr,
"Failed to allocate memory.\n");
517 input = fopen(inputfile,
"r");
520 ssize_t ret_val = getline(&line, &linelen, input);
523 if( cnt >= evnt_count )
526 names = realloc(
names, evnt_count*
sizeof(
char *));
527 cards = realloc(cards, evnt_count*
sizeof(
int));
529 if (NULL ==
names || NULL == cards) {
530 fprintf(
stderr,
"Failed to allocate memory.\n");
535 place = strstr(line,
" ");
538 if(strlen(line) > 0 && line[0] ==
'#') {
547 }
else if( NULL == place ) {
548 fprintf(
stderr,
"problem with line: '%s'\n",line);
560 status = sscanf(line,
"%ms %d", &(
names[cnt]), &(cards[cnt]) );
565 fprintf(
stderr,
"problem with line: '%s'\n",line);
585void combine_qualifiers(
int n,
int pk,
int ct,
char** list,
char*
name,
char** allevts,
int* track,
int flag,
int* bitmap)
592 original = bitmap[ct];
598 for(
i = 0;
i < n; ++
i)
600 counter += bitmap[
i];
622 size_t evtsize = strlen(
name)+1;
623 for(
i = 0;
i < n; ++
i)
628 evtsize += strlen(list[
i])+1;
632 if (NULL == (chunk = (
char*)malloc((evtsize+1)*
sizeof(
char)))) {
633 fprintf(
stderr,
"Failed to allocate memory.\n");
638 for(
i = 0;
i < n; ++
i)
643 strcat(chunk,list[
i]);
648 allevts[*track] = strdup(chunk);
656 bitmap[ct] = original;
662void trav_evts(
evstock* stock,
int pk,
int* cards,
int nevts,
int selexnsize,
int mode,
char** allevts,
int* track,
int* indexmemo,
char** basenames)
665 char** chosen = NULL;
672 for(
i = 0;
i < selexnsize; ++
i)
697 chosen = (
char**)malloc(n*
sizeof(
char*));
698 bitmap = (
int*)calloc(n,
sizeof(
int));
700 if (NULL == chosen || NULL == bitmap) {
701 fprintf(
stderr,
"Failed to allocate memory.\n");
706 for(k = 0; k < n; ++k)
708 chosen[k] = strdup(stock->
evts[j][k]);
713 if (n!=0 && cards[
i]>0)
720 allevts[*track] = strdup(
name);
727 for(k = 0; k < n; ++k)
739 for(
i = 0;
i < nevts; ++
i)
749 chosen = (
char**)malloc(n*
sizeof(
char*));
750 bitmap = (
int*)calloc(n,
sizeof(
int));
752 if (NULL == chosen || NULL == bitmap) {
753 fprintf(
stderr,
"Failed to allocate memory.\n");
758 for(j = 0; j < n; ++j)
760 chosen[j] = strdup(stock->
evts[
i][j]);
771 allevts[*track] = strdup(
name);
776 for(j = 0; j < n; ++j)
795 for(
i = n;
i > diff; --
i)
812 printf(
"%3d%%\b\b\b\b",prg);
814 printf(
"%3d%%\n",prg);
822 printf(
"Total:%3d%% Current test: 0%%\b\b\b\b",prg);
824 printf(
"Total:%3d%%\n",prg);
832 int junk=((
int)getpid()+123)/456;
833 int low =
myid*(cmbtotal/nprocs);
834 int cap = (
myid+1)*(cmbtotal/nprocs);
835 int offset = nprocs*(1+cmbtotal/nprocs)-cmbtotal;
839 cap +=
myid-offset+1;
846 fprintf(
stderr,
"No events to measure.\n");
854 fprintf(
stderr,
"Warning: No benchmark specified. Running 'branch' by default.\n");
862 for(
i = low;
i < cap; ++
i)
866 if( allevts[
i] != NULL )
879 printf(
"D-Cache Latencies: 0%%\b\b\b\b");
886 if(params.
show_progress) printf(
"D-Cache Read Benchmarks: ");
887 for(
i = low;
i < cap; ++
i)
891 if( allevts[
i] != NULL ) {
906 printf(
"D-Cache Latencies: 0%%\b\b\b\b");
913 if(params.
show_progress) printf(
"D-Cache Write Benchmarks: ");
914 for(
i = low;
i < cap; ++
i)
918 if( allevts[
i] != NULL ) {
930 for(
i = low;
i < cap; ++
i)
934 if( allevts[
i] != NULL )
945 for(
i = low;
i < cap; ++
i)
949 if( allevts[
i] != NULL )
960 for(
i = low;
i < cap; ++
i)
964 if( allevts[
i] != NULL )
975 for(
i = low;
i < cap; ++
i)
979 if( allevts[
i] != NULL )
989 char *
name = argv[0];
1002 if( !strcmp(argv[0],
"-h") ){
1006 if( argc > 1 && !strcmp(argv[0],
"-k") ){
1011 fprintf(
stderr,
"Warning: Cannot pass a negative value to -k.\n");
1019 if( argc > 1 && !strcmp(argv[0],
"-n") ){
1025 if( argc > 1 && !strcmp(argv[0],
"-conf") ){
1031 if( argc > 1 && !strcmp(argv[0],
"-in") ){
1039 if( argc > 1 && !strcmp(argv[0],
"-out") ){
1045 if( !strcmp(argv[0],
"-verbose") ){
1049 if( !strcmp(argv[0],
"-quick") ){
1053 if( !strcmp(argv[0],
"-branch") ){
1057 if( !strcmp(argv[0],
"-dcr") ){
1061 if( !strcmp(argv[0],
"-dcw") ){
1065 if( !strcmp(argv[0],
"-flops") ){
1069 if( !strcmp(argv[0],
"-ic") ){
1073 if( !strcmp(argv[0],
"-vec") ){
1077 if( !strcmp(argv[0],
"-instr") ){
1092 fprintf(
stderr,
"Could not open %s. Exiting...\n", params->
inputfile);
1099 if(kflag == 1 && inflag == 1)
1101 fprintf(
stderr,
"Cannot use -k flag with -in flag. Exiting...\n");
1106 if(kflag == 0 && inflag == 0)
1115 fprintf(
stderr,
"Output path not provided. Exiting...\n");
1120 dirlen = strlen(
tmp);
1121 params->
outputdir = (
char*)malloc((2+dirlen)*
sizeof(char));
1124 fprintf(
stderr,
"Failed to allocate memory.\n");
1128 len = snprintf( params->
outputdir, 2+dirlen,
"%s/",
tmp);
1129 if( len < 1+dirlen )
1131 fprintf(
stderr,
"Problem with output directory name.\n");
1136 status = access(params->
outputdir, W_OK);
1139 fprintf(
stderr,
"Permission to write files to \"%s\" denied. Make sure the path exists and is writable.\n",
tmp);
1149 fprintf(
stdout,
"\nUsage: %s [OPTIONS...]\n",
name);
1151 fprintf(
stdout,
"\nRequired:\n");
1152 fprintf(
stdout,
" -out <path> Output files location.\n");
1153 fprintf(
stdout,
" -in <file> Events and cardinalities file.\n");
1154 fprintf(
stdout,
" -k <value> Cardinality of subsets.\n");
1155 fprintf(
stdout,
" Parameters \"-k\" and \"-in\" are mutually exclusive.\n");
1157 fprintf(
stdout,
"\nOptional:\n");
1158 fprintf(
stdout,
" -conf <path> Configuration file location.\n");
1159 fprintf(
stdout,
" -verbose Show benchmark progress in the standard output.\n");
1160 fprintf(
stdout,
" -quick Skip latency tests.\n");
1161 fprintf(
stdout,
" -n <value> Number of iterations for data cache kernels.\n");
1162 fprintf(
stdout,
" -branch Branch kernels.\n");
1163 fprintf(
stdout,
" -dcr Data cache reading kernels.\n");
1164 fprintf(
stdout,
" -dcw Data cache writing kernels.\n");
1165 fprintf(
stdout,
" -flops Floating point operations kernels.\n");
1166 fprintf(
stdout,
" -ic Instruction cache kernels.\n");
1167 fprintf(
stdout,
" -vec Vector FLOPs kernels.\n");
1168 fprintf(
stdout,
" -instr Instructions kernels.\n");
1171 fprintf(
stdout,
"EXAMPLE: %s -in event_list.txt -out OUTPUT_DIRECTORY -branch -dcw\n",
name);
const char * names[NUM_EVENTS]
void branch_driver(char *papi_event_name, int junk, hw_desc_t *hw_desc, char *outdir)
get information about the system hardware
initialize the PAPI library.
Finish using PAPI and free all related resources.
Returns a string describing the PAPI error code.
Initialize thread support in the PAPI library.
static pthread_t myid[NUM_THREADS]
void d_cache_driver(char *papi_event_name, cat_params_t params, hw_desc_t *hw_desc, int latency_only, int mode)
#define BENCH_DCACHE_WRITE
#define BENCH_DCACHE_READ
#define BENCH_ICACHE_READ
int num_evts(evstock *stock)
void remove_stock(evstock *stock)
int build_stock(evstock *stock)
int num_quals(evstock *stock, int base_evt)
char * evt_name(evstock *stock, int index)
void flops_driver(char *papi_event_name, hw_desc_t *hw_desc, char *outdir)
#define _MAX_SUPPORTED_CACHE_LEVELS
void i_cache_driver(char *papi_event_name, int junk, hw_desc_t *hw_desc, char *outdir, int show_progress)
void instr_driver(char *papi_event_name, hw_desc_t *hw_desc, char *outdir)
void testbench(char **allevts, int cmbtotal, hw_desc_t *hw_desc, cat_params_t params, int myid, int nprocs)
int setup_evts(char *inputfile, char ***basenames, int **evnt_cards)
static int parse_line(FILE *input, char **key, long long *value)
static void print_progress2(int prg)
void trav_evts(evstock *stock, int pk, int *cards, int nevts, int selexnsize, int mode, char **allevts, int *track, int *indexmemo, char **basenames)
static hw_desc_t * obtain_hardware_description(char *conf_file_name)
static void print_progress(int prg)
int check_cards(cat_params_t params, int **indexmemo, char **basenames, int *cards, int ct, int nevts, evstock *data)
void combine_qualifiers(int n, int pk, int ct, char **list, char *name, char **allevts, int *track, int flag, int *bitmap)
unsigned long int omp_get_thread_num_wrapper()
int parseArgs(int argc, char **argv, cat_params_t *params)
static void read_conf_file(char *conf_file_name, hw_desc_t *hw_desc)
Return codes and api definitions.
#define PAPI_MH_TYPE_DATA
#define PAPI_MH_CACHE_TYPE(a)
#define PAPI_MH_TYPE_INST
#define PAPI_MH_TYPE_UNIFIED
int fclose(FILE *__stream)
PAPI_mh_info_t mem_hierarchy
PAPI_mh_level_t level[PAPI_MAX_MEM_HIERARCHY_LEVELS]
PAPI_mh_cache_info_t cache[PAPI_MH_MAX_LEVELS]
long long icache_line_size[_MAX_SUPPORTED_CACHE_LEVELS]
int icache_assoc[_MAX_SUPPORTED_CACHE_LEVELS]
int split[_MAX_SUPPORTED_CACHE_LEVELS]
int pts_per_reg[_MAX_SUPPORTED_CACHE_LEVELS]
long long dcache_line_size[_MAX_SUPPORTED_CACHE_LEVELS]
long long dcache_size[_MAX_SUPPORTED_CACHE_LEVELS]
long long icache_size[_MAX_SUPPORTED_CACHE_LEVELS]
int dcache_assoc[_MAX_SUPPORTED_CACHE_LEVELS]
void vec_driver(char *papi_event_name, hw_desc_t *hw_desc, char *outdir)