PAPI 7.1.0.0
Loading...
Searching...
No Matches
main.c
Go to the documentation of this file.
1#include <stdlib.h>
2#include <string.h>
3#include <strings.h>
4#include <inttypes.h>
5#include <unistd.h>
6#include <fcntl.h>
7#include <unistd.h>
8
9#include "papi.h"
10#include "driver.h"
11
12#if defined(USE_MPI)
13#include <mpi.h>
14#endif
15
16int main(int argc, char*argv[])
17{
18 int cmbtotal = 0, ct = 0, track = 0, ret = 0;
19 int i, nevts = 0, status;
20 int *cards = NULL, *indexmemo = NULL;
21 char **allevts = NULL, **basenames = NULL;
22 evstock *data = NULL;
23 cat_params_t params = {-1,0,1,0,0,0,NULL,NULL,NULL};
24 int nprocs = 1, myid = 0;
25
26#if defined(USE_MPI)
27 MPI_Init(&argc, &argv);
28 MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
29 MPI_Comm_rank(MPI_COMM_WORLD, &myid);
30#endif
31
32 // Initialize PAPI.
34 if(ret != PAPI_VER_CURRENT){
35
36 fprintf(stderr,"PAPI shared library version error: %s Exiting...\n", PAPI_strerror(ret));
37 return 0;
38 }
39
40 // Initialize PAPI thread support.
42 if( ret != PAPI_OK ) {
43
44 fprintf(stderr,"PAPI thread init error: %s Exiting...\n", PAPI_strerror(ret));
45 return 0;
46 }
47
48 // Parse the command-line arguments.
49 status = parseArgs(argc, argv, &params);
50 if(0 != status)
51 {
52 free(params.outputdir);
54 return 0;
55 }
56
57 // Allocate space for the native events and qualifiers.
58 data = (evstock*)calloc(1,sizeof(evstock));
59 if(NULL == data)
60 {
61 free(params.outputdir);
62 fprintf(stderr, "Could not initialize event stock. Exiting...\n");
64 return 0;
65 }
66
67 // Read the list of base event names and maximum qualifier set cardinalities.
68 if( READ_FROM_FILE == params.mode)
69 {
70 ct = setup_evts(params.inputfile, &basenames, &cards);
71 if(ct == -1)
72 {
73 free(params.outputdir);
74 remove_stock(data);
76 return 0;
77 }
78 }
79
80 // Populate the event stock.
81 status = build_stock(data);
82 if(status)
83 {
84 free(params.outputdir);
85 remove_stock(data);
86 if(READ_FROM_FILE == params.mode)
87 {
88 for(i = 0; i < ct; ++i)
89 {
90 free(basenames[i]);
91 }
92 free(basenames);
93 free(cards);
94 }
95 fprintf(stderr, "Could not populate event stock. Exiting...\n");
97 return 0;
98 }
99
100 // Get the number of events contained in the event stock.
101 nevts = num_evts(data);
102
103 // Verify the validity of the cardinalities.
104 cmbtotal = check_cards(params, &indexmemo, basenames, cards, ct, nevts, data);
105 if(-1 == cmbtotal)
106 {
107 free(params.outputdir);
108 remove_stock(data);
109 if(READ_FROM_FILE == params.mode)
110 {
111 for(i = 0; i < ct; ++i)
112 {
113 free(basenames[i]);
114 }
115 free(basenames);
116 free(cards);
117 }
119 return 0;
120 }
121
122 // Allocate enough space for all of the event+qualifier combinations.
123 if (NULL == (allevts = (char**)malloc(cmbtotal*sizeof(char*)))) {
124 fprintf(stderr, "Failed to allocate memory.\n");
126 return 0;
127 }
128
129 // Create the qualifier combinations for each event.
130 trav_evts(data, params.subsetsize, cards, nevts, ct, params.mode, allevts, &track, indexmemo, basenames);
131
132 char *conf_file_name = ".cat_cfg";
133 if( NULL != params.conf_file ) {
134 conf_file_name = params.conf_file;
135 }
136 hw_desc_t *hw_desc = obtain_hardware_description(conf_file_name);
137
138 /* Set the default number of threads to the OMP_NUM_THREADS environment
139 * variable if it is defined. Otherwise, set it to the number of CPUs
140 * in a single socket. */
141 int numSetThreads = 1;
142 char* envVarDefined = getenv("OMP_NUM_THREADS");
143 if (NULL == envVarDefined) {
144 omp_set_num_threads(hw_desc->numcpus);
145
146 #pragma omp parallel default(shared)
147 {
148 if(!omp_get_thread_num()) {
149 numSetThreads = omp_get_num_threads();
150 }
151 }
152
153 if (numSetThreads != hw_desc->numcpus) {
154 fprintf(stderr, "Warning! Failed to set default number of threads to number of CPUs in a single socket.\n");
155 }
156 }
157
158 // Run the benchmark for each qualifier combination.
159 testbench(allevts, cmbtotal, hw_desc, params, myid, nprocs);
160
161 // Free dynamically allocated memory.
162 free(params.outputdir);
163 remove_stock(data);
164 if(READ_FROM_FILE == params.mode)
165 {
166 for(i = 0; i < ct; ++i)
167 {
168 free(basenames[i]);
169 }
170 free(basenames);
171 free(cards);
172 free(indexmemo);
173 }
174 for(i = 0; i < cmbtotal; ++i)
175 {
176 free(allevts[i]);
177 }
178 free(allevts);
179 free(hw_desc);
180
182
183#if defined(USE_MPI)
184 MPI_Barrier(MPI_COMM_WORLD);
185 MPI_Finalize();
186#endif
187
188 return 0;
189}
190
// Report the calling OpenMP thread's number, widened to unsigned long.
unsigned long int omp_get_thread_num_wrapper() {
    unsigned long int thread_id = (unsigned long int)omp_get_thread_num();

    return thread_id;
}
194
195// Verify that valid qualifier counts are provided and count their combinations.
196int check_cards(cat_params_t params, int** indexmemo, char** basenames, int* cards, int ct, int nevts, evstock* data)
197{
198 int i, j, minim, n, cmbtotal = 0;
199 char *name;
200 int mode = params.mode;
201 int pk = params.subsetsize;
202
203 // User provided a file of events.
204 if(READ_FROM_FILE == mode)
205 {
206 // Compute the total number of qualifier combinations and allocate memory to store them.
207 if (NULL == ((*indexmemo) = (int*)malloc(ct*sizeof(int)))) {
208 fprintf(stderr, "Failed to allocate memory.\n");
209 return 0;
210 }
211
212 // Find the index in the main stock whose event corresponds to that in the file provided.
213 // This simplifies looking up event qualifiers later.
214 for(i = 0; i < ct; ++i)
215 {
216 if(NULL == basenames[i])
217 {
218 (*indexmemo)[i] = -1;
219 cmbtotal -= 1;
220 continue;
221 }
222
223 // j is the index of the event name provided by the user.
224 for(j = 0; j < nevts; ++j)
225 {
226 name = evt_name(data, j);
227 if(strcmp(basenames[i], name) == 0)
228 {
229 break;
230 }
231 }
232
233 // If the event name provided by the user does not match any of the main event
234 // names in the architecture, then it either contains qualifiers or it does not
235 // exist.
236 if(cards[i] != 0 && j == nevts)
237 {
238 fprintf(stderr, "The provided event '%s' is either not in the architecture or contains qualifiers.\n" \
239 "If the latter, use '0' in place of the provided '%d'.\n", basenames[i], cards[i]);
240 cards[i] = 0;
241 }
242
243 // If an invalid (negative) qualifier count was given, use zero qualifiers.
244 if(cards[i] < 0)
245 {
246 fprintf(stderr, "The qualifier count (provided for event '%s') cannot be negative.\n", basenames[i]);
247 cards[i] = 0;
248 }
249
250 (*indexmemo)[i] = j;
251 }
252
253 // Count the total number of events to test.
254 for(i = 0; i < ct; ++i)
255 {
256 // If no qualifiers are used, then just count the event itself.
257 if(cards[i] <= 0)
258 {
259 cmbtotal += 1;
260 continue;
261 }
262
263 // Get the number of qualifiers which belong to the main event.
264 if((*indexmemo)[i] != -1)
265 {
266 n = num_quals(data, (*indexmemo)[i]);
267 }
268 else
269 {
270 n = 0;
271 }
272
273 // If the user specifies to use more qualifiers than are available
274 // for the main event, do not use any qualifiers. Otherwise, count
275 // the number of combinations of qualifiers for the main event.
276 minim = cards[i];
277 if(cards[i] > n || cards[i] < 0)
278 {
279 minim = 0;
280 }
281 cmbtotal += comb(n, minim);
282 }
283 }
284 // User wants to inspect all events in the architecture.
285 else
286 {
287 for(i = 0; i < nevts; ++i)
288 {
289 // Get the number of qualifiers which belong to the main event.
290 n = num_quals(data, i);
291
292 // If the user specifies to use more qualifiers than are available
293 // for the main event, do not use any qualifiers. Otherwise, count
294 // the number of combinations of qualifiers for the main event.
295 minim = pk;
296 if(pk > n || pk < 0)
297 {
298 minim = 0;
299 }
300 cmbtotal += comb(n, minim);
301 }
302 }
303
304 return cmbtotal;
305}
306
307static hw_desc_t *obtain_hardware_description(char *conf_file_name){
308 int i,j;
309 hw_desc_t *hw_desc;
311 const PAPI_hw_info_t *meminfo;
312
313 // Allocate some space.
314 hw_desc = (hw_desc_t *)calloc(1, sizeof(hw_desc_t));
315
316 // Set at least the L1 cache size to a default value.
317 hw_desc->dcache_line_size[0] = 64;
318
319 // Set other default values.
320 for( i=0; i<_MAX_SUPPORTED_CACHE_LEVELS; ++i ) {
321 hw_desc->split[i] = 1;
322 hw_desc->pts_per_reg[i] = 3;
323 }
324 hw_desc->mmsplit = 1;
325 hw_desc->pts_per_mm = 3;
326 hw_desc->maxPPB = 512;
327
328 // Obtain hardware values through PAPI_get_hardware_info().
329 meminfo = PAPI_get_hardware_info();
330 if( NULL != meminfo ) {
331 hw_desc->numcpus = meminfo->ncpu;
332 hw_desc->cache_levels = meminfo->mem_hierarchy.levels;
333 L = ( PAPI_mh_level_t * ) & ( meminfo->mem_hierarchy.level[0] );
334 for ( i = 0; i < meminfo->mem_hierarchy.levels && i<_MAX_SUPPORTED_CACHE_LEVELS; i++ ) {
335 for ( j = 0; j < 2; j++ ) {
336 if ( (PAPI_MH_TYPE_DATA == PAPI_MH_CACHE_TYPE(L[i].cache[j].type)) ||
337 (PAPI_MH_TYPE_UNIFIED == PAPI_MH_CACHE_TYPE(L[i].cache[j].type)) ){
338 hw_desc->dcache_line_size[i] = L[i].cache[j].line_size;
339 hw_desc->dcache_size[i] = L[i].cache[j].size;
340 hw_desc->dcache_assoc[i] = L[i].cache[j].associativity;
341 }
342 if ( (PAPI_MH_TYPE_INST == PAPI_MH_CACHE_TYPE(L[i].cache[j].type)) ||
343 (PAPI_MH_TYPE_UNIFIED == PAPI_MH_CACHE_TYPE(L[i].cache[j].type)) ){
344 hw_desc->icache_line_size[i] = L[i].cache[j].line_size;
345 hw_desc->icache_size[i] = L[i].cache[j].size;
346 hw_desc->icache_assoc[i] = L[i].cache[j].associativity;
347 }
348 }
349 }
350 }
351
352 // Read the config file, if there, in case the user wants to overwrite some values.
353 read_conf_file(conf_file_name, hw_desc);
354 return hw_desc;
355}
356
357
358
/*
 * Read one line of the configuration file and split it into "key = value".
 *
 * Everything after a '#' is treated as a comment and discarded.
 *
 * input - stream to read from.
 * key   - out: on success, a newly allocated string holding the text that
 *         precedes the first space or '='; the caller must free() it.
 *         Set to NULL in every other case.
 * value - out: the integer that follows the '='; set to 0 when the line is
 *         rejected.
 *
 * Returns 0 on success, a positive value when the line should be skipped
 * (not an assignment, or malformed), and a negative value when no further
 * line could be read.
 */
static int parse_line(FILE *input, char **key, long long *value){
    int status = 1;
    size_t linelen=0, len;
    char *line=NULL;
    char *pos=NULL;

    *key = NULL;

    // Read one line from the input file.
    int ret_val = (int)getline(&line, &linelen, input);
    if( ret_val < 0 ){
        // getline() may have allocated a buffer even though it failed.
        free(line);
        return ret_val;
    }

    // Kill the part of the line after the comment character '#'.
    pos = strchr(line, '#');
    if( NULL != pos ){
        *pos = '\0';
    }

    // Make sure the line is an assignment.
    pos = strchr(line, '=');
    if( NULL == pos ){
        goto done;
    }

    // The key is everything up to the first space or '='.
    len = strcspn(line, " =");
    *key = (char *)calloc((1+len),sizeof(char));
    if( NULL == *key ){
        fprintf(stderr, "Failed to allocate memory.\n");
        goto done;
    }
    memcpy(*key, line, len);

    // Scan the line to make sure it has the form "key = value"
    if( 1 != sscanf(pos, "= %lld", value) ){
        fprintf(stderr,"Malformed line in conf file: '%s'\n", line);
        goto done;
    }

    status = 0;

done:
    if( 0 != status ){
        // Hand nothing back to the caller for a rejected line.
        free(*key);
        *key = NULL;
        *value = 0;
    }
    free(line);
    return status;
}
403
404
405static void read_conf_file(char *conf_file_name, hw_desc_t *hw_desc){
406 FILE *input;
407
408 // Try to open the file.
409 input = fopen(conf_file_name, "r");
410 if (NULL == input ){
411 return;
412 }
413
414 while(1){
415 long long value;
416 char *key=NULL;
417
418 int ret_val = parse_line(input, &key, &value);
419 if( ret_val < 0 ){
420 free(key);
421 break;
422 }else if( ret_val > 0 ){
423 continue;
424 }
425
426 // If the user has set "AUTO_DISCOVERY_MODE = 1" then we don't need to process this file.
427 // Otherwise, any entry in this file should overwrite what we auto discovered.
428 if( !strcmp(key, "AUTO_DISCOVERY_MODE") && (value == 1) ){
429 return;
430 // Data caches (including unified caches)
431 }else if( !strcmp(key, "L1_DCACHE_LINE_SIZE") || !strcmp(key, "L1_UCACHE_LINE_SIZE") ){
432 hw_desc->dcache_line_size[0] = value;
433 }else if( !strcmp(key, "L2_DCACHE_LINE_SIZE") || !strcmp(key, "L2_UCACHE_LINE_SIZE") ){
434 hw_desc->dcache_line_size[1] = value;
435 }else if( !strcmp(key, "L3_DCACHE_LINE_SIZE") || !strcmp(key, "L3_UCACHE_LINE_SIZE") ){
436 hw_desc->dcache_line_size[2] = value;
437 }else if( !strcmp(key, "L4_DCACHE_LINE_SIZE") || !strcmp(key, "L4_UCACHE_LINE_SIZE") ){
438 hw_desc->dcache_line_size[3] = value;
439 }else if( !strcmp(key, "L1_DCACHE_SIZE") || !strcmp(key, "L1_UCACHE_SIZE") ){
440 if( hw_desc->cache_levels < 1 ) hw_desc->cache_levels = 1;
441 hw_desc->dcache_size[0] = value;
442 }else if( !strcmp(key, "L2_DCACHE_SIZE") || !strcmp(key, "L2_UCACHE_SIZE") ){
443 if( hw_desc->cache_levels < 2 ) hw_desc->cache_levels = 2;
444 hw_desc->dcache_size[1] = value;
445 }else if( !strcmp(key, "L3_DCACHE_SIZE") || !strcmp(key, "L3_UCACHE_SIZE") ){
446 if( hw_desc->cache_levels < 3 ) hw_desc->cache_levels = 3;
447 hw_desc->dcache_size[2] = value;
448 }else if( !strcmp(key, "L4_DCACHE_SIZE") || !strcmp(key, "L4_UCACHE_SIZE") ){
449 if( hw_desc->cache_levels < 4 ) hw_desc->cache_levels = 4;
450 hw_desc->dcache_size[3] = value;
451 // Instruction caches (including unified caches)
452 }else if( !strcmp(key, "L1_ICACHE_LINE_SIZE") || !strcmp(key, "L1_UCACHE_LINE_SIZE") ){
453 hw_desc->icache_line_size[0] = value;
454 }else if( !strcmp(key, "L2_ICACHE_LINE_SIZE") || !strcmp(key, "L2_UCACHE_LINE_SIZE") ){
455 hw_desc->icache_line_size[1] = value;
456 }else if( !strcmp(key, "L3_ICACHE_LINE_SIZE") || !strcmp(key, "L3_UCACHE_LINE_SIZE") ){
457 hw_desc->icache_line_size[2] = value;
458 }else if( !strcmp(key, "L4_ICACHE_LINE_SIZE") || !strcmp(key, "L4_UCACHE_LINE_SIZE") ){
459 hw_desc->icache_line_size[3] = value;
460 }else if( !strcmp(key, "L1_ICACHE_SIZE") || !strcmp(key, "L1_UCACHE_SIZE") ){
461 hw_desc->icache_size[0] = value;
462 }else if( !strcmp(key, "L2_ICACHE_SIZE") || !strcmp(key, "L2_UCACHE_SIZE") ){
463 hw_desc->icache_size[1] = value;
464 }else if( !strcmp(key, "L3_ICACHE_SIZE") || !strcmp(key, "L3_UCACHE_SIZE") ){
465 hw_desc->icache_size[2] = value;
466 }else if( !strcmp(key, "L4_ICACHE_SIZE") || !strcmp(key, "L4_UCACHE_SIZE") ){
467 hw_desc->icache_size[3] = value;
468 }else if( !strcmp(key, "L1_SPLIT") ){
469 hw_desc->split[0] = value;
470 }else if( !strcmp(key, "L2_SPLIT") ){
471 hw_desc->split[1] = value;
472 }else if( !strcmp(key, "L3_SPLIT") ){
473 hw_desc->split[2] = value;
474 }else if( !strcmp(key, "L4_SPLIT") ){
475 hw_desc->split[3] = value;
476 }else if( !strcmp(key, "MM_SPLIT") ){
477 hw_desc->mmsplit = value;
478 }else if( !strcmp(key, "PTS_PER_L1") ){
479 hw_desc->pts_per_reg[0] = value;
480 }else if( !strcmp(key, "PTS_PER_L2") ){
481 hw_desc->pts_per_reg[1] = value;
482 }else if( !strcmp(key, "PTS_PER_L3") ){
483 hw_desc->pts_per_reg[2] = value;
484 }else if( !strcmp(key, "PTS_PER_L4") ){
485 hw_desc->pts_per_reg[3] = value;
486 }else if( !strcmp(key, "PTS_PER_MM") ){
487 hw_desc->pts_per_mm = value;
488 }else if( !strcmp(key, "MAX_PPB") ){
489 hw_desc->maxPPB = value;
490 }
491
492 free(key);
493 key = NULL;
494 }
495 fclose(input);
496 return;
497}
498
/*
 * Read the contents of the file supplied by the user.
 *
 * Each accepted line has the form "<event name> <qualifier count>". Lines
 * that start with '#' are silently ignored; lines that contain no space or
 * that cannot be parsed are reported on stderr and ignored.
 *
 * inputfile  - path of the file to read.
 * basenames  - out: newly allocated array of newly allocated event names.
 * evnt_cards - out: newly allocated array of qualifier counts, parallel
 *              to *basenames.
 *
 * Returns the number of accepted entries. On failure (the file cannot be
 * opened or memory cannot be allocated) returns -1, releases everything
 * allocated so far, and leaves the output parameters untouched. On success
 * the caller owns both arrays and every name.
 */
int setup_evts(char* inputfile, char*** basenames, int** evnt_cards)
{
    size_t linelen = 0;
    int cnt = 0, i;
    char *line = NULL;
    FILE *input = NULL;
    int evnt_count = 256;
    char **names = NULL;
    int *cards = NULL;

    input = fopen(inputfile, "r");
    if (NULL == input) {
        fprintf(stderr, "Could not open %s.\n", inputfile);
        return -1;
    }

    names = (char **)calloc(evnt_count, sizeof(char *));
    cards = (int *)calloc(evnt_count, sizeof(int));
    if (NULL == names || NULL == cards) {
        fprintf(stderr, "Failed to allocate memory.\n");
        goto fail;
    }

    // Read the base event name and cardinality columns.
    // getline() reuses and grows 'line' as needed; it is freed once at the end.
    while (getline(&line, &linelen, input) >= 0)
    {
        // If this line was commented, silently ignore it.
        if (line[0] == '#') {
            continue;
        }

        // A usable line needs a space between the name and the count.
        if (NULL == strchr(line, ' ')) {
            fprintf(stderr,"problem with line: '%s'\n",line);
            continue;
        }

        // Make room for one more entry.
        if (cnt >= evnt_count)
        {
            int grown = evnt_count * 2;
            char **more_names;
            int *more_cards;

            // Keep the old pointers until realloc() is known to have worked.
            more_names = (char **)realloc(names, grown*sizeof(char *));
            if (NULL == more_names) {
                fprintf(stderr, "Failed to allocate memory.\n");
                goto fail;
            }
            names = more_names;

            more_cards = (int *)realloc(cards, grown*sizeof(int));
            if (NULL == more_cards) {
                fprintf(stderr, "Failed to allocate memory.\n");
                goto fail;
            }
            cards = more_cards;

            evnt_count = grown;
        }

        names[cnt] = NULL;

        // If this line was malformed, ignore it.
        if (2 != sscanf(line, "%ms %d", &(names[cnt]), &(cards[cnt])))
        {
            fprintf(stderr,"problem with line: '%s'\n",line);
            // The name may have been allocated even though the count was missing.
            free(names[cnt]);
            names[cnt] = NULL;
            cards[cnt] = -1;
            continue;
        }

        ++cnt;
    }

    free(line);
    fclose(input);

    *basenames = names;
    *evnt_cards = cards;

    return cnt;

fail:
    if (NULL != names) {
        for (i = 0; i < cnt; ++i) {
            free(names[i]);
        }
    }
    free(names);
    free(cards);
    free(line);
    fclose(input);

    return -1;
}
583
/*
 * Recursively builds the list of all combinations of an event's qualifiers.
 *
 * Each call decides, through 'flag', whether qualifier number 'ct' is part
 * of the combination and then recurses on the next qualifier. Once the last
 * qualifier has been decided, a selection of exactly 'pk' qualifiers is
 * written out as "name:qual:qual..." into the next free slot of 'allevts'.
 *
 * n       - number of qualifiers in 'list'.
 * pk      - number of qualifiers each combination must contain.
 * ct      - index of the qualifier being decided by this call.
 * list    - the qualifier strings.
 * name    - base event name.
 * allevts - output array; each stored string is newly allocated and owned
 *           by the caller.
 * track   - in/out: index of the next free slot in 'allevts'.
 * flag    - 1 to include qualifier 'ct', 0 to leave it out.
 * bitmap  - scratch array of n flags; restored to its incoming state
 *           before this call returns.
 */
void combine_qualifiers(int n, int pk, int ct, char** list, char* name, char** allevts, int* track, int flag, int* bitmap)
{
    int original;
    int counter = 0;
    int i;

    // Set flag in the array.
    original = bitmap[ct];
    bitmap[ct] = flag;

    // Count how many qualifiers are currently selected.
    for(i = 0; i < n; ++i)
    {
        counter += bitmap[i];
    }

    // Only make recursive calls if there are more items.
    if(ct+1 < n)
    {
        // Make recursive calls both with and without a given qualifier.
        // Recursion cannot exceed the number of qualifiers specified by
        // the user.
        if(counter < pk)
        {
            combine_qualifiers(n, pk, ct+1, list, name, allevts, track, 1, bitmap);
        }
        combine_qualifiers(n, pk, ct+1, list, name, allevts, track, 0, bitmap);
    }
    // Qualifier count matches that specified by the user.
    else if(counter == pk)
    {
        char *chunk, *cursor;
        size_t namelen = strlen(name);
        size_t evtsize = namelen+1;

        // Add one per qualifier to account for the colon in front of it.
        for(i = 0; i < n; ++i)
        {
            if(bitmap[i] == 1)
            {
                evtsize += strlen(list[i])+1;
            }
        }

        chunk = (char*)malloc(evtsize*sizeof(char));
        if (NULL == chunk) {
            fprintf(stderr, "Failed to allocate memory.\n");
        } else {
            // Construct the qualifier combination string in place.
            cursor = chunk;
            memcpy(cursor, name, namelen);
            cursor += namelen;
            for(i = 0; i < n; ++i)
            {
                if(bitmap[i] == 1)
                {
                    size_t quallen = strlen(list[i]);

                    *cursor++ = ':';
                    memcpy(cursor, list[i], quallen);
                    cursor += quallen;
                }
            }
            *cursor = '\0';

            // Add qualifier combination string to the list; the list takes
            // ownership of the buffer, so no extra copy is needed.
            allevts[*track] = chunk;
            *track += 1;
        }
    }

    // Undo effect of recursive call to combine other qualifiers.
    // This also runs after an allocation failure so the caller's view of
    // the bitmap stays consistent.
    bitmap[ct] = original;

    return;
}
660
661// Create the combinations of qualifiers for the events.
662void trav_evts(evstock* stock, int pk, int* cards, int nevts, int selexnsize, int mode, char** allevts, int* track, int* indexmemo, char** basenames)
663{
664 int i, j, k, n = 0;
665 char** chosen = NULL;
666 char* name = NULL;
667 int* bitmap = NULL;
668
669 // User provided a file of events.
670 if(READ_FROM_FILE == mode)
671 {
672 for(i = 0; i < selexnsize; ++i)
673 {
674 // Iterate through whole stock. If there are matches, proceed normally using the given cardinalities.
675 j = indexmemo[i];
676 if( -1 == j )
677 {
678 allevts[i] = NULL;
679 continue;
680 }
681
682 // Get event's name and qualifier count.
683 if(j == nevts)
684 {
685 // User a provided specific qualifier combination.
686 name = basenames[i];
687 }
688 else
689 {
690 name = evt_name(stock, j);
691 n = num_quals(stock, j);
692 }
693
694 // Create a list to contain the qualifiers.
695 if(cards[i] > 0)
696 {
697 chosen = (char**)malloc(n*sizeof(char*));
698 bitmap = (int*)calloc(n, sizeof(int));
699
700 if (NULL == chosen || NULL == bitmap) {
701 fprintf(stderr, "Failed to allocate memory.\n");
702 return;
703 }
704
705 // Store the qualifiers for the current event.
706 for(k = 0; k < n; ++k)
707 {
708 chosen[k] = strdup(stock->evts[j][k]);
709 }
710 }
711
712 // Get combinations of all current event's qualifiers.
713 if (n!=0 && cards[i]>0)
714 {
715 combine_qualifiers(n, cards[i], 0, chosen, name, allevts, track, 0, bitmap);
716 combine_qualifiers(n, cards[i], 0, chosen, name, allevts, track, 1, bitmap);
717 }
718 else
719 {
720 allevts[*track] = strdup(name);
721 *track += 1;
722 }
723
724 // Free the space back up.
725 if(cards[i] > 0)
726 {
727 for(k = 0; k < n; ++k)
728 {
729 free(chosen[k]);
730 }
731 free(chosen);
732 free(bitmap);
733 }
734 }
735 }
736 // User wants to inspect all events in the architecture.
737 else
738 {
739 for(i = 0; i < nevts; ++i)
740 {
741 // Get event's name and qualifier count.
742 n = num_quals(stock, i);
743 name = evt_name(stock, i);
744
745 // Show progress to the user.
746 //fprintf(stderr, "CURRENT EVENT: %s (%d/%d)\n", name, (i+1), nevts);
747
748 // Create a list to contain the qualifiers.
749 chosen = (char**)malloc(n*sizeof(char*));
750 bitmap = (int*)calloc(n, sizeof(int));
751
752 if (NULL == chosen || NULL == bitmap) {
753 fprintf(stderr, "Failed to allocate memory.\n");
754 return;
755 }
756
757 // Store the qualifiers for the current event.
758 for(j = 0; j < n; ++j)
759 {
760 chosen[j] = strdup(stock->evts[i][j]);
761 }
762
763 // Get combinations of all current event's qualifiers.
764 if (n!=0)
765 {
766 combine_qualifiers(n, pk, 0, chosen, name, allevts, track, 0, bitmap);
767 combine_qualifiers(n, pk, 0, chosen, name, allevts, track, 1, bitmap);
768 }
769 else
770 {
771 allevts[*track] = strdup(name);
772 *track += 1;
773 }
774
775 // Free the space back up.
776 for(j = 0; j < n; ++j)
777 {
778 free(chosen[j]);
779 }
780 free(chosen);
781 free(bitmap);
782 }
783 }
784
785 return;
786}
787
// Number of ordered selections of k objects out of n: n * (n-1) * ... * (n-k+1).
// The product is empty, and the result 1, whenever k is not positive.
int perm(int n, int k)
{
    const int stop = n - k;
    int factor = n;
    int result = 1;

    while(factor > stop)
    {
        result *= factor;
        factor -= 1;
    }

    return result;
}
802
/*
 * Compute the combinations of k objects from a set of n objects.
 *
 * The value is built up one factor at a time, dividing as it goes, so the
 * intermediate results stay close to the final one instead of growing like
 * a factorial. Every intermediate division is exact because each partial
 * product is itself a binomial coefficient.
 *
 * Returns 1 when k is not positive and 0 when k exceeds n. The result must
 * fit in an int; no saturation is performed for larger values.
 */
int comb(int n, int k)
{
    long long result = 1;
    int i;

    if(k <= 0)
    {
        return 1;
    }
    if(k > n)
    {
        return 0;
    }

    // C(n, k) == C(n, n-k); iterate over the smaller of the two.
    if(k > n-k)
    {
        k = n-k;
    }

    for(i = 1; i <= k; ++i)
    {
        result = result * (n-k+i) / i;
    }

    return (int)result;
}
808
// Print a percentage on stdout. Below 100 the value is followed by four
// backspaces so that the next update overwrites it; from 100 on, the line
// is terminated instead. The stream is flushed either way.
static void print_progress(int prg)
{
    if(prg >= 100)
    {
        printf("%3d%%\n",prg);
    }
    else
    {
        printf("%3d%%\b\b\b\b",prg);
    }

    fflush(stdout);
}
818
// Print the overall percentage on stdout. Below 100 it is followed by a
// "Current test" field and four backspaces; from 100 on, only the total is
// printed and the line is terminated. The stream is flushed either way.
static void print_progress2(int prg)
{
    if(prg >= 100)
    {
        printf("Total:%3d%%\n",prg);
    }
    else
    {
        printf("Total:%3d%% Current test: 0%%\b\b\b\b",prg);
    }

    fflush(stdout);
}
828
829void testbench(char** allevts, int cmbtotal, hw_desc_t *hw_desc, cat_params_t params, int myid, int nprocs)
830{
831 int i;
832 int junk=((int)getpid()+123)/456;
833 int low = myid*(cmbtotal/nprocs);
834 int cap = (myid+1)*(cmbtotal/nprocs);
835 int offset = nprocs*(1+cmbtotal/nprocs)-cmbtotal;
836
837 // Divide the work as evenly as possible.
838 if(myid >= offset) {
839 cap += myid-offset+1;
840 low += myid-offset;
841 }
842
843 // Make sure the user provided events and iterate through all events.
844 if( 0 == cmbtotal )
845 {
846 fprintf(stderr, "No events to measure.\n");
847 return;
848 }
849
850 // Run the branch benchmark by default if none are specified.
851 if( 0 == params.bench_type )
852 {
853 params.bench_type |= BENCH_BRANCH;
854 fprintf(stderr, "Warning: No benchmark specified. Running 'branch' by default.\n");
855 }
856
857 /* Benchmark I - Branch*/
858 if( params.bench_type & BENCH_BRANCH )
859 {
860 if(params.show_progress) printf("Branch Benchmarks: ");
861
862 for(i = low; i < cap; ++i)
863 {
864 if(params.show_progress) print_progress((100*i)/cmbtotal);
865
866 if( allevts[i] != NULL )
867 branch_driver(allevts[i], junk, hw_desc, params.outputdir);
868 }
869 if(params.show_progress) print_progress(100);
870 }
871
872 /* Benchmark II - Data Cache Reads*/
873 if( params.bench_type & BENCH_DCACHE_READ )
874 {
875 if ( !params.quick && 0 == myid )
876 {
877 if(params.show_progress)
878 {
879 printf("D-Cache Latencies: 0%%\b\b\b\b");
880 fflush(stdout);
881 }
882 d_cache_driver("cat::latencies", params, hw_desc, 1, 0);
883 if(params.show_progress) printf("100%%\n");
884 }
885
886 if(params.show_progress) printf("D-Cache Read Benchmarks: ");
887 for(i = low; i < cap; ++i)
888 {
889 if(params.show_progress) print_progress2((100*i)/cmbtotal);
890
891 if( allevts[i] != NULL ) {
892 d_cache_driver(allevts[i], params, hw_desc, 0, 0);
893 }
894 }
895 if(params.show_progress) print_progress2(100);
896 }
897
898 /* Benchmark III - Data Cache Writes*/
899 if( params.bench_type & BENCH_DCACHE_WRITE )
900 {
901 // If the READ benchmark was run, do not recompute the latencies.
902 if ( !(params.bench_type & BENCH_DCACHE_READ) && !params.quick)
903 {
904 if(params.show_progress)
905 {
906 printf("D-Cache Latencies: 0%%\b\b\b\b");
907 fflush(stdout);
908 }
909 d_cache_driver("cat::latencies", params, hw_desc, 1, 0);
910 if(params.show_progress) printf("100%%\n");
911 }
912
913 if(params.show_progress) printf("D-Cache Write Benchmarks: ");
914 for(i = low; i < cap; ++i)
915 {
916 if(params.show_progress) print_progress2((100*i)/cmbtotal);
917
918 if( allevts[i] != NULL ) {
919 d_cache_driver(allevts[i], params, hw_desc, 0, 1);
920 }
921 }
922 if(params.show_progress) print_progress2(100);
923 }
924
925 /* Benchmark IV - FLOPS*/
926 if( params.bench_type & BENCH_FLOPS )
927 {
928 if(params.show_progress) printf("FLOP Benchmarks: ");
929
930 for(i = low; i < cap; ++i)
931 {
932 if(params.show_progress) print_progress((100*i)/cmbtotal);
933
934 if( allevts[i] != NULL )
935 flops_driver(allevts[i], hw_desc, params.outputdir);
936 }
937 if(params.show_progress) print_progress(100);
938 }
939
940 /* Benchmark V - Instruction Cache*/
941 if( params.bench_type & BENCH_ICACHE_READ )
942 {
943 if(params.show_progress) printf("I-Cache Benchmarks: ");
944
945 for(i = low; i < cap; ++i)
946 {
947 if(params.show_progress) print_progress2((100*i)/cmbtotal);
948
949 if( allevts[i] != NULL )
950 i_cache_driver(allevts[i], junk, hw_desc, params.outputdir, params.show_progress);
951 }
952 if(params.show_progress) print_progress2(100);
953 }
954
955 /* Benchmark VI - Vector FLOPS*/
956 if( params.bench_type & BENCH_VEC )
957 {
958 if(params.show_progress) printf("Vector FLOP Benchmarks: ");
959
960 for(i = low; i < cap; ++i)
961 {
962 if(params.show_progress) print_progress((100*i)/cmbtotal);
963
964 if( allevts[i] != NULL )
965 vec_driver(allevts[i], hw_desc, params.outputdir);
966 }
967 if(params.show_progress) print_progress(100);
968 }
969
970 /* Benchmark VII - Instructions*/
971 if( params.bench_type & BENCH_INSTR )
972 {
973 if(params.show_progress) printf("Instruction Benchmarks: ");
974
975 for(i = low; i < cap; ++i)
976 {
977 if(params.show_progress) print_progress((100*i)/cmbtotal);
978
979 if( allevts[i] != NULL )
980 instr_driver(allevts[i], hw_desc, params.outputdir);
981 }
982 if(params.show_progress) print_progress(100);
983 }
984
985 return;
986}
987
988int parseArgs(int argc, char **argv, cat_params_t *params){
989 char *name = argv[0];
990 char *tmp = NULL;
991 int dirlen = 0;
992 int kflag = 0;
993 int inflag = 0;
994 FILE *test = NULL;
995 int len, status = 0;
996
997 params->subsetsize = -1;
998
999 // Parse the command line arguments
1000 while(--argc){
1001 ++argv;
1002 if( !strcmp(argv[0],"-h") ){
1004 return -1;
1005 }
1006 if( argc > 1 && !strcmp(argv[0],"-k") ){
1007 params->subsetsize = atoi(argv[1]);
1008 if( params->subsetsize < 0 )
1009 {
1010 params->subsetsize = 0;
1011 fprintf(stderr, "Warning: Cannot pass a negative value to -k.\n");
1012 }
1013 params->mode = USE_ALL_EVENTS;
1014 kflag = 1;
1015 --argc;
1016 ++argv;
1017 continue;
1018 }
1019 if( argc > 1 && !strcmp(argv[0],"-n") ){
1020 params->max_iter = atoi(argv[1]);
1021 --argc;
1022 ++argv;
1023 continue;
1024 }
1025 if( argc > 1 && !strcmp(argv[0],"-conf") ){
1026 params->conf_file = argv[1];
1027 --argc;
1028 ++argv;
1029 continue;
1030 }
1031 if( argc > 1 && !strcmp(argv[0],"-in") ){
1032 params->inputfile = argv[1];
1033 params->mode = READ_FROM_FILE;
1034 inflag = 1;
1035 --argc;
1036 ++argv;
1037 continue;
1038 }
1039 if( argc > 1 && !strcmp(argv[0],"-out") ){
1040 tmp = argv[1];
1041 --argc;
1042 ++argv;
1043 continue;
1044 }
1045 if( !strcmp(argv[0],"-verbose") ){
1046 params->show_progress = 1;
1047 continue;
1048 }
1049 if( !strcmp(argv[0],"-quick") ){
1050 params->quick = 1;
1051 continue;
1052 }
1053 if( !strcmp(argv[0],"-branch") ){
1054 params->bench_type |= BENCH_BRANCH;
1055 continue;
1056 }
1057 if( !strcmp(argv[0],"-dcr") ){
1058 params->bench_type |= BENCH_DCACHE_READ;
1059 continue;
1060 }
1061 if( !strcmp(argv[0],"-dcw") ){
1062 params->bench_type |= BENCH_DCACHE_WRITE;
1063 continue;
1064 }
1065 if( !strcmp(argv[0],"-flops") ){
1066 params->bench_type |= BENCH_FLOPS;
1067 continue;
1068 }
1069 if( !strcmp(argv[0],"-ic") ){
1070 params->bench_type |= BENCH_ICACHE_READ;
1071 continue;
1072 }
1073 if( !strcmp(argv[0],"-vec") ){
1074 params->bench_type |= BENCH_VEC;
1075 continue;
1076 }
1077 if( !strcmp(argv[0],"-instr") ){
1078 params->bench_type |= BENCH_INSTR;
1079 continue;
1080 }
1081
1083 return -1;
1084 }
1085
1086 // MODE INFO: mode 1 uses file; mode 2 uses all native events.
1087 if(READ_FROM_FILE == params->mode)
1088 {
1089 test = fopen(params->inputfile, "r");
1090 if(test == NULL)
1091 {
1092 fprintf(stderr, "Could not open %s. Exiting...\n", params->inputfile);
1093 return -1;
1094 }
1095 fclose(test);
1096 }
1097
1098 // Make sure user does not specify both modes simultaneously.
1099 if(kflag == 1 && inflag == 1)
1100 {
1101 fprintf(stderr, "Cannot use -k flag with -in flag. Exiting...\n");
1102 return -1;
1103 }
1104
1105 // Make sure user specifies mode explicitly.
1106 if(kflag == 0 && inflag == 0)
1107 {
1109 return -1;
1110 }
1111
1112 // Make sure output path was provided.
1113 if(tmp == NULL)
1114 {
1115 fprintf(stderr, "Output path not provided. Exiting...\n");
1116 return -1;
1117 }
1118
1119 // Write output files in the user-specified directory.
1120 dirlen = strlen(tmp);
1121 params->outputdir = (char*)malloc((2+dirlen)*sizeof(char));
1122
1123 if (NULL == params->outputdir) {
1124 fprintf(stderr, "Failed to allocate memory.\n");
1125 return -1;
1126 }
1127
1128 len = snprintf( params->outputdir, 2+dirlen, "%s/", tmp);
1129 if( len < 1+dirlen )
1130 {
1131 fprintf(stderr, "Problem with output directory name.\n");
1132 return -1;
1133 }
1134
1135 // Make sure files can be written to the provided path.
1136 status = access(params->outputdir, W_OK);
1137 if(status != 0)
1138 {
1139 fprintf(stderr, "Permission to write files to \"%s\" denied. Make sure the path exists and is writable.\n", tmp);
1140 return -1;
1141 }
1142
1143 return 0;
1144}
1145
/*
 * Show the user how to properly use the program.
 *
 * name - program name to show in the usage line and in the example.
 *
 * The text is written to stdout.
 */
void print_usage(char* name)
{
    fprintf(stdout, "\nUsage: %s [OPTIONS...]\n", name);

    fprintf(stdout, "\nRequired:\n");
    fprintf(stdout, " -out <path> Output files location.\n");
    fprintf(stdout, " -in <file> Events and cardinalities file.\n");
    fprintf(stdout, " -k <value> Cardinality of subsets.\n");
    fprintf(stdout, " Parameters \"-k\" and \"-in\" are mutually exclusive.\n");

    fprintf(stdout, "\nOptional:\n");
    fprintf(stdout, " -conf <path> Configuration file location.\n");
    fprintf(stdout, " -verbose Show benchmark progress in the standard output.\n");
    fprintf(stdout, " -quick Skip latency tests.\n");
    fprintf(stdout, " -n <value> Number of iterations for data cache kernels.\n");
    fprintf(stdout, " -branch Branch kernels.\n");
    fprintf(stdout, " -dcr Data cache reading kernels.\n");
    fprintf(stdout, " -dcw Data cache writing kernels.\n");
    fprintf(stdout, " -flops Floating point operations kernels.\n");
    fprintf(stdout, " -ic Instruction cache kernels.\n");
    fprintf(stdout, " -vec Vector FLOPs kernels.\n");
    fprintf(stdout, " -instr Instructions kernels.\n");

    fprintf(stdout, "\n");
    fprintf(stdout, "EXAMPLE: %s -in event_list.txt -out OUTPUT_DIRECTORY -branch -dcw\n", name);
    fprintf(stdout, "\n");

    return;
}
double tmp
int i
const char * names[NUM_EVENTS]
void branch_driver(char *papi_event_name, int junk, hw_desc_t *hw_desc, char *outdir)
Definition: branch.c:15
get information about the system hardware
initialize the PAPI library.
Finish using PAPI and free all related resources.
Returns a string describing the PAPI error code.
Initialize thread support in the PAPI library.
static pthread_t myid[NUM_THREADS]
void d_cache_driver(char *papi_event_name, cat_params_t params, hw_desc_t *hw_desc, int latency_only, int mode)
Definition: dcache.c:17
#define BENCH_DCACHE_WRITE
Definition: driver.h:17
#define READ_FROM_FILE
Definition: driver.h:12
#define BENCH_FLOPS
Definition: driver.h:14
#define BENCH_VEC
Definition: driver.h:19
#define BENCH_DCACHE_READ
Definition: driver.h:16
#define USE_ALL_EVENTS
Definition: driver.h:11
#define BENCH_BRANCH
Definition: driver.h:15
void print_usage()
#define BENCH_INSTR
Definition: driver.h:20
#define BENCH_ICACHE_READ
Definition: driver.h:18
int num_evts(evstock *stock)
Definition: eventstock.c:173
void remove_stock(evstock *stock)
Definition: eventstock.c:198
int build_stock(evstock *stock)
Definition: eventstock.c:13
int num_quals(evstock *stock, int base_evt)
Definition: eventstock.c:178
char * evt_name(evstock *stock, int index)
Definition: eventstock.c:193
void flops_driver(char *papi_event_name, hw_desc_t *hw_desc, char *outdir)
Definition: flops.c:841
#define PAPI_VER_CURRENT
Definition: f90papi.h:54
#define PAPI_OK
Definition: f90papi.h:73
#define _MAX_SUPPORTED_CACHE_LEVELS
Definition: hw_desc.h:4
void i_cache_driver(char *papi_event_name, int junk, hw_desc_t *hw_desc, char *outdir, int show_progress)
Definition: icache.c:13
void instr_driver(char *papi_event_name, hw_desc_t *hw_desc, char *outdir)
static pthread_key_t key
uint16_t type
void testbench(char **allevts, int cmbtotal, hw_desc_t *hw_desc, cat_params_t params, int myid, int nprocs)
Definition: main.c:829
int setup_evts(char *inputfile, char ***basenames, int **evnt_cards)
Definition: main.c:500
int perm(int n, int k)
Definition: main.c:789
static int parse_line(FILE *input, char **key, long long *value)
Definition: main.c:359
static void print_progress2(int prg)
Definition: main.c:819
void trav_evts(evstock *stock, int pk, int *cards, int nevts, int selexnsize, int mode, char **allevts, int *track, int *indexmemo, char **basenames)
Definition: main.c:662
static hw_desc_t * obtain_hardware_description(char *conf_file_name)
Definition: main.c:307
static void print_progress(int prg)
Definition: main.c:809
int check_cards(cat_params_t params, int **indexmemo, char **basenames, int *cards, int ct, int nevts, evstock *data)
Definition: main.c:196
void combine_qualifiers(int n, int pk, int ct, char **list, char *name, char **allevts, int *track, int flag, int *bitmap)
Definition: main.c:585
unsigned long int omp_get_thread_num_wrapper()
Definition: main.c:191
int comb(int n, int k)
Definition: main.c:804
int parseArgs(int argc, char **argv, cat_params_t *params)
Definition: main.c:988
static void read_conf_file(char *conf_file_name, hw_desc_t *hw_desc)
Definition: main.c:405
Return codes and api definitions.
#define PAPI_MH_TYPE_DATA
Definition: papi.h:720
#define PAPI_MH_CACHE_TYPE(a)
Definition: papi.h:724
#define PAPI_MH_TYPE_INST
Definition: papi.h:719
#define PAPI_MH_TYPE_UNIFIED
Definition: papi.h:723
__ssize_t ssize_t
FILE * stdout
FILE * stderr
int fclose(FILE *__stream)
int main()
Definition: pernode.c:20
const char * name
Definition: rocs.c:225
int
Definition: sde_internal.h:89
Hardware info structure.
Definition: papi.h:774
PAPI_mh_info_t mem_hierarchy
Definition: papi.h:793
int ncpu
Definition: papi.h:775
int levels
Definition: papi.h:768
PAPI_mh_level_t level[PAPI_MAX_MEM_HIERARCHY_LEVELS]
Definition: papi.h:769
PAPI_mh_cache_info_t cache[PAPI_MH_MAX_LEVELS]
Definition: papi.h:762
int quick
Definition: params.h:10
char * conf_file
Definition: params.h:11
char * inputfile
Definition: params.h:12
int show_progress
Definition: params.h:9
int subsetsize
Definition: params.h:5
int mode
Definition: params.h:6
int bench_type
Definition: params.h:8
int max_iter
Definition: params.h:7
char * outputdir
Definition: params.h:13
char *** evts
Definition: eventstock.h:10
int maxPPB
Definition: hw_desc.h:9
int numcpus
Definition: hw_desc.h:7
long long icache_line_size[_MAX_SUPPORTED_CACHE_LEVELS]
Definition: hw_desc.h:17
int icache_assoc[_MAX_SUPPORTED_CACHE_LEVELS]
Definition: hw_desc.h:19
int split[_MAX_SUPPORTED_CACHE_LEVELS]
Definition: hw_desc.h:12
int cache_levels
Definition: hw_desc.h:8
int pts_per_reg[_MAX_SUPPORTED_CACHE_LEVELS]
Definition: hw_desc.h:13
long long dcache_line_size[_MAX_SUPPORTED_CACHE_LEVELS]
Definition: hw_desc.h:14
long long dcache_size[_MAX_SUPPORTED_CACHE_LEVELS]
Definition: hw_desc.h:15
long long icache_size[_MAX_SUPPORTED_CACHE_LEVELS]
Definition: hw_desc.h:18
int dcache_assoc[_MAX_SUPPORTED_CACHE_LEVELS]
Definition: hw_desc.h:16
int pts_per_mm
Definition: hw_desc.h:11
int mmsplit
Definition: hw_desc.h:10
void vec_driver(char *papi_event_name, hw_desc_t *hw_desc, char *outdir)
Definition: vec.c:9