75 {
76
77
79
80
81
82 int retVal, r, code;
83 int ComponentID, NumComponents, IB_ID = -1;
85 int eventCount = 0;
86 int eventNum = 0;
87
88
89 int addEventFailCount = 0, codeConvertFailCount = 0, eventInfoFailCount = 0;
90 int PAPIstartFailCount = 0, PAPIstopFailCount = 0;
92 int failedEventIndex = 0;
93
94
98
99
101
104
105
106 long long startTime, endTime;
107 double elapsedTime;
108
109
112 test_fail(__FILE__, __LINE__,
"PAPI_library_init failed. The test has been terminated.\n",retVal);
113 }
114
115
117
118
119 for ( ComponentID = 0; ComponentID < NumComponents; ComponentID++ ) {
120
122 fprintf(
stderr,
"WARNING: PAPI_get_component_info failed on one of the components.\n"
123 "\t The test will continue for now, but it will be skipped later on\n"
124 "\t if this error was for a component under test.\n");
125 continue;
126 }
127
128 if (strcmp(cmpInfo->
name,
"infiniband") != 0) {
129 continue;
130 }
131
132
134 printf("INFO: Component %d (%d) - %d events - %s\n",
135 ComponentID, cmpInfo->
CmpIdx,
137 }
138
140 test_skip(__FILE__,__LINE__,
"Infiniband Component is disabled. The test has been terminated.\n", 0);
141 break;
142 }
143
145 IB_ID = ComponentID;
146 break;
147 }
148
149
150 if (eventCount==0) {
151 fprintf(
stderr,
"FATAL: No events found for the Infiniband component, even though it is enabled.\n"
152 " The test will be skipped.\n");
153 test_skip(__FILE__,__LINE__,
"No events found for the Infiniband component.\n", 0);
154 }
155
156
157
158 int NumProcs, Rank;
159
160
161 MPI_Init (&argc, &argv);
162 MPI_Comm_size (MPI_COMM_WORLD, &NumProcs);
163 MPI_Comm_rank (MPI_COMM_WORLD, &Rank);
164
166 printf("INFO: This test should trigger some network events.\n");
167 }
168
169
172
173 while (Nmax_per_Proc <= Nmin_per_Proc)
174 Nmax_per_Proc = Nmin_per_Proc*10;
175 int Nmax = Nmax_per_Proc * NumProcs;
176 int NstepSize = (Nmax_per_Proc - Nmin_per_Proc)/
NSTEPS;
177
179 int memoryAllocateFailure = 0, ALLmemoryAllocateFailure = 0;
180
181
182 double *X, *Y, *Out;
183 double *Xp, *Yp, *Outp;
184
185
186 if (Rank == 0) {
187 X = (double *) malloc (sizeof(double) * Nmax);
188 Y = (double *) malloc (sizeof(double) * Nmax);
189 Out = (double *) malloc (sizeof(double) * Nmax);
190
191
192
193 if ( (X == NULL) || (Y == NULL) || (Out == NULL) ) {
194 fprintf(
stderr,
"FATAL: Failed to allocate memory on Master Node.\n");
195 memoryAllocateFailure = 1;
196 }
197
198 if (memoryAllocateFailure == 0) {
199
201 printf("INFO: Master is initializing data.\n");
202
203 for (
i = 0;
i < Nmax;
i++ ) {
206 }
207
209 printf("INFO: Master has successfully initialized arrays.\n");
210
211 }
212 }
213
214
215 MPI_Bcast (&memoryAllocateFailure, 1, MPI_INT, 0, MPI_COMM_WORLD);
216 if (memoryAllocateFailure == 1)
217 test_fail(__FILE__,__LINE__,
"Could not allocate memory during the test. This is fatal and the test has been terminated.\n", 0);
218
219 memoryAllocateFailure = 0;
220
221
222 Xp = (double *) malloc (sizeof(double) * Nmax_per_Proc);
223 Yp = (double *) malloc (sizeof(double) * Nmax_per_Proc);
224 Outp = (double *) malloc (sizeof(double) * Nmax_per_Proc);
225
226
227 if ( (Xp == NULL) || (Yp == NULL) || (Outp == NULL) ) {
228 fprintf(
stderr,
"FATAL: Failed to allocate %zu bytes on Rank %d.\n",
sizeof(
double)*Nmax_per_Proc, Rank);
229 memoryAllocateFailure = 1;
230 }
231 MPI_Allreduce (&memoryAllocateFailure, &ALLmemoryAllocateFailure, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
232 if (ALLmemoryAllocateFailure > 0)
233 test_fail(__FILE__,__LINE__,
"Could not allocate memory during the test. This is fatal and the test has been terminated.\n", 0);
234
235
236 int Nstep_per_Proc;
239 Nstep_per_Proc = Nmin_per_Proc + (
i * NstepSize);
240
241 if ((
i == (
NSTEPS - 1)) || (Nstep_per_Proc > Nmax_per_Proc))
242 Nstep_per_Proc = Nmax_per_Proc;
243 DataSizes[
i] = Nstep_per_Proc;
244 }
245
246
247
249
250
253
254
255
256
257 fprintf(
stderr,
"FATAL: Could not create an eventSet on MPI Rank %d due to: %s.\n"
259 test_fail(__FILE__, __LINE__,
"PAPI_create_eventset failed. This is fatal and the test has been terminated.\n", retVal);
260 }
261
262
265
266
267
269
270
273
275 fprintf(
stderr,
"FATAL: Could not add an event to eventSet on MPI Rank %d due to insufficient memory.\n"
276 " Test will not proceed.\n", Rank);
277 test_fail(__FILE__, __LINE__,
"PAPI_add_event failed due to fatal error and the test has been terminated.\n", retVal);
278 }
279
281 fprintf(
stderr,
"WARNING: Could not add an event to eventSet on MPI Rank %d since eventSet does not exist.\n"
282 "\t Test will proceed attempting to create a new eventSet\n", Rank);
286 test_fail(__FILE__, __LINE__,
"PAPI_create_eventset failed while handling failure of PAPI_add_event."
287 " This is fatal and the test has been terminated.\n", retVal);
288 continue;
289 }
290
292 long long tempValue;
293 fprintf(
stderr,
"WARNING: Could not add an event to eventSet on MPI Rank %d since eventSet is already counting.\n"
294 "\t Test will proceed attempting to stop counting and re-attempting to add current event.\n", Rank);
297 test_fail(__FILE__,__LINE__,
"PAPI_stop failed while handling failure of PAPI_add_event."
298 " This is fatal and the test has been terminated.\n", retVal);
301 test_fail(__FILE__,__LINE__,
"PAPI_cleanup_eventset failed while handling failure of PAPI_add_event."
302 " This is fatal and the test has been terminated.\n", retVal);
303 continue;
304 }
305
306
307 addEventFailCount++;
308 failedEventCodes[failedEventIndex] = code;
309 failedEventIndex++;
310 fprintf(
stderr,
"WARNING: Could not add an event to eventSet on MPI Rank %d due to: %s.\n"
311 "\t Test will proceed attempting to add other events.\n", Rank,
PAPI_strerror(retVal));
312
314
315 if (addEventFailCount >= eventCount)
316 break;
317
318 continue;
319 }
320
321
324
325 codeConvertFailCount++;
326 fprintf(
stderr,
"WARNING: PAPI_event_code_to_name failed due to: %s.\n"
327 "\t Test will proceed but an event name will not be available.\n",
PAPI_strerror(retVal));
328 strncpy(eventNames[eventNum], "ERROR:NOT_AVAILABLE", sizeof(eventNames[0])-1);
329 eventNames[eventNum][sizeof(eventNames[0])-1] = '\0';
330 }
331
332
335
336 eventInfoFailCount++;
337 fprintf(
stderr,
"WARNING: PAPI_get_event_info failed due to: %s.\n"
338 "\t Test will proceed but an event description will not be available.\n",
PAPI_strerror(retVal));
339 strncpy(description[eventNum], "ERROR:NOT_AVAILABLE", sizeof(description[0])-1);
340 description[eventNum][sizeof(description[0])-1] = '\0';
341 } else {
342 strncpy(description[eventNum], eventInfo.
long_descr,
sizeof(description[0])-1);
343 description[eventNum][sizeof(description[0])-1] = '\0';
344 }
345
346
347
349
350
353
354
355
356
357 PAPIstartFailCount++;
358 failedEventCodes[failedEventIndex] = code;
359 failedEventIndex++;
360 fprintf(
stderr,
"WARNING: PAPI_start failed on Event Number %d (%s) due to: %s.\n"
361 "\t Test will proceed with other events if available.\n",
363
365 values[k][eventNum] = (
unsigned long long) - 1;
366
367 break;
368 }
369
371 printf("INFO: Doing MPI communication for %s: min. %ld bytes transferred by each process.\n",
372 eventNames[eventNum], DataSizes[
i]*
sizeof(
double));
373
374 MPI_Scatter (X, DataSizes[
i], MPI_DOUBLE, Xp, DataSizes[
i], MPI_DOUBLE, 0, MPI_COMM_WORLD);
375 MPI_Scatter (Y, DataSizes[
i], MPI_DOUBLE, Yp, DataSizes[
i], MPI_DOUBLE, 0, MPI_COMM_WORLD);
376
377
378
379 for (j = 0; j < DataSizes[
i]; j++)
380 Outp [j] = Xp [j] + Yp [j];
381
382 MPI_Gather (Outp, DataSizes[
i], MPI_DOUBLE, Out, DataSizes[
i], MPI_DOUBLE, 0, MPI_COMM_WORLD);
383
384
387
388
389
390 PAPIstopFailCount++;
392 fprintf(
stderr,
"WARNING: PAPI_stop failed on Event Number %d (%s) since eventSet is not running.\n"
393 "\t Test will attempt to restart counting on this eventSet.\n",
394 eventNum, eventNames[eventNum]);
395 if (PAPIstopFailCount <
NSTEPS) {
397 continue;
398 }
399 }
400
401 failedEventCodes[failedEventIndex] = code;
402 failedEventIndex++;
403 fprintf(
stderr,
"WARNING: PAPI_stop failed on Event Number %d (%s) due to: %s.\n"
404 "\t Test will proceed with other events if available.\n",
406
408 values[k][eventNum] = (
unsigned long long) - 1;
409
410 break;
411 }
412
413
414 if (strstr(eventNames[eventNum], ":port_rcv_data")) {
415 rxCount[
i] =
values[
i][eventNum] * 4;
416 }
417
418 if (strstr(eventNames[eventNum], ":port_xmit_data")) {
419 txCount[
i] =
values[
i][eventNum] * 4;
420 }
421
422 }
423
424
427
429 fprintf(
stderr,
"WARNING: Could not clean up eventSet on MPI Rank %d since eventSet does not exist.\n"
430 "\t Test will proceed attempting to create a new eventSet\n", Rank);
434 test_fail(__FILE__, __LINE__,
"PAPI_create_eventset failed while handling failure of PAPI_cleanup_eventset.\n"
435 "This is fatal and the test has been terminated.\n", retVal);
437 long long tempValue;
438 fprintf(
stderr,
"WARNING: Could not clean up eventSet on MPI Rank %d since eventSet is already counting.\n"
439 "\t Test will proceed attempting to stop counting and re-attempting to clean up.\n", Rank);
442 test_fail(__FILE__,__LINE__,
"PAPI_stop failed while handling failure of PAPI_cleanup_eventset."
443 "This is fatal and the test has been terminated.\n", retVal);
446 test_fail(__FILE__,__LINE__,
"PAPI_cleanup_eventset failed once again while handling failure of PAPI_cleanup_eventset."
447 "This is fatal and the test has been terminated.\n", retVal);
448 } else {
449 test_fail(__FILE__, __LINE__,
"PAPI_cleanup_eventset failed:", retVal);
450 }
451 }
452
453
454 eventNum++;
456
457 }
458
459
460 free (Xp); free (Yp); free (Outp);
461
462
465
467 fprintf(
stderr,
"WARNING: Could not destroy eventSet on MPI Rank %d since eventSet does not exist or has invalid value.\n"
468 "\t Test will proceed with other operations.\n", Rank);
470 long long tempValue;
471 fprintf(
stderr,
"WARNING: Could not destroy eventSet on MPI Rank %d since eventSet is already counting.\n"
472 "\t Test will proceed attempting to stop counting and re-attempting to clean up.\n", Rank);
475 test_fail(__FILE__,__LINE__,
"PAPI_stop failed while handling failure of PAPI_destroy_eventset."
476 "This is fatal and the test has been terminated.\n", retVal);
479 test_fail(__FILE__,__LINE__,
"PAPI_cleanup_eventset failed while handling failure of PAPI_destroy_eventset."
480 "This is fatal and the test has been terminated.\n", retVal);
483 test_fail(__FILE__,__LINE__,
"PAPI_destroy_eventset failed once again while handling failure of PAPI_destroy_eventset."
484 " This is fatal and the test has been terminated.\n", retVal);
485 } else {
486 fprintf(
stderr,
"WARNING: Could not destroy eventSet on MPI Rank %d since there is an internal bug in PAPI.\n"
487 "\t Please report this to the developers. Test will proceed and operation may be unexpected.\n", Rank);
488 }
489 }
490
491
492
494 elapsedTime = ((double) (endTime-startTime))/1.0e9;
495
496
498 int eventX;
499
500 printf("POST WORK EVENT VALUES (Rank, Event Name, List of Event Values w/ Different Data Sizes)>>>\n");
501 for (eventX = 0; eventX < eventNum; eventX++) {
502 printf("\tRank %d> %s --> \t\t", Rank, eventNames[eventX]);
505 printf(
"%lld, ",
values[
i][eventX]);
506 else
507 printf(
"%lld.",
values[
i][eventX]);
508 }
509 printf("\n");
510 }
511
512
513 if (Rank == 0) {
514 printf("\n\nTHE DESCRIPTION OF EVENTS IS AS FOLLOWS>>>\n");
515 for (eventX = 0; eventX < eventNum; eventX++) {
516 printf("\t%s \t\t--> %s \n", eventNames[eventX], description[eventX]);
517 }
518 }
519 }
520
521
522 int computeTestPass = 0, computeTestPassCount = 0;
523 if (Rank == 0) {
524
525 for (
i = 0;
i < Nmax;
i++) {
526 if ( fabs(Out[
i] - (X[
i] + Y[
i])) < 0.00001 )
527 computeTestPassCount++;
528 }
529
530 if (computeTestPassCount == Nmax)
531 computeTestPass = 1;
532
533
534 free (X); free (Y); free (Out);
535 }
536
537 MPI_Bcast (&computeTestPass, 1, MPI_INT, 0, MPI_COMM_WORLD);
538
539
540 long long rxCountSumWorkers[
NSTEPS], txCountSumWorkers[
NSTEPS];
541 long long *allProcessRxEvents, *allProcessTxEvents;
542 int txFailedIndex = 0, rxFailedIndex = 0;
543 int txFailedDataSizes[
NSTEPS], rxFailedDataSizes[
NSTEPS];
544 int eventValueTestPass = 0;
545 if ((txCount[0] > 0) && (rxCount[0] > 0)) {
546 if (Rank == 0) {
547 allProcessRxEvents = (
long long*) malloc(
sizeof(
long long) * NumProcs *
NSTEPS);
548 allProcessTxEvents = (
long long*) malloc(
sizeof(
long long) * NumProcs *
NSTEPS);
549 }
550
551 MPI_Gather (&rxCount,
NSTEPS, MPI_LONG_LONG, allProcessRxEvents,
NSTEPS, MPI_LONG_LONG, 0, MPI_COMM_WORLD);
552 MPI_Gather (&txCount,
NSTEPS, MPI_LONG_LONG, allProcessTxEvents,
NSTEPS, MPI_LONG_LONG, 0, MPI_COMM_WORLD);
553
554
555 if (Rank == 0) {
556 memset (rxCountSumWorkers, 0,
sizeof(
long long) *
NSTEPS);
557 memset (txCountSumWorkers, 0,
sizeof(
long long) *
NSTEPS);
559 for (j = 1; j < NumProcs; j++) {
560 rxCountSumWorkers[
i] += allProcessRxEvents[j*
NSTEPS+
i];
561 txCountSumWorkers[
i] += allProcessTxEvents[j*
NSTEPS+
i];
562 }
563 }
564
567
568
570 txFailedDataSizes[txFailedIndex] = DataSizes[
i];
571 txFailedIndex++;
573 printf("WARNING: The transmit event count at Master Node (%lld) is not equal"
574 " to receive event counts at Worker Nodes (%lld) when using %ld bytes!\n"
575 "\t A difference of %lld was recorded.\n", txCount[
i], rxCountSumWorkers[
i],
576 DataSizes[
i]*
sizeof(
double), llabs(rxCountSumWorkers[
i] - txCount[
i]));
577 } else {
579 printf("PASSED: The transmit event count at Master Node (%lld) is almost equal"
580 " to receive event counts at Worker Nodes (%lld) when using %ld bytes.\n",
581 txCount[
i], rxCountSumWorkers[
i], DataSizes[
i]*
sizeof(
double));
582 }
583
584
586 rxFailedDataSizes[rxFailedIndex] = DataSizes[
i];
587 rxFailedIndex++;
589 printf("WARNING: The receive event count at Master Node (%lld) is not equal"
590 " to transmit event counts at Worker Nodes (%lld) when using %ld bytes!\n"
591 " A difference of %lld was recorded.\n", rxCount[
i], txCountSumWorkers[
i],
592 DataSizes[
i]*
sizeof(
double), llabs(txCountSumWorkers[
i] - rxCount[
i]));
593 } else {
595 printf("PASSED: The receive event count at Master Node (%lld) is almost equal"
596 " to transmit event counts at Worker Nodes (%lld) when using %ld bytes.\n",
597 rxCount[
i], txCountSumWorkers[
i], DataSizes[
i]*
sizeof(
double));
598 }
599 }
600
601
604 eventValueTestPass = 1;
607 eventValueTestPass = -1;
608 else
609 eventValueTestPass = 0;
610
611 }
612
613
614 MPI_Bcast (&eventValueTestPass, 1, MPI_INT, 0, MPI_COMM_WORLD);
615
616 } else {
617 eventValueTestPass = -2;
618 }
619
620
621
622 int eventNumTestPass = 0;
623
625 eventNumTestPass = 1;
627 eventNumTestPass = -1;
628 else
629 eventNumTestPass = 0;
630
631
632
634
635 printf("\n\n************************ TEST SUMMARY (EVENTS) ******************************\n"
636 "No. of Events NOT tested successfully: %d (%.1f%%)\n"
637 "Note: the above failed event count is for Master node.\n"
638 "Total No. of Events reported by component info: %d\n",
639 failedEventIndex, ((float) failedEventIndex/eventCount)*100.0, eventCount);
640
641 if (failedEventIndex > 0) {
642 printf("\tNames of Events NOT tested: ");
644 for (
i = 0;
i < failedEventIndex;
i++) {
647 strncpy(failedEventName, "ERROR:NOT_AVAILABLE", sizeof(failedEventName)-1);
648 failedEventName[sizeof(failedEventName)-1] = '\0';
649 }
650 printf("%s ", failedEventName);
651 if ((
i > 0) && (
i % 2 == 1)) printf(
"\n \t\t\t\t");
652 }
653 printf("\n");
654
655 printf("\tThe error counts for different PAPI routines are as follows:\n"
656 "\t\t\tNo. of PAPI add event errors (major) --> %d\n"
657 "\t\t\tNo. of PAPI code convert errors (minor) --> %d\n"
658 "\t\t\tNo. of PAPI event info errors (minor) --> %d\n"
659 "\t\t\tNo. of PAPI start errors (major) --> %d\n"
660 "\t\t\tNo. of PAPI stop errors (major) --> %d\n",
661 addEventFailCount, codeConvertFailCount, eventInfoFailCount, PAPIstartFailCount, PAPIstopFailCount);
662 }
663 printf("The PAPI event test has ");
664 if (eventNumTestPass == 1) printf("PASSED\n");
665 else if (eventNumTestPass == -1) printf("PASSED WITH WARNING\n");
666 else printf("FAILED\n");
667
668
669 printf("************************ TEST SUMMARY (EVENT VALUES) ************************\n");
670 if ((txCount[0] > 0) && (rxCount[0] > 0)) {
671 printf("No. of times transmit event at Master node did NOT match up receive events at worker nodes: %d (%.1f%%)\n"
672 "No. of times receive event at Master node did NOT match up transmit events at worker nodes: %d (%.1f%%)\n"
673 "Total No. of data sizes tested: %d\n"
674 "\tList of Data Sizes tested in bytes:\n\t\t\t",
675 txFailedIndex, ((
float) txFailedIndex/
NSTEPS)*100.0, rxFailedIndex, ((
float) rxFailedIndex/
NSTEPS)*100.0,
NSTEPS);
677 printf(
"%ld ",DataSizes[
i]*
sizeof(
double));
678 printf("\n");
679 if (txFailedIndex > 0 || rxFailedIndex > 0) {
680 printf("\tList of Data Sizes where transmit count at Master was not equal to sum of all worker receive counts:\n"
681 "\t\t\t");
682 for (
i = 0;
i < txFailedIndex;
i++)
683 printf(
"%ld ", txFailedDataSizes[
i]*
sizeof(
double));
684 printf("\n\tList of Data Sizes where receive count at Master was not equal to sum of all worker transmit counts:\n"
685 "\t\t\t");
686 for (
i = 0;
i < rxFailedIndex;
i++)
687 printf(
"%ld ", rxFailedDataSizes[
i]*
sizeof(
double));
688 printf("\n");
689 }
690 printf("The PAPI event value test has ");
691 if (eventValueTestPass == 1) printf("PASSED\n");
692 else if (eventValueTestPass == -1) printf("PASSED WITH WARNING\n");
693 else printf("FAILED\n");
694 } else {
695 printf("Transmit or receive events were NOT found!\n");
696 }
697
698
699 printf("************************ TEST SUMMARY (COMPUTE VALUES) **********************\n");
700 if (computeTestPassCount != Nmax) {
701 printf("No. of times sanity check FAILED on the floating point computation: %d (%.1f%%)\n"
702 "Total No. of floating point computations performed: %d \n",
703 Nmax-computeTestPassCount, ((float) (Nmax-computeTestPassCount)/Nmax)*100.0, Nmax);
704 } else {
705 printf("Sanity check PASSED on all floating point computations.\n"
706 "Note: this may pass even if one event was tested successfully!\n");
707 }
708 printf("The overall test took %.3f secs.\n\n", elapsedTime);
709 }
710
711
712 MPI_Finalize();
713
714
715 if (computeTestPass == 1 && eventValueTestPass == 1 && eventNumTestPass == 1) {
716
717
719 }
720 else if ( (eventValueTestPass < 0 && (eventNumTestPass < 0 || eventNumTestPass == 1) ) ||
721 (eventValueTestPass == 1 && eventNumTestPass < 0) ||
722 (eventValueTestPass == 1 && eventNumTestPass == 1 && computeTestPass == 0) ) {
723 test_warn(__FILE__,__LINE__,
"A warning was generated during any PAPI related tests or sanity check on computation failed", 0);
725 }
726 else {
727
728
729 test_fail(__FILE__, __LINE__,
"Any of PAPI event related tests have failed", 0);
730 }
731
732}
#define EVENT_VAL_DIFF_THRESHOLD
#define NSIZE_PASS_THRESHOLD
#define NSIZE_WARN_THRESHOLD
#define EVENT_PASS_THRESHOLD
#define EVENT_WARN_THRESHOLD
Add a PAPI preset or native hardware event to an event set.
Empty and destroy an EventSet.
Create a new empty PAPI EventSet.
Empty and destroy an EventSet.
Enumerate PAPI preset or native events for a given component.
Convert a numeric hardware event code to a name.
Get information about a specific software component.
Get the event's name and description info.
Get real time counter value in nanoseconds.
Initialize the PAPI library.
Get the number of components available on the system.
Start counting hardware events in an event set.
Stop counting hardware events in an event set.
Returns a string describing the PAPI error code.
static long long values[NUM_EVENTS]
int tests_quiet(int argc, char **argv)
void PAPI_NORETURN test_fail(const char *file, int line, const char *call, int retval)
void PAPI_NORETURN test_pass(const char *filename)
void test_warn(const char *file, int line, const char *call, int retval)
void PAPI_NORETURN test_skip(const char *file, int line, const char *call, int retval)
char name[PAPI_MAX_STR_LEN]
char long_descr[PAPI_HUGE_STR_LEN]