PAPI 7.1.0.0
Loading...
Searching...
No Matches
MPI_test_infiniband_events.c
Go to the documentation of this file.
1/****************************/
2/* THIS IS OPEN SOURCE CODE */
3/****************************/
4
34#include <stdlib.h>
35#include <stdio.h>
36#include <string.h>
37#include <math.h>
38#include <mpi.h>
39
40/* headers required by PAPI */
41#include "papi.h"
42#include "papi_test.h"
43
44/* constants */
45// NSIZE_MIN/MAX: min/max no. of double floating point
46// values allocated at each node
47#define NSIZE_MIN 10000
48#define NSIZE_MAX 100000
49// No. of different data sizes to be
50// tested b/w NSIZE_MIN and NSIZE_MAX
51#define NSTEPS 9
52// The max no. of infiniband events expected
53#define MAX_IB_EVENTS 150
54// Threshold value to use when comparing TX/RX event values,
55// i.e., error will be recorded when any difference greater
56// than the threshold occurs
57#define EVENT_VAL_DIFF_THRESHOLD 100
58// PASS_THRESHOLD: percentage of values out of all possibilities
59// which need to be correct for the test to PASS
60// WARN_THRESHOLD: If PASS_THRESHOLD is not met, then this threshold
61// is used to check if the test can be declared
62// PASS WITH WARNING. Otherwise, the test is declared
63// as FAILED.
64// NSIZE_* : No. of Data Sizes out of all possible NSTEPS data sizes
65// where TX/RX event value comparison will be performed to check
66// expected behavior.
67#define NSIZE_PASS_THRESHOLD 90
68#define NSIZE_WARN_THRESHOLD 50
69// EVENT_* : No. of events out of all possible events as reported by
70// component_info which need to be added successfully to the
71// event set.
72#define EVENT_PASS_THRESHOLD 90
73#define EVENT_WARN_THRESHOLD 50
74
75int main (int argc, char **argv) {
76
77 /* Set TESTS_QUIET variable */
78 tests_quiet( argc, argv );
79
80 /************************* SETUP PAPI ENV *************************************
81 *******************************************************************************/
82 int retVal, r, code;
83 int ComponentID, NumComponents, IB_ID = -1;
84 int EventSet = PAPI_NULL;
85 int eventCount = 0; // total events as reported by component info
86 int eventNum = 0; // number of events successfully tested
87
88 /* error reporting */
89 int addEventFailCount = 0, codeConvertFailCount = 0, eventInfoFailCount = 0;
90 int PAPIstartFailCount = 0, PAPIstopFailCount = 0;
91 int failedEventCodes[MAX_IB_EVENTS];
92 int failedEventIndex = 0;
93
94 /* Note: these are fixed length arrays */
95 char eventNames[MAX_IB_EVENTS][PAPI_MAX_STR_LEN];
96 char description[MAX_IB_EVENTS][PAPI_MAX_STR_LEN];
97 long long values[NSTEPS][MAX_IB_EVENTS];
98
99 /* these record certain event values for event value testing */
100 long long rxCount[NSTEPS], txCount[NSTEPS];
101
102 const PAPI_component_info_t *cmpInfo = NULL;
103 PAPI_event_info_t eventInfo;
104
105 /* for timing the test */
106 long long startTime, endTime;
107 double elapsedTime;
108
109 /* PAPI Initialization */
111 if ( retVal != PAPI_VER_CURRENT ) {
112 test_fail(__FILE__, __LINE__,"PAPI_library_init failed. The test has been terminated.\n",retVal);
113 }
114
115 /* Get total number of components detected by PAPI */
116 NumComponents = PAPI_num_components();
117
118 /* Check if infiniband component exists */
119 for ( ComponentID = 0; ComponentID < NumComponents; ComponentID++ ) {
120
121 if ( (cmpInfo = PAPI_get_component_info(ComponentID)) == NULL ) {
122 fprintf(stderr, "WARNING: PAPI_get_component_info failed on one of the components.\n"
123 "\t The test will continue for now, but it will be skipped later on\n"
124 "\t if this error was for a component under test.\n");
125 continue;
126 }
127
128 if (strcmp(cmpInfo->name, "infiniband") != 0) {
129 continue;
130 }
131
132 // if we are here, Infiniband component is found
133 if (!TESTS_QUIET) {
134 printf("INFO: Component %d (%d) - %d events - %s\n",
135 ComponentID, cmpInfo->CmpIdx,
136 cmpInfo->num_native_events, cmpInfo->name);
137 }
138
139 if (cmpInfo->disabled) {
140 test_skip(__FILE__,__LINE__,"Infiniband Component is disabled. The test has been terminated.\n", 0);
141 break;
142 }
143
144 eventCount = cmpInfo->num_native_events;
145 IB_ID = ComponentID;
146 break;
147 }
148
149 /* if we did not find any valid events, just skip the test. */
150 if (eventCount==0) {
151 fprintf(stderr, "FATAL: No events found for the Infiniband component, even though it is enabled.\n"
152 " The test will be skipped.\n");
153 test_skip(__FILE__,__LINE__,"No events found for the Infiniband component.\n", 0);
154 }
155
156 /************************* SETUP MPI ENV **************************************
157 *******************************************************************************/
158 int NumProcs, Rank;
159
160 /* Initialize MPI environment */
161 MPI_Init (&argc, &argv);
162 MPI_Comm_size (MPI_COMM_WORLD, &NumProcs);
163 MPI_Comm_rank (MPI_COMM_WORLD, &Rank);
164
165 if ((!TESTS_QUIET) && (Rank == 0)) {
166 printf("INFO: This test should trigger some network events.\n");
167 }
168
169 /* data sizes assigned here */
170 int Nmax_per_Proc = NSIZE_MAX;
171 int Nmin_per_Proc = NSIZE_MIN;
172 // fix data size if not appropriately set
173 while (Nmax_per_Proc <= Nmin_per_Proc)
174 Nmax_per_Proc = Nmin_per_Proc*10;
175 int Nmax = Nmax_per_Proc * NumProcs;
176 int NstepSize = (Nmax_per_Proc - Nmin_per_Proc)/NSTEPS;
177
178 int i, j, k; // loop variables
179 int memoryAllocateFailure = 0, ALLmemoryAllocateFailure = 0; // error flags
180
181 /* data arrays */
182 double *X, *Y, *Out;
183 double *Xp, *Yp, *Outp;
184
185 /* Master will initialize data arrays */
186 if (Rank == 0) {
187 X = (double *) malloc (sizeof(double) * Nmax);
188 Y = (double *) malloc (sizeof(double) * Nmax);
189 Out = (double *) malloc (sizeof(double) * Nmax);
190
191 // check if memory was successfully allocated.
192 // Do NOT quit from here. Need to quit safely.
193 if ( (X == NULL) || (Y == NULL) || (Out == NULL) ) {
194 fprintf(stderr, "FATAL: Failed to allocate memory on Master Node.\n");
195 memoryAllocateFailure = 1;
196 }
197
198 if (memoryAllocateFailure == 0) {
199
200 if (!TESTS_QUIET)
201 printf("INFO: Master is initializing data.\n");
202
203 for ( i = 0; i < Nmax; i++ ) {
204 X[i] = i*0.25;
205 Y[i] = i*0.75;
206 }
207
208 if (!TESTS_QUIET)
209 printf("INFO: Master has successfully initialized arrays.\n");
210
211 }
212 }
213
214 // communicate to workers if master was able to successfully allocate memory
215 MPI_Bcast (&memoryAllocateFailure, 1, MPI_INT, 0, MPI_COMM_WORLD);
216 if (memoryAllocateFailure == 1)
217 test_fail(__FILE__,__LINE__,"Could not allocate memory during the test. This is fatal and the test has been terminated.\n", 0);
218
219 memoryAllocateFailure = 0; // re-use flag
220
221 /* allocate memory for all nodes */
222 Xp = (double *) malloc (sizeof(double) * Nmax_per_Proc);
223 Yp = (double *) malloc (sizeof(double) * Nmax_per_Proc);
224 Outp = (double *) malloc (sizeof(double) * Nmax_per_Proc);
225
226 // handle error cases for memory allocation failure for all nodes.
227 if ( (Xp == NULL) || (Yp == NULL) || (Outp == NULL) ) {
228 fprintf(stderr, "FATAL: Failed to allocate %zu bytes on Rank %d.\n", sizeof(double)*Nmax_per_Proc, Rank);
229 memoryAllocateFailure = 1;
230 }
231 MPI_Allreduce (&memoryAllocateFailure, &ALLmemoryAllocateFailure, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
232 if (ALLmemoryAllocateFailure > 0)
233 test_fail(__FILE__,__LINE__,"Could not allocate memory during the test. This is fatal and the test has been terminated.\n", 0);
234
235 /* calculate data size for each compute step */
236 int Nstep_per_Proc;
237 int DataSizes[NSTEPS];
238 for (i = 0; i < NSTEPS; i++) {
239 Nstep_per_Proc = Nmin_per_Proc + (i * NstepSize);
240 //last iteration or when max size is exceeded
241 if ((i == (NSTEPS - 1)) || (Nstep_per_Proc > Nmax_per_Proc))
242 Nstep_per_Proc = Nmax_per_Proc;
243 DataSizes[i] = Nstep_per_Proc;
244 }
245
246 /************************* MAIN TEST CODE *************************************
247 *******************************************************************************/
248 startTime = PAPI_get_real_nsec();
249
250 /* create an eventSet */
251 retVal = PAPI_create_eventset ( &EventSet );
252 if (retVal != PAPI_OK) {
253 // handle error cases for PAPI_create_eventset()
254 // Two outcomes are possible here:
255 // 1. PAPI_EINVAL: invalid argument. This should not occur.
256 // 2. PAPI_ENOMEM: insufficient memory. If this is the case, then we need to quit the test.
257 fprintf(stderr, "FATAL: Could not create an eventSet on MPI Rank %d due to: %s.\n"
258 " Test will not proceed.\n", Rank, PAPI_strerror(retVal));
259 test_fail(__FILE__, __LINE__, "PAPI_create_eventset failed. This is fatal and the test has been terminated.\n", retVal);
260 } // end -- handle error cases for PAPI_create_eventset()
261
262 /* find the code for first event in component */
263 code = PAPI_NATIVE_MASK;
264 r = PAPI_enum_cmp_event ( &code, PAPI_ENUM_FIRST, IB_ID );
265
266 /* add each event individually in the eventSet and measure event values. */
267 /* for each event, repeat work with different data sizes. */
268 while ( r == PAPI_OK ) {
269
270 // attempt to add event to event set
271 retVal = PAPI_add_event (EventSet, code);
272 if (retVal != PAPI_OK ) {
273 // handle error cases for PAPI_add_event()
274 if (retVal == PAPI_ENOMEM) {
275 fprintf(stderr, "FATAL: Could not add an event to eventSet on MPI Rank %d due to insufficient memory.\n"
276 " Test will not proceed.\n", Rank);
277 test_fail(__FILE__, __LINE__, "PAPI_add_event failed due to fatal error and the test has been terminated.\n", retVal);
278 }
279
280 if (retVal == PAPI_ENOEVST) {
281 fprintf(stderr, "WARNING: Could not add an event to eventSet on MPI Rank %d since eventSet does not exist.\n"
282 "\t Test will proceed attempting to create a new eventSet\n", Rank);
284 retVal = PAPI_create_eventset ( &EventSet );
285 if (retVal != PAPI_OK)
286 test_fail(__FILE__, __LINE__, "PAPI_create_eventset failed while handling failure of PAPI_add_event."
287 " This is fatal and the test has been terminated.\n", retVal);
288 continue;
289 }
290
291 if (retVal == PAPI_EISRUN) {
292 long long tempValue;
293 fprintf(stderr, "WARNING: Could not add an event to eventSet on MPI Rank %d since eventSet is already counting.\n"
294 "\t Test will proceed attempting to stop counting and re-attempting to add current event.\n", Rank);
295 retVal = PAPI_stop (EventSet, &tempValue);
296 if (retVal != PAPI_OK)
297 test_fail(__FILE__,__LINE__,"PAPI_stop failed while handling failure of PAPI_add_event."
298 " This is fatal and the test has been terminated.\n", retVal);
300 if (retVal != PAPI_OK)
301 test_fail(__FILE__,__LINE__,"PAPI_cleanup_eventset failed while handling failure of PAPI_add_event."
302 " This is fatal and the test has been terminated.\n", retVal);
303 continue;
304 }
305
306 // for all other errors, skip an event
307 addEventFailCount++; // error reporting
308 failedEventCodes[failedEventIndex] = code;
309 failedEventIndex++;
310 fprintf(stderr, "WARNING: Could not add an event to eventSet on MPI Rank %d due to: %s.\n"
311 "\t Test will proceed attempting to add other events.\n", Rank, PAPI_strerror(retVal));
312
313 r = PAPI_enum_cmp_event (&code, PAPI_ENUM_EVENTS, IB_ID);
314
315 if (addEventFailCount >= eventCount) // if no event was added successfully
316 break;
317
318 continue;
319 } // end -- handle error cases for PAPI_add_event()
320
321 /* get event name of added event */
322 retVal = PAPI_event_code_to_name (code, eventNames[eventNum]);
323 if (retVal != PAPI_OK ) {
324 // handle error cases for PAPI_event_code_to_name().
325 codeConvertFailCount++; // error reporting
326 fprintf(stderr, "WARNING: PAPI_event_code_to_name failed due to: %s.\n"
327 "\t Test will proceed but an event name will not be available.\n", PAPI_strerror(retVal));
328 strncpy(eventNames[eventNum], "ERROR:NOT_AVAILABLE", sizeof(eventNames[0])-1);
329 eventNames[eventNum][sizeof(eventNames[0])-1] = '\0';
330 } // end -- handle error cases for PAPI_event_code_to_name()
331
332 /* get long description of added event */
333 retVal = PAPI_get_event_info (code, &eventInfo);
334 if (retVal != PAPI_OK ) {
335 // handle error cases for PAPI_get_event_info()
336 eventInfoFailCount++; // error reporting
337 fprintf(stderr, "WARNING: PAPI_get_event_info failed due to: %s.\n"
338 "\t Test will proceed but an event description will not be available.\n", PAPI_strerror(retVal));
339 strncpy(description[eventNum], "ERROR:NOT_AVAILABLE", sizeof(description[0])-1);
340 description[eventNum][sizeof(description[0])-1] = '\0';
341 } else {
342 strncpy(description[eventNum], eventInfo.long_descr, sizeof(description[0])-1);
343 description[eventNum][sizeof(description[0])-1] = '\0';
344 }
345
346 /****************** PERFORM WORK (W/ DIFFERENT DATA SIZES) *********************
347 *******************************************************************************/
348 for (i = 0; i < NSTEPS; i++) {
349
350 /* start recording event value */
351 retVal = PAPI_start (EventSet);
352 if (retVal != PAPI_OK ) {
353 // handle error cases for PAPI_start()
354 // we need to skip the current event being counted for all errors,
355 // in all cases, errors will be handled later on.
356
357 PAPIstartFailCount++; // error reporting
358 failedEventCodes[failedEventIndex] = code;
359 failedEventIndex++;
360 fprintf(stderr, "WARNING: PAPI_start failed on Event Number %d (%s) due to: %s.\n"
361 "\t Test will proceed with other events if available.\n",
362 eventNum, eventNames[eventNum], PAPI_strerror(retVal));
363
364 for (k = i; k < NSTEPS; k++) // fill invalid event values.
365 values[k][eventNum] = (unsigned long long) - 1;
366
367 break; // try next event
368 } // end -- handle error cases for PAPI_start()
369
370 if ((!TESTS_QUIET) && (Rank == 0))
371 printf("INFO: Doing MPI communication for %s: min. %ld bytes transferred by each process.\n",
372 eventNames[eventNum], DataSizes[i]*sizeof(double));
373
374 MPI_Scatter (X, DataSizes[i], MPI_DOUBLE, Xp, DataSizes[i], MPI_DOUBLE, 0, MPI_COMM_WORLD);
375 MPI_Scatter (Y, DataSizes[i], MPI_DOUBLE, Yp, DataSizes[i], MPI_DOUBLE, 0, MPI_COMM_WORLD);
376
377 /* perform calculation. */
378 /* Note: there is redundant computation here. */
379 for (j = 0; j < DataSizes[i]; j++)
380 Outp [j] = Xp [j] + Yp [j];
381
382 MPI_Gather (Outp, DataSizes[i], MPI_DOUBLE, Out, DataSizes[i], MPI_DOUBLE, 0, MPI_COMM_WORLD);
383
384 /* stop recording and collect event value */
385 retVal = PAPI_stop (EventSet, &values[i][eventNum]);
386 if (retVal != PAPI_OK ) {
387 // handle error cases for PAPI_stop()
388 // we need to skip the current event for all errors
389 // except one case, as below.
390 PAPIstopFailCount++; // error reporting
391 if (retVal == PAPI_ENOTRUN) {
392 fprintf(stderr, "WARNING: PAPI_stop failed on Event Number %d (%s) since eventSet is not running.\n"
393 "\t Test will attempt to restart counting on this eventSet.\n",
394 eventNum, eventNames[eventNum]);
395 if (PAPIstopFailCount < NSTEPS) {
396 i = i - 1; // re-attempt this data size
397 continue;
398 }
399 }
400 // for all other errors, try next event.
401 failedEventCodes[failedEventIndex] = code;
402 failedEventIndex++;
403 fprintf(stderr, "WARNING: PAPI_stop failed on Event Number %d (%s) due to: %s.\n"
404 "\t Test will proceed with other events if available.\n",
405 eventNum, eventNames[eventNum], PAPI_strerror(retVal));
406
407 for (k = i; k < NSTEPS; k++) // fill invalid event values
408 values[k][eventNum] = (unsigned long long) - 1;
409
410 break;
411 } // end -- handle error cases for PAPI_stop()
412
413 /* record number of bytes received */
414 if (strstr(eventNames[eventNum], ":port_rcv_data")) {
415 rxCount[i] = values[i][eventNum] * 4; // counter value needs to be multiplied by 4 to get total number of bytes
416 }
417 /* record number of bytes transmitted */
418 if (strstr(eventNames[eventNum], ":port_xmit_data")) {
419 txCount[i] = values[i][eventNum] * 4;
420 }
421
422 } // end -- work loop
423
424 /* Done, clean up eventSet for next iteration */
426 if (retVal != PAPI_OK) {
427 // handle failure cases for PAPI_cleanup_eventset()
428 if (retVal == PAPI_ENOEVST) {
429 fprintf(stderr, "WARNING: Could not clean up eventSet on MPI Rank %d since eventSet does not exist.\n"
430 "\t Test will proceed attempting to create a new eventSet\n", Rank);
432 retVal = PAPI_create_eventset ( &EventSet );
433 if (retVal != PAPI_OK)
434 test_fail(__FILE__, __LINE__, "PAPI_create_eventset failed while handling failure of PAPI_cleanup_eventset.\n"
435 "This is fatal and the test has been terminated.\n", retVal);
436 } else if (retVal == PAPI_EISRUN) {
437 long long tempValue;
438 fprintf(stderr, "WARNING: Could not clean up eventSet on MPI Rank %d since eventSet is already counting.\n"
439 "\t Test will proceed attempting to stop counting and re-attempting to clean up.\n", Rank);
440 retVal = PAPI_stop (EventSet, &tempValue);
441 if (retVal != PAPI_OK)
442 test_fail(__FILE__,__LINE__,"PAPI_stop failed while handling failure of PAPI_cleanup_eventset."
443 "This is fatal and the test has been terminated.\n", retVal);
445 if (retVal != PAPI_OK)
446 test_fail(__FILE__,__LINE__,"PAPI_cleanup_eventset failed once again while handling failure of PAPI_cleanup_eventset."
447 "This is fatal and the test has been terminated.\n", retVal);
448 } else {
449 test_fail(__FILE__, __LINE__, "PAPI_cleanup_eventset failed:", retVal);
450 }
451 } // end -- handle failure cases for PAPI_cleanup_eventset()
452
453 /* get next event */
454 eventNum++;
455 r = PAPI_enum_cmp_event (&code, PAPI_ENUM_EVENTS, IB_ID);
456
457 } // end -- event loop
458
459 // free memory at all nodes
460 free (Xp); free (Yp); free (Outp);
461
462 /* Done, destroy eventSet */
463 retVal = PAPI_destroy_eventset( &EventSet );
464 if (retVal != PAPI_OK) {
465 // handle error cases for PAPI_destroy_eventset()
466 if (retVal == PAPI_ENOEVST || retVal == PAPI_EINVAL) {
467 fprintf(stderr, "WARNING: Could not destroy eventSet on MPI Rank %d since eventSet does not exist or has invalid value.\n"
468 "\t Test will proceed with other operations.\n", Rank);
469 } else if (retVal == PAPI_EISRUN) {
470 long long tempValue;
471 fprintf(stderr, "WARNING: Could not destroy eventSet on MPI Rank %d since eventSet is already counting.\n"
472 "\t Test will proceed attempting to stop counting and re-attempting to clean up.\n", Rank);
473 retVal = PAPI_stop (EventSet, &tempValue);
474 if (retVal != PAPI_OK)
475 test_fail(__FILE__,__LINE__,"PAPI_stop failed while handling failure of PAPI_destroy_eventset."
476 "This is fatal and the test has been terminated.\n", retVal);
478 if (retVal != PAPI_OK)
479 test_fail(__FILE__,__LINE__,"PAPI_cleanup_eventset failed while handling failure of PAPI_destroy_eventset."
480 "This is fatal and the test has been terminated.\n", retVal);
482 if (retVal != PAPI_OK)
483 test_fail(__FILE__,__LINE__,"PAPI_destroy_eventset failed once again while handling failure of PAPI_destroy_eventset."
484 " This is fatal and the test has been terminated.\n", retVal);
485 } else {
486 fprintf(stderr, "WARNING: Could not destroy eventSet on MPI Rank %d since there is an internal bug in PAPI.\n"
487 "\t Please report this to the developers. Test will proceed and operation may be unexpected.\n", Rank);
488 }
489 } // end -- handle failure cases for PAPI_destroy_eventset()
490
491 /*************************** SUMMARIZE RESULTS ********************************
492 ******************************************************************************/
493 endTime = PAPI_get_real_nsec();
494 elapsedTime = ((double) (endTime-startTime))/1.0e9;
495
496 /* print results: event values and descriptions */
497 if (!TESTS_QUIET) {
498 int eventX;
499 // print event values at each process/rank
500 printf("POST WORK EVENT VALUES (Rank, Event Name, List of Event Values w/ Different Data Sizes)>>>\n");
501 for (eventX = 0; eventX < eventNum; eventX++) {
502 printf("\tRank %d> %s --> \t\t", Rank, eventNames[eventX]);
503 for (i = 0; i < NSTEPS; i++) {
504 if (i < NSTEPS-1)
505 printf("%lld, ", values[i][eventX]);
506 else
507 printf("%lld.", values[i][eventX]);
508 }
509 printf("\n");
510 }
511
512 // print description of each event
513 if (Rank == 0) {
514 printf("\n\nTHE DESCRIPTION OF EVENTS IS AS FOLLOWS>>>\n");
515 for (eventX = 0; eventX < eventNum; eventX++) {
516 printf("\t%s \t\t--> %s \n", eventNames[eventX], description[eventX]);
517 }
518 }
519 }
520
521 /* test summary: 1) sanity check on floating point computation */
522 int computeTestPass = 0, computeTestPassCount = 0;
523 if (Rank == 0) {
524 // check results of computation
525 for (i = 0; i < Nmax; i++) {
526 if ( fabs(Out[i] - (X[i] + Y[i])) < 0.00001 )
527 computeTestPassCount++;
528 }
529 // summarize results of computation
530 if (computeTestPassCount == Nmax)
531 computeTestPass = 1;
532
533 // free memory
534 free (X); free (Y); free (Out);
535 }
536 // communicate test results to everyone
537 MPI_Bcast (&computeTestPass, 1, MPI_INT, 0, MPI_COMM_WORLD);
538
539 /* test summary: 2) check TX and RX event values, if available */
540 long long rxCountSumWorkers[NSTEPS], txCountSumWorkers[NSTEPS];
541 long long *allProcessRxEvents, *allProcessTxEvents;
542 int txFailedIndex = 0, rxFailedIndex = 0;
543 int txFailedDataSizes[NSTEPS], rxFailedDataSizes[NSTEPS];
544 int eventValueTestPass = 0; // for test summary
545 if ((txCount[0] > 0) && (rxCount[0] > 0)) {
546 if (Rank == 0) {
547 allProcessRxEvents = (long long*) malloc(sizeof(long long) * NumProcs * NSTEPS);
548 allProcessTxEvents = (long long*) malloc(sizeof(long long) * NumProcs * NSTEPS);
549 }
550 // get all rxCount/txCount at master. Used to check if rx/tx counts match up.
551 MPI_Gather (&rxCount, NSTEPS, MPI_LONG_LONG, allProcessRxEvents, NSTEPS, MPI_LONG_LONG, 0, MPI_COMM_WORLD);
552 MPI_Gather (&txCount, NSTEPS, MPI_LONG_LONG, allProcessTxEvents, NSTEPS, MPI_LONG_LONG, 0, MPI_COMM_WORLD);
553
554 // perform event count check at master
555 if (Rank == 0) {
556 memset (rxCountSumWorkers, 0, sizeof(long long) * NSTEPS);
557 memset (txCountSumWorkers, 0, sizeof(long long) * NSTEPS);
558 for (i = 0; i < NSTEPS; i++) {
559 for (j = 1; j < NumProcs; j++) {
560 rxCountSumWorkers[i] += allProcessRxEvents[j*NSTEPS+i];
561 txCountSumWorkers[i] += allProcessTxEvents[j*NSTEPS+i];
562 }
563 }
564
565 if (!TESTS_QUIET) printf("\n\n");
566 for (i = 0; i < NSTEPS; i++) {
567 // check: Master TX event ~= Sum of all RX events across all workers (NumProcs-1)
568 // difference threshold may need to be adjusted based on observed values
569 if ((llabs(rxCountSumWorkers[i] - txCount[i]) > EVENT_VAL_DIFF_THRESHOLD)) {
570 txFailedDataSizes[txFailedIndex] = DataSizes[i];
571 txFailedIndex++;
572 if (!TESTS_QUIET)
573 printf("WARNING: The transmit event count at Master Node (%lld) is not equal"
574 " to receive event counts at Worker Nodes (%lld) when using %ld bytes!\n"
575 "\t A difference of %lld was recorded.\n", txCount[i], rxCountSumWorkers[i],
576 DataSizes[i]*sizeof(double), llabs(rxCountSumWorkers[i] - txCount[i]));
577 } else {
578 if (!TESTS_QUIET)
579 printf("PASSED: The transmit event count at Master Node (%lld) is almost equal"
580 " to receive event counts at Worker Nodes (%lld) when using %ld bytes.\n",
581 txCount[i], rxCountSumWorkers[i], DataSizes[i]*sizeof(double));
582 }
583
584 // check: Master RX event ~= Sum of all TX events across all workers (NumProcs-1)
585 if ((llabs(txCountSumWorkers[i] - rxCount[i]) > EVENT_VAL_DIFF_THRESHOLD)) {
586 rxFailedDataSizes[rxFailedIndex] = DataSizes[i];
587 rxFailedIndex++;
588 if (!TESTS_QUIET)
589 printf("WARNING: The receive event count at Master Node (%lld) is not equal"
590 " to transmit event counts at Worker Nodes (%lld) when using %ld bytes!\n"
591 " A difference of %lld was recorded.\n", rxCount[i], txCountSumWorkers[i],
592 DataSizes[i]*sizeof(double), llabs(txCountSumWorkers[i] - rxCount[i]));
593 } else {
594 if (!TESTS_QUIET)
595 printf("PASSED: The receive event count at Master Node (%lld) is almost equal"
596 " to transmit event counts at Worker Nodes (%lld) when using %ld bytes.\n",
597 rxCount[i], txCountSumWorkers[i], DataSizes[i]*sizeof(double));
598 }
599 }
600
601 // test evaluation criteria
602 if ( (((float) txFailedIndex / NSTEPS) <= (1.0 - (float) NSIZE_PASS_THRESHOLD/100)) &&
603 (((float) rxFailedIndex / NSTEPS) <= (1.0 - (float) NSIZE_PASS_THRESHOLD/100)) )
604 eventValueTestPass = 1; // pass
605 else if ( (((float) txFailedIndex / NSTEPS) <= (1.0 - (float) NSIZE_WARN_THRESHOLD/100)) &&
606 (((float) rxFailedIndex / NSTEPS) <= (1.0 - (float) NSIZE_WARN_THRESHOLD/100)) )
607 eventValueTestPass = -1; // warning
608 else
609 eventValueTestPass = 0; // fail
610
611 } // end -- check RX/TX counts for all data sizes at Master node.
612
613 // communicate test results to everyone, since only master knows the result
614 MPI_Bcast (&eventValueTestPass, 1, MPI_INT, 0, MPI_COMM_WORLD);
615
616 } else {
617 eventValueTestPass = -2; // not available
618 } // end -- event value test
619
620 /* test summary: 3) number of events added and counted successfully */
621 // Note: under some rare circumstances, the number of failed events at each node may be different.
622 int eventNumTestPass = 0;
623 // test evaluation criteria
624 if (((float) failedEventIndex / eventCount) <= (1.0 - (float) EVENT_PASS_THRESHOLD/100) )
625 eventNumTestPass = 1;
626 else if (((float) failedEventIndex / eventCount) <= (1.0 - (float) EVENT_WARN_THRESHOLD/100) )
627 eventNumTestPass = -1;
628 else
629 eventNumTestPass = 0;
630
631
632 /* print test summary */
633 if ((!TESTS_QUIET) && (Rank == 0)) {
634
635 printf("\n\n************************ TEST SUMMARY (EVENTS) ******************************\n"
636 "No. of Events NOT tested successfully: %d (%.1f%%)\n"
637 "Note: the above failed event count is for Master node.\n"
638 "Total No. of Events reported by component info: %d\n",
639 failedEventIndex, ((float) failedEventIndex/eventCount)*100.0, eventCount);
640
641 if (failedEventIndex > 0) {
642 printf("\tNames of Events NOT tested: ");
643 char failedEventName[PAPI_MAX_STR_LEN];
644 for (i = 0; i < failedEventIndex; i++) {
645 retVal = PAPI_event_code_to_name (failedEventCodes[i], failedEventName);
646 if (retVal != PAPI_OK) {
647 strncpy(failedEventName, "ERROR:NOT_AVAILABLE", sizeof(failedEventName)-1);
648 failedEventName[sizeof(failedEventName)-1] = '\0';
649 }
650 printf("%s ", failedEventName);
651 if ((i > 0) && (i % 2 == 1)) printf("\n \t\t\t\t");
652 }
653 printf("\n");
654
655 printf("\tThe error counts for different PAPI routines are as follows:\n"
656 "\t\t\tNo. of PAPI add event errors (major) --> %d\n"
657 "\t\t\tNo. of PAPI code convert errors (minor) --> %d\n"
658 "\t\t\tNo. of PAPI event info errors (minor) --> %d\n"
659 "\t\t\tNo. of PAPI start errors (major) --> %d\n"
660 "\t\t\tNo. of PAPI stop errors (major) --> %d\n",
661 addEventFailCount, codeConvertFailCount, eventInfoFailCount, PAPIstartFailCount, PAPIstopFailCount);
662 }
663 printf("The PAPI event test has ");
664 if (eventNumTestPass == 1) printf("PASSED\n");
665 else if (eventNumTestPass == -1) printf("PASSED WITH WARNING\n");
666 else printf("FAILED\n");
667
668 // event values
669 printf("************************ TEST SUMMARY (EVENT VALUES) ************************\n");
670 if ((txCount[0] > 0) && (rxCount[0] > 0)) {
671 printf("No. of times transmit event at Master node did NOT match up receive events at worker nodes: %d (%.1f%%)\n"
672 "No. of times receive event at Master node did NOT match up transmit events at worker nodes: %d (%.1f%%)\n"
673 "Total No. of data sizes tested: %d\n"
674 "\tList of Data Sizes tested in bytes:\n\t\t\t",
675 txFailedIndex, ((float) txFailedIndex/NSTEPS)*100.0, rxFailedIndex, ((float) rxFailedIndex/NSTEPS)*100.0, NSTEPS);
676 for (i = 0; i < NSTEPS; i++)
677 printf("%ld ",DataSizes[i]*sizeof(double));
678 printf("\n");
679 if (txFailedIndex > 0 || rxFailedIndex > 0) {
680 printf("\tList of Data Sizes where transmit count at Master was not equal to sum of all worker receive counts:\n"
681 "\t\t\t");
682 for (i = 0; i < txFailedIndex; i++)
683 printf("%ld ", txFailedDataSizes[i]*sizeof(double));
684 printf("\n\tList of Data Sizes where receive count at Master was not equal to sum of all worker transmit counts:\n"
685 "\t\t\t");
686 for (i = 0; i < rxFailedIndex; i++)
687 printf("%ld ", rxFailedDataSizes[i]*sizeof(double));
688 printf("\n");
689 }
690 printf("The PAPI event value test has ");
691 if (eventValueTestPass == 1) printf("PASSED\n");
692 else if (eventValueTestPass == -1) printf("PASSED WITH WARNING\n");
693 else printf("FAILED\n");
694 } else {
695 printf("Transmit or receive events were NOT found!\n");
696 }
697
698 // compute values
699 printf("************************ TEST SUMMARY (COMPUTE VALUES) **********************\n");
700 if (computeTestPassCount != Nmax) {
701 printf("No. of times sanity check FAILED on the floating point computation: %d (%.1f%%)\n"
702 "Total No. of floating point computations performed: %d \n",
703 Nmax-computeTestPassCount, ((float) (Nmax-computeTestPassCount)/Nmax)*100.0, Nmax);
704 } else {
705 printf("Sanity check PASSED on all floating point computations.\n"
706 "Note: this may pass even if one event was tested successfully!\n");
707 }
708 printf("The overall test took %.3f secs.\n\n", elapsedTime);
709 } // end -- print summary of test results.
710
711 /* finialize MPI */
712 MPI_Finalize();
713
714 /* determine success of overall test based on all tests */
715 if (computeTestPass == 1 && eventValueTestPass == 1 && eventNumTestPass == 1) {
716 // all has to be good for the test to pass.
717 // note: test will generate a warning if tx/rx events are not available.
718 test_pass( __FILE__ );
719 }
720 else if ( (eventValueTestPass < 0 && (eventNumTestPass < 0 || eventNumTestPass == 1) ) ||
721 (eventValueTestPass == 1 && eventNumTestPass < 0) ||
722 (eventValueTestPass == 1 && eventNumTestPass == 1 && computeTestPass == 0) ) {
723 test_warn(__FILE__,__LINE__,"A warning was generated during any PAPI related tests or sanity check on computation failed", 0);
724 test_pass(__FILE__);
725 }
726 else {
727 // fail, in case any of eventValueTest and eventNumTest have failed,
728 // irrespective of the result of computeTest.
729 test_fail(__FILE__, __LINE__,"Any of PAPI event related tests have failed", 0);
730 }
731
732} // end main
#define NSIZE_MAX
#define EVENT_VAL_DIFF_THRESHOLD
#define NSIZE_PASS_THRESHOLD
#define NSIZE_MIN
#define NSIZE_WARN_THRESHOLD
#define EVENT_PASS_THRESHOLD
#define EVENT_WARN_THRESHOLD
#define NSTEPS
#define MAX_IB_EVENTS
int i
add PAPI preset or native hardware event to an event set
Empty and destroy an EventSet.
Create a new empty PAPI EventSet.
Empty and destroy an EventSet.
Enumerate PAPI preset or native events for a given component.
Convert a numeric hardware event code to a name.
get information about a specific software component
Get the event's name and description info.
Get real time counter value in nanoseconds.
initialize the PAPI library.
Get the number of components available on the system.
Start counting hardware events in an event set.
Stop counting hardware events in an event set.
Returns a string describing the PAPI error code.
#define PAPI_VER_CURRENT
Definition: f90papi.h:54
#define PAPI_ENUM_EVENTS
Definition: f90papi.h:224
#define PAPI_OK
Definition: f90papi.h:73
#define PAPI_ENUM_FIRST
Definition: f90papi.h:85
#define PAPI_NULL
Definition: f90papi.h:78
#define PAPI_ENOEVST
Definition: f90papi.h:95
#define PAPI_EINVAL
Definition: f90papi.h:115
#define PAPI_MAX_STR_LEN
Definition: f90papi.h:77
#define PAPI_EISRUN
Definition: f90papi.h:277
#define PAPI_ENOMEM
Definition: f90papi.h:16
#define PAPI_ENOTRUN
Definition: f90papi.h:146
static int EventSet
Definition: init_fini.c:8
static long long values[NUM_EVENTS]
Definition: init_fini.c:10
int TESTS_QUIET
Definition: test_utils.c:18
#define PAPI_NATIVE_MASK
Return codes and api definitions.
FILE * stderr
int tests_quiet(int argc, char **argv)
Definition: test_utils.c:376
void PAPI_NORETURN test_fail(const char *file, int line, const char *call, int retval)
Definition: test_utils.c:491
void PAPI_NORETURN test_pass(const char *filename)
Definition: test_utils.c:432
void test_warn(const char *file, int line, const char *call, int retval)
Definition: test_utils.c:547
void PAPI_NORETURN test_skip(const char *file, int line, const char *call, int retval)
Definition: test_utils.c:584
int main()
Definition: pernode.c:20
char name[PAPI_MAX_STR_LEN]
Definition: papi.h:627
char long_descr[PAPI_HUGE_STR_LEN]
Definition: papi.h:963