PAPI 7.1.0.0
Loading...
Searching...
No Matches
cupti_profiler.h File Reference
Include dependency graph for cupti_profiler.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

int cuptip_init (void)
 
int cuptip_control_create (cuptiu_event_table_t *event_names, cuptic_info_t thr_info, cuptip_control_t *pstate)
 
int cuptip_control_destroy (cuptip_control_t *pstate)
 
int cuptip_control_start (cuptip_control_t state)
 
int cuptip_control_stop (cuptip_control_t state)
 
int cuptip_control_read (cuptip_control_t state, long long *values)
 
int cuptip_control_reset (cuptip_control_t state)
 
int cuptip_event_enum (cuptiu_event_table_t *all_evt_names)
 
int cuptip_event_name_to_descr (const char *evt_name, char *description)
 
int cuptip_shutdown (void)
 

Detailed Description

Function Documentation

◆ cuptip_control_create()

int cuptip_control_create ( cuptiu_event_table_t *  event_names,
cuptic_info_t  thr_info,
cuptip_control_t *  pstate 
)

Definition at line 1425 of file cupti_profiler.c.

1426{
1427 COMPDBG("Entering.\n");
1428 int papi_errno = PAPI_OK, gpu_id;
1429 cuptip_control_t state = (cuptip_control_t) papi_calloc (1, sizeof(struct cuptip_control_s));
1430 if (state == NULL) {
1431 return PAPI_ENOMEM;
1432 }
1433 state->gpu_ctl = (cuptip_gpu_state_t *) papi_calloc(num_gpus, sizeof(cuptip_gpu_state_t));
1434 if (state->gpu_ctl == NULL) {
1435 return PAPI_ENOMEM;
1436 }
1437 for (gpu_id = 0; gpu_id < num_gpus; gpu_id++) {
1438 state->gpu_ctl[gpu_id].gpu_id = gpu_id;
1439 }
1440
1441 /* Register the user created cuda context for the current gpu if not already known */
1442 papi_errno = cuptic_ctxarr_update_current(thr_info);
1443 if (papi_errno != PAPI_OK) {
1444 goto fn_exit;
1445 }
1446 papi_errno = nvpw_cuda_metricscontext_create(state);
1447 if (papi_errno != PAPI_OK) {
1448 goto fn_exit;
1449 }
1450 papi_errno = add_events_per_gpu(state, event_names);
1451 if (papi_errno != PAPI_OK) {
1452 goto fn_exit;
1453 }
1454 papi_errno = control_state_validate(state);
1455 state->info = thr_info;
1456
1457fn_exit:
1458 *pstate = state;
1459 return papi_errno;
1460}
static const char * event_names[2]
Definition: Gamum.c:27
int cuptic_ctxarr_update_current(cuptic_info_t info)
Definition: cupti_common.c:536
static int nvpw_cuda_metricscontext_create(cuptip_control_t state)
static int num_gpus
static int add_events_per_gpu(cuptip_control_t state, cuptiu_event_table_t *event_names)
static int control_state_validate(cuptip_control_t state)
#define PAPI_OK
Definition: f90papi.h:73
#define PAPI_ENOMEM
Definition: f90papi.h:16
#define COMPDBG(format, args...)
Definition: lcuda_debug.h:21
bool state
Definition: papi_hl.c:155
#define papi_calloc(a, b)
Definition: papi_memory.h:37
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuptip_control_destroy()

int cuptip_control_destroy ( cuptip_control_t *  pstate)

Definition at line 1462 of file cupti_profiler.c.

1463{
1464 COMPDBG("Entering.\n");
1465 cuptip_control_t state = *pstate;
1466 int i, j;
1467 int papi_errno = nvpw_cuda_metricscontext_destroy(state);
1468 for (i = 0; i < num_gpus; i++) {
1469 reset_cupti_prof_config_images( &(state->gpu_ctl[i]) );
1470 cuptiu_event_table_destroy( &(state->gpu_ctl[i].event_names) );
1471 for (j = 0; j < state->gpu_ctl[i].rmr_count; j++) {
1472 papi_free((void *) state->gpu_ctl[i].rmr[j].pMetricName);
1473 }
1474 papi_free(state->gpu_ctl[i].rmr);
1475 }
1476 papi_free(state->gpu_ctl);
1478 *pstate = NULL;
1479 return papi_errno;
1480}
int i
static int reset_cupti_prof_config_images(cuptip_gpu_state_t *gpu_ctl)
static int nvpw_cuda_metricscontext_destroy(cuptip_control_t state)
void cuptiu_event_table_destroy(cuptiu_event_table_t **pevt_table)
Definition: cupti_utils.c:159
#define papi_free(a)
Definition: papi_memory.h:35
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuptip_control_read()

int cuptip_control_read ( cuptip_control_t  state,
long long *  values 
)

Definition at line 1599 of file cupti_profiler.c.

1600{
1601 COMPDBG("Entering.\n");
1602 int papi_errno, gpu_id, i;
1603 cuptip_gpu_state_t *gpu_ctl = NULL;
1604 CUcontext userCtx = NULL, ctx = NULL;
1605 CUDA_CALL( cuCtxGetCurrentPtr(&userCtx), goto fn_fail_misc);
1606 if (userCtx == NULL) {
1607 CUDART_CALL( cudaFreePtr(NULL), goto fn_fail_misc );
1608 CUDART_CALL( cuCtxGetCurrentPtr(&userCtx), goto fn_fail_misc );
1609 }
1610 unsigned int evt_pos;
1611 long long val;
1612 cuptiu_event_t *evt_rec = NULL;
1613 for (gpu_id = 0; gpu_id < num_gpus; gpu_id++) {
1614 gpu_ctl = &(state->gpu_ctl[gpu_id]);
1615 if (gpu_ctl->event_names->count == 0) {
1616 continue;
1617 }
1618
1619 papi_errno = cuptic_ctxarr_get_ctx(state->info, gpu_id, &ctx);
1620 CUDA_CALL( cuCtxSetCurrentPtr(ctx), goto fn_fail_misc );
1621
1622 CUpti_Profiler_PopRange_Params popRangeParams = {
1623 .structSize = CUpti_Profiler_PopRange_Params_STRUCT_SIZE,
1624 .pPriv = NULL,
1625 .ctx = NULL,
1626 };
1627 CUPTI_CALL( cuptiProfilerPopRangePtr(&popRangeParams), goto fn_fail_misc );
1628
1629 CUpti_Profiler_EndPass_Params endPassParams = {
1630 .structSize = CUpti_Profiler_EndPass_Params_STRUCT_SIZE,
1631 .pPriv = NULL,
1632 .ctx = NULL,
1633 };
1634 CUPTI_CALL( cuptiProfilerEndPassPtr(&endPassParams), goto fn_fail_misc );
1635
1636 CUpti_Profiler_FlushCounterData_Params flushCounterDataParams = {
1637 .structSize = CUpti_Profiler_FlushCounterData_Params_STRUCT_SIZE,
1638 .pPriv = NULL,
1639 .ctx = NULL,
1640 };
1641 CUPTI_CALL( cuptiProfilerFlushCounterDataPtr(&flushCounterDataParams), goto fn_fail_misc );
1642
1643 papi_errno = get_measured_values(gpu_ctl);
1644 if (papi_errno != PAPI_OK) {
1645 goto fn_exit;
1646 }
1647 for (i = 0; i < (int) gpu_ctl->event_names->count; i++) {
1648 papi_errno = cuptiu_event_table_get_item(gpu_ctl->event_names, i, &evt_rec);
1649 if (papi_errno != PAPI_OK) {
1650 goto fn_exit;
1651 }
1652 evt_pos = evt_rec->evt_pos;
1653 val = (long long) evt_rec->value;
1654
1655 if (state->read_count == 0) {
1656 values[evt_pos] = val;
1657 }
1658 else {
1659 switch (get_event_collection_method(evt_rec->name)) {
1660 case RunningSum:
1661 values[evt_pos] += val;
1662 break;
1663 case RunningMin:
1664 values[evt_pos] = values[evt_pos] < val ? values[evt_pos] : val;
1665 break;
1666 case RunningMax:
1667 values[evt_pos] = values[evt_pos] > val ? values[evt_pos] : val;
1668 break;
1669 default:
1670 values[evt_pos] = val;
1671 break;
1672 }
1673 }
1674 }
1675
1676 CUPTI_CALL( cuptiProfilerCounterDataImageInitializePtr(&gpu_ctl->initializeParams), goto fn_fail_misc );
1677 CUPTI_CALL( cuptiProfilerCounterDataImageInitializeScratchBufferPtr(&gpu_ctl->initScratchBufferParams), goto fn_fail_misc );
1678
1679 CUpti_Profiler_BeginPass_Params beginPassParams = {
1680 .structSize = CUpti_Profiler_BeginPass_Params_STRUCT_SIZE,
1681 .pPriv = NULL,
1682 .ctx = NULL,
1683 };
1684 CUPTI_CALL( cuptiProfilerBeginPassPtr(&beginPassParams), goto fn_fail_misc );
1685
1686 char rangeName[64];
1687 sprintf(rangeName, "PAPI_Range_%d", gpu_ctl->gpu_id);
1688 CUpti_Profiler_PushRange_Params pushRangeParams = {
1689 .structSize = CUpti_Profiler_PushRange_Params_STRUCT_SIZE,
1690 .pPriv = NULL,
1691 .ctx = NULL,
1692 .pRangeName = (const char*) &rangeName,
1693 .rangeNameLength = 100,
1694 };
1695 CUPTI_CALL( cuptiProfilerPushRangePtr(&pushRangeParams), goto fn_fail_misc );
1696
1697 }
1698 state->read_count++;
1699fn_exit:
1700 CUDA_CALL( cuCtxSetCurrentPtr(userCtx), );
1701 return papi_errno;
1702fn_fail_misc:
1703 papi_errno = PAPI_EMISC;
1704 goto fn_exit;
1705}
cudaError_t(* cudaFreePtr)(void *)
Definition: cupti_common.c:46
CUresult(* cuCtxSetCurrentPtr)(CUcontext)
Definition: cupti_common.c:24
int cuptic_ctxarr_get_ctx(cuptic_info_t info, int gpu_idx, CUcontext *ctx)
Definition: cupti_common.c:566
CUresult(* cuCtxGetCurrentPtr)(CUcontext *)
Definition: cupti_common.c:23
#define CUDA_CALL(call, handleerror)
Definition: cupti_common.h:58
#define CUDART_CALL(call, handleerror)
Definition: cupti_common.h:68
#define CUPTI_CALL(call, handleerror)
Definition: cupti_common.h:78
static int get_measured_values(cuptip_gpu_state_t *gpu_ctl)
CUptiResult(* cuptiProfilerPopRangePtr)(CUpti_Profiler_PopRange_Params *params)
static enum collection_method_e get_event_collection_method(const char *evt_name)
CUptiResult(* cuptiProfilerEndPassPtr)(CUpti_Profiler_EndPass_Params *params)
CUptiResult(* cuptiProfilerPushRangePtr)(CUpti_Profiler_PushRange_Params *params)
CUptiResult(* cuptiProfilerFlushCounterDataPtr)(CUpti_Profiler_FlushCounterData_Params *params)
@ RunningSum
@ RunningMax
@ RunningMin
CUptiResult(* cuptiProfilerCounterDataImageInitializePtr)(CUpti_Profiler_CounterDataImage_Initialize_Params *params)
CUptiResult(* cuptiProfilerCounterDataImageInitializeScratchBufferPtr)(CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params *params)
CUptiResult(* cuptiProfilerBeginPassPtr)(CUpti_Profiler_BeginPass_Params *params)
int cuptiu_event_table_get_item(cuptiu_event_table_t *evt_table, int evt_idx, cuptiu_event_t **record)
Definition: cupti_utils.c:47
#define PAPI_EMISC
Definition: f90papi.h:122
static long long values[NUM_EVENTS]
Definition: init_fini.c:10
if(file==NULL) goto out
int
Definition: sde_internal.h:89
long long int long long
Definition: sde_internal.h:85
char name[PAPI_2MAX_STR_LEN]
Definition: cupti_utils.h:13
unsigned int evt_pos
Definition: cupti_utils.h:15
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuptip_control_reset()

int cuptip_control_reset ( cuptip_control_t  state)

Definition at line 1707 of file cupti_profiler.c.

1708{
1709 COMPDBG("Entering.\n");
1710 state->read_count = 0;
1711 return PAPI_OK;
1712}
Here is the caller graph for this function:

◆ cuptip_control_start()

int cuptip_control_start ( cuptip_control_t  state)

Definition at line 1482 of file cupti_profiler.c.

1483{
1484 COMPDBG("Entering.\n");
1485 cuptip_gpu_state_t *gpu_ctl;
1486 CUcontext userCtx, ctx;
1487 CUDA_CALL( cuCtxGetCurrentPtr(&userCtx), goto fn_fail_misc );
1488 if (userCtx == NULL) {
1489 CUDART_CALL( cudaFreePtr(NULL), goto fn_fail_misc );
1490 CUDA_CALL( cuCtxGetCurrentPtr(&userCtx), goto fn_fail_misc );
1491 }
1492 int gpu_id;
1493 int papi_errno = PAPI_OK;
1494 for (gpu_id = 0; gpu_id < num_gpus; gpu_id++) {
1495 gpu_ctl = &(state->gpu_ctl[gpu_id]);
1496 if (gpu_ctl->event_names->count == 0) {
1497 continue;
1498 }
1499 LOGDBG("Device num %d: event_count %d, rmr count %d\n", gpu_id, gpu_ctl->event_names->count, gpu_ctl->rmr_count);
1500 papi_errno = cuptic_device_acquire(state->gpu_ctl[gpu_id].event_names);
1501 if (papi_errno != PAPI_OK) {
1502 ERRDBG("Profiling same gpu from multiple event sets not allowed.\n");
1503 return papi_errno;
1504 }
1505 papi_errno = cuptic_ctxarr_get_ctx(state->info, gpu_id, &ctx);
1506 CUDA_CALL( cuCtxSetCurrentPtr(ctx), goto fn_fail_misc );
1507 papi_errno = get_counter_availability(gpu_ctl);
1508 if (papi_errno != PAPI_OK) {
1509 ERRDBG("Error getting counter availability image.\n");
1510 return papi_errno;
1511 }
1512 /* CUPTI profiler host configuration */
1513 papi_errno = metric_get_config_image(gpu_ctl);
1514 papi_errno += metric_get_counter_data_prefix_image(gpu_ctl);
1515 papi_errno += create_counter_data_image(gpu_ctl);
1516 if (papi_errno != PAPI_OK) {
1517 ERRDBG("Failed to create CUPTI profiler state for gpu %d\n", gpu_id);
1518 goto fn_fail;
1519 }
1520 papi_errno = begin_profiling(gpu_ctl);
1521 if (papi_errno != PAPI_OK) {
1522 ERRDBG("Failed to start profiling for gpu %d\n", gpu_id);
1523 goto fn_fail;
1524 }
1525 }
1526 state->running = True;
1527fn_exit:
1528 CUDA_CALL( cuCtxSetCurrentPtr(userCtx), goto fn_fail_misc );
1529 return papi_errno;
1530fn_fail:
1531 papi_errno = PAPI_ECMP;
1532 goto fn_exit;
1533fn_fail_misc:
1534 papi_errno = PAPI_EMISC;
1535 goto fn_exit;
1536}
int cuptic_device_acquire(cuptiu_event_table_t *evt_table)
Definition: cupti_common.c:629
static int create_counter_data_image(cuptip_gpu_state_t *gpu_ctl)
static int begin_profiling(cuptip_gpu_state_t *gpu_ctl)
static int get_counter_availability(cuptip_gpu_state_t *gpu_ctl)
static int metric_get_config_image(cuptip_gpu_state_t *gpu_ctl)
@ True
static int metric_get_counter_data_prefix_image(cuptip_gpu_state_t *gpu_ctl)
#define PAPI_ECMP
Definition: f90papi.h:214
#define ERRDBG(format, args...)
Definition: lcuda_debug.h:30
#define LOGDBG(format, args...)
Definition: lcuda_debug.h:24
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuptip_control_stop()

int cuptip_control_stop ( cuptip_control_t  state)

Definition at line 1538 of file cupti_profiler.c.

1539{
1540 COMPDBG("Entering.\n");
1541 cuptip_gpu_state_t *gpu_ctl;
1542 CUcontext userCtx = NULL, ctx = NULL;
1543 CUDA_CALL( cuCtxGetCurrentPtr(&userCtx), goto fn_fail_misc );
1544 if (userCtx == NULL) {
1545 CUDART_CALL( cudaFreePtr(NULL), goto fn_fail_misc );
1546 CUDA_CALL( cuCtxGetCurrentPtr(&userCtx), goto fn_fail_misc );
1547 }
1548 int gpu_id;
1549 int papi_errno = PAPI_OK;
1550 if (state->running == False) {
1551 ERRDBG("Profiler is already stopped.\n");
1552 papi_errno = PAPI_EINVAL;
1553 goto fn_fail;
1554 }
1555 for (gpu_id=0; gpu_id<num_gpus; gpu_id++) {
1556 gpu_ctl = &(state->gpu_ctl[gpu_id]);
1557 if (gpu_ctl->event_names->count == 0) {
1558 continue;
1559 }
1560 papi_errno = cuptic_ctxarr_get_ctx(state->info, gpu_id, &ctx);
1561 CUDA_CALL( cuCtxSetCurrentPtr(ctx), goto fn_fail_misc );
1562 papi_errno = end_profiling(gpu_ctl);
1563 if (papi_errno != PAPI_OK) {
1564 ERRDBG("Failed to stop profiling on gpu %d\n", gpu_id);
1565 goto fn_fail;
1566 }
1567 papi_errno = cuptic_device_release(state->gpu_ctl[gpu_id].event_names);
1568 if (papi_errno != PAPI_OK) {
1569 goto fn_fail;
1570 }
1571 }
1572 state->running = False;
1573fn_exit:
1574 CUDA_CALL( cuCtxSetCurrentPtr(userCtx), goto fn_fail_misc );
1575 return papi_errno;
1576fn_fail:
1577 goto fn_exit;
1578fn_fail_misc:
1579 papi_errno = PAPI_EMISC;
1580 goto fn_exit;
1581}
int cuptic_device_release(cuptiu_event_table_t *evt_table)
Definition: cupti_common.c:644
static int end_profiling(cuptip_gpu_state_t *gpu_ctl)
@ False
#define PAPI_EINVAL
Definition: f90papi.h:115
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuptip_event_enum()

int cuptip_event_enum ( cuptiu_event_table_t *  all_evt_names)

Definition at line 1206 of file cupti_profiler.c.

1207{
1208 int gpu_id, i, found, listsubmetrics = 1, papi_errno = PAPI_OK;
1209 if (avail_events[0].nv_metrics != NULL) {
1210 /* Already enumerated for 1st device? Then exit... */
1211 goto fn_exit;
1212 }
1213 for (gpu_id = 0; gpu_id < num_gpus; gpu_id++) {
1214 LOGDBG("Getting metric names for gpu %d\n", gpu_id);
1215 found = find_same_chipname(gpu_id);
1216 if (found > -1) {
1217 avail_events[gpu_id].num_metrics = avail_events[found].num_metrics;
1218 avail_events[gpu_id].nv_metrics = avail_events[found].nv_metrics;
1219 continue;
1220 }
1221 /* If same chip_name not found, get all the details for this gpu */
1222
1223 NVPW_MetricsContext_GetMetricNames_Begin_Params getMetricNameBeginParams = {
1224 .structSize = NVPW_MetricsContext_GetMetricNames_Begin_Params_STRUCT_SIZE,
1225 .pPriv = NULL,
1226 .pMetricsContext = avail_events[gpu_id].pmetricsContextCreateParams->pMetricsContext,
1227 .hidePeakSubMetrics = !listsubmetrics,
1228 .hidePerCycleSubMetrics = !listsubmetrics,
1229 .hidePctOfPeakSubMetrics = !listsubmetrics,
1230 };
1231 NVPW_CALL( NVPW_MetricsContext_GetMetricNames_BeginPtr(&getMetricNameBeginParams), goto fn_fail );
1232
1233 avail_events[gpu_id].num_metrics = getMetricNameBeginParams.numMetrics;
1234
1235 papi_errno = cuptiu_event_table_create_init_capacity(avail_events[gpu_id].num_metrics, sizeof(cuptiu_event_t), &(avail_events[gpu_id].nv_metrics));
1236 if (papi_errno != PAPI_OK) {
1237 goto fn_exit;
1238 }
1239 for (i = 0; i < avail_events[gpu_id].num_metrics; i++) {
1240 papi_errno = cuptiu_event_table_insert_record(avail_events[gpu_id].nv_metrics,
1241 getMetricNameBeginParams.ppMetricNames[i],
1242 i, 0);
1243 if (papi_errno != PAPI_OK) {
1244 goto fn_exit;
1245 }
1246 }
1247
1248 NVPW_MetricsContext_GetMetricNames_End_Params getMetricNameEndParams = {
1249 .structSize = NVPW_MetricsContext_GetMetricNames_End_Params_STRUCT_SIZE,
1250 .pPriv = NULL,
1251 .pMetricsContext = avail_events[gpu_id].pmetricsContextCreateParams->pMetricsContext,
1252 };
1253 NVPW_CALL( NVPW_MetricsContext_GetMetricNames_EndPtr((NVPW_MetricsContext_GetMetricNames_End_Params *) &getMetricNameEndParams), goto fn_fail );
1254
1255 }
1257 cuptiu_event_t *find = NULL;
1258 cuptiu_event_t *evt_rec = NULL;
1259 int len;
1260 unsigned int curr = all_evt_names->count;
1261 for (gpu_id = 0; gpu_id < num_gpus; gpu_id++) {
1262 for (i = 0; i < avail_events[gpu_id].num_metrics; i++) {
1263 papi_errno = cuptiu_event_table_get_item(avail_events[gpu_id].nv_metrics, i, &evt_rec);
1264 if (papi_errno != PAPI_OK) {
1265 goto fn_exit;
1266 }
1267 len = snprintf(evt_name, PAPI_2MAX_STR_LEN, "%s:device=%d", evt_rec->name, gpu_id);
1268 if (len > PAPI_2MAX_STR_LEN) {
1269 ERRDBG("String formatting exceeded maximum length.\n");
1270 papi_errno = PAPI_ENOMEM;
1271 goto fn_exit;
1272 }
1273 if (cuptiu_event_table_find_name(all_evt_names, evt_name, &find) == PAPI_ENOEVNT) {
1274 papi_errno = cuptiu_event_table_insert_record(all_evt_names, evt_name, curr, 0);
1275 if (papi_errno != PAPI_OK) {
1276 goto fn_exit;
1277 }
1278 ++curr;
1279 }
1280 }
1281 }
1282fn_exit:
1283 return papi_errno;
1284fn_fail:
1285 papi_errno = PAPI_EMISC;
1286 goto fn_exit;
1287}
static list_metrics_t * avail_events
NVPA_Status(* NVPW_MetricsContext_GetMetricNames_EndPtr)(NVPW_MetricsContext_GetMetricNames_End_Params *params)
NVPA_Status(* NVPW_MetricsContext_GetMetricNames_BeginPtr)(NVPW_MetricsContext_GetMetricNames_Begin_Params *params)
#define NVPW_CALL(call, handleerror)
static int find_same_chipname(int gpu_id)
int cuptiu_event_table_create_init_capacity(int capacity, int sizeof_rec, cuptiu_event_table_t **pevt_table)
Definition: cupti_utils.c:16
int cuptiu_event_table_insert_record(cuptiu_event_table_t *evt_table, const char *evt_name, unsigned int evt_code, int evt_pos)
Definition: cupti_utils.c:88
int cuptiu_event_table_find_name(cuptiu_event_table_t *evt_table, const char *evt_name, cuptiu_event_t **found_rec)
Definition: cupti_utils.c:147
char * evt_name(evstock *stock, int index)
Definition: eventstock.c:193
#define PAPI_ENOEVNT
Definition: f90papi.h:139
#define PAPI_2MAX_STR_LEN
Definition: f90papi.h:180
unsigned int count
Definition: cupti_utils.h:22
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuptip_event_name_to_descr()

int cuptip_event_name_to_descr ( const char *  evt_name,
char *  description 
)

Definition at line 1289 of file cupti_profiler.c.

1290{
1291 int papi_errno, numdep, gpu_id, passes;
1292 char nv_name[PAPI_MAX_STR_LEN];
1293 cuptiu_event_t *evt_rec = NULL;
1294 NVPA_RawMetricRequest *temp;
1295 papi_errno = event_name_tokenize(evt_name, nv_name, &gpu_id);
1296 if (papi_errno != PAPI_OK) {
1297 goto fn_exit;
1298 }
1299 papi_errno = cuptiu_event_table_find_name(avail_events[gpu_id].nv_metrics, nv_name, &evt_rec);
1300 if (papi_errno != PAPI_OK) {
1301 ERRDBG("Event name not found in avail_events array.\n");
1302 goto fn_exit;
1303 }
1304 char *desc = evt_rec->desc;
1305 if (desc[0] == '\0') {
1306 papi_errno = retrieve_metric_details(avail_events[gpu_id].pmetricsContextCreateParams->pMetricsContext,
1307 nv_name, desc, &numdep, &temp);
1308 if (papi_errno == PAPI_OK) {
1309 NVPW_CUDA_RawMetricsConfig_Create_Params nvpw_metricsConfigCreateParams = {
1310 .structSize = NVPW_CUDA_RawMetricsConfig_Create_Params_STRUCT_SIZE,
1311 .pPriv = NULL,
1312 .activityKind = NVPA_ACTIVITY_KIND_PROFILER,
1313 .pChipName = avail_events[gpu_id].chip_name,
1314 };
1315 NVPW_CALL( NVPW_CUDA_RawMetricsConfig_CreatePtr(&nvpw_metricsConfigCreateParams), goto fn_fail );
1316
1317 papi_errno = check_num_passes(nvpw_metricsConfigCreateParams.pRawMetricsConfig,
1318 numdep, temp, &passes);
1319
1320 NVPW_RawMetricsConfig_Destroy_Params rawMetricsConfigDestroyParams = {
1321 .structSize = NVPW_RawMetricsConfig_Destroy_Params_STRUCT_SIZE,
1322 .pPriv = NULL,
1323 .pRawMetricsConfig = nvpw_metricsConfigCreateParams.pRawMetricsConfig,
1324 };
1325 NVPW_CALL( NVPW_RawMetricsConfig_DestroyPtr((NVPW_RawMetricsConfig_Destroy_Params *) &rawMetricsConfigDestroyParams), goto fn_fail );
1326
1327 snprintf(desc + strlen(desc), PAPI_2MAX_STR_LEN - strlen(desc), " Numpass=%d", passes);
1328 if (passes > 1) {
1329 snprintf(desc + strlen(desc), PAPI_2MAX_STR_LEN - strlen(desc), " (multi-pass not supported)");
1330 }
1331
1332 const char *token_sw_evt = "sass";
1333 if (strstr(nv_name, token_sw_evt) != NULL) {
1334 snprintf(desc + strlen(desc), PAPI_2MAX_STR_LEN - strlen(desc), " (SW event)");
1335 }
1336 }
1337 papi_free(temp);
1338 }
1339 strcpy(description, desc);
1340fn_exit:
1341 return papi_errno;
1342fn_fail:
1343 papi_errno = PAPI_EMISC;
1344 goto fn_exit;
1345}
NVPA_Status(* NVPW_RawMetricsConfig_DestroyPtr)(NVPW_RawMetricsConfig_Destroy_Params *params)
NVPA_Status(* NVPW_CUDA_RawMetricsConfig_CreatePtr)(NVPW_CUDA_RawMetricsConfig_Create_Params *)
static int check_num_passes(struct NVPA_RawMetricsConfig *pRawMetricsConfig, int rmr_count, NVPA_RawMetricRequest *rmr, int *num_pass)
static int event_name_tokenize(const char *name, char *nv_name, int *gpuid)
static int retrieve_metric_details(NVPA_MetricsContext *pMetricsContext, const char *nv_name, char *description, int *numDep, NVPA_RawMetricRequest **pRMR)
#define PAPI_MAX_STR_LEN
Definition: f90papi.h:77
char desc[PAPI_2MAX_STR_LEN]
Definition: cupti_utils.h:17
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuptip_init()

int cuptip_init ( void  )

Definition at line 1383 of file cupti_profiler.c.

1384{
1385 COMPDBG("Entering.\n");
1386 int papi_errno = PAPI_OK;
1387
1388 papi_errno = load_cupti_perf_sym();
1389 papi_errno += load_nvpw_sym();
1390 if (papi_errno != PAPI_OK) {
1391 cuptic_disabled_reason_set("Unable to load CUDA library functions.");
1392 goto fn_fail;
1393 }
1394
1395 papi_errno = cuptic_device_get_count(&num_gpus);
1396 if (papi_errno != PAPI_OK) {
1397 goto fn_fail;
1398 }
1399
1400 if (num_gpus <= 0) {
1401 cuptic_disabled_reason_set("No GPUs found on system.");
1402 goto fn_fail;
1403 }
1404
1405 papi_errno = initialize_cupti_profiler_api();
1406 papi_errno += initialize_perfworks_api();
1407 if (papi_errno != PAPI_OK) {
1408 cuptic_disabled_reason_set("Unable to initialize CUPTI profiler libraries.");
1409 goto fn_fail;
1410 }
1411 papi_errno = init_all_metrics();
1412 if (papi_errno != PAPI_OK) {
1413 goto fn_fail;
1414 }
1415 papi_errno = cuInitPtr(0);
1416 if (papi_errno != CUDA_SUCCESS) {
1417 cuptic_disabled_reason_set("Failed to initialize CUDA driver API.");
1418 goto fn_fail;
1419 }
1420 return PAPI_OK;
1421fn_fail:
1422 return PAPI_EMISC;
1423}
void cuptic_disabled_reason_set(const char *msg)
Definition: cupti_common.c:385
int cuptic_device_get_count(int *num_gpus)
Definition: cupti_common.c:303
CUresult(* cuInitPtr)(unsigned int)
Definition: cupti_common.c:33
static int load_nvpw_sym(void)
static int init_all_metrics(void)
static int initialize_perfworks_api(void)
static int initialize_cupti_profiler_api(void)
static int load_cupti_perf_sym(void)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuptip_shutdown()

int cuptip_shutdown ( void  )

Definition at line 1714 of file cupti_profiler.c.

1715{
1716 COMPDBG("Entering.\n");
1721 return PAPI_OK;
1722}
static void free_all_enumerated_metrics(void)
static int unload_nvpw_sym(void)
static int finalize_cupti_profiler_api(void)
static int unload_cupti_perf_sym(void)
Here is the call graph for this function:
Here is the caller graph for this function: