PAPI 7.1.0.0
Loading...
Searching...
No Matches
cupti_common.c File Reference
Include dependency graph for cupti_common.c:

Go to the source code of this file.

Data Structures

struct  cuptic_info_t
 

Typedefs

typedef int64_t gpu_occupancy_t
 

Enumerations

enum  gpu_collection_e {
  GPU_COLLECTION_UNKNOWN , GPU_COLLECTION_ALL_PERF , GPU_COLLECTION_MIXED , GPU_COLLECTION_ALL_EVENTS ,
  GPU_COLLECTION_ALL_CC70
}
 

Functions

static int load_cuda_sym (void)
 
static int unload_cuda_sym (void)
 
void * cuptic_load_dynamic_syms (const char *parent_path, const char *dlname, const char *search_subpaths[])
 
static int load_cudart_sym (void)
 
static int unload_cudart_sym (void)
 
static int load_cupti_common_sym (void)
 
static int unload_cupti_common_sym (void)
 
static int util_load_cuda_sym (void)
 
static void unload_linked_cudart_path (void)
 
int cuptic_shutdown (void)
 
static int util_dylib_cu_runtime_version (void)
 
static int util_dylib_cupti_version (void)
 
int cuptic_device_get_count (int *num_gpus)
 
static int get_gpu_compute_capability (int dev_num, int *cc)
 
static int util_gpu_collection_kind (gpu_collection_e *coll_kind)
 
void cuptic_disabled_reason_set (const char *msg)
 
void cuptic_disabled_reason_get (const char **pmsg)
 
static int dl_iterate_phdr_cb (struct dl_phdr_info *info, __attribute__((unused)) size_t size, __attribute__((unused)) void *data)
 
static int get_user_cudart_path (void)
 
int cuptic_init (void)
 
int cuptic_is_runtime_perfworks_api (void)
 
int cuptic_is_runtime_events_api (void)
 
int cuptic_ctxarr_create (cuptic_info_t *pinfo)
 
int cuptic_ctxarr_update_current (cuptic_info_t info)
 
int cuptic_ctxarr_get_ctx (cuptic_info_t info, int gpu_idx, CUcontext *ctx)
 
int cuptic_ctxarr_destroy (cuptic_info_t *pinfo)
 
static int event_name_get_gpuid (const char *name, int *gpuid)
 
static int _devmask_events_get (cuptiu_event_table_t *evt_table, gpu_occupancy_t *bitmask)
 
int cuptic_device_acquire (cuptiu_event_table_t *evt_table)
 
int cuptic_device_release (cuptiu_event_table_t *evt_table)
 

Variables

static void * dl_drv
 
static void * dl_rt
 
const char * linked_cudart_path
 
void * dl_cupti
 
unsigned int _cuda_lock
 
CUresult(* cuCtxGetCurrentPtr )(CUcontext *)
 
CUresult(* cuCtxSetCurrentPtr )(CUcontext)
 
CUresult(* cuCtxDestroyPtr )(CUcontext)
 
CUresult(* cuCtxCreatePtr )(CUcontext *pctx, unsigned int flags, CUdevice dev)
 
CUresult(* cuCtxGetDevicePtr )(CUdevice *)
 
CUresult(* cuDeviceGetPtr )(CUdevice *, int)
 
CUresult(* cuDeviceGetCountPtr )(int *)
 
CUresult(* cuDeviceGetNamePtr )(char *, int, CUdevice)
 
CUresult(* cuDevicePrimaryCtxRetainPtr )(CUcontext *pctx, CUdevice)
 
CUresult(* cuDevicePrimaryCtxReleasePtr )(CUdevice)
 
CUresult(* cuInitPtr )(unsigned int)
 
CUresult(* cuGetErrorStringPtr )(CUresult error, const char **pStr)
 
CUresult(* cuCtxPopCurrentPtr )(CUcontext *pctx)
 
CUresult(* cuCtxPushCurrentPtr )(CUcontext pctx)
 
CUresult(* cuCtxSynchronizePtr )()
 
CUresult(* cuDeviceGetAttributePtr )(int *, CUdevice_attribute, CUdevice)
 
cudaError_t(* cudaGetDeviceCountPtr )(int *)
 
cudaError_t(* cudaGetDevicePtr )(int *)
 
const char *(* cudaGetErrorStringPtr )(cudaError_t)
 
cudaError_t(* cudaSetDevicePtr )(int)
 
cudaError_t(* cudaGetDevicePropertiesPtr )(struct cudaDeviceProp *prop, int device)
 
cudaError_t(* cudaDeviceGetAttributePtr )(int *value, enum cudaDeviceAttr attr, int device)
 
cudaError_t(* cudaFreePtr )(void *)
 
cudaError_t(* cudaDriverGetVersionPtr )(int *)
 
cudaError_t(* cudaRuntimeGetVersionPtr )(int *)
 
CUptiResult(* cuptiGetVersionPtr )(uint32_t *)
 
const char * cuptic_disabled_reason_g
 
static gpu_occupancy_t global_gpu_bitmask
 

Detailed Description

Typedef Documentation

◆ gpu_occupancy_t

typedef int64_t gpu_occupancy_t

Definition at line 580 of file cupti_common.c.

Enumeration Type Documentation

◆ gpu_collection_e

Enumerator
GPU_COLLECTION_UNKNOWN 
GPU_COLLECTION_ALL_PERF 
GPU_COLLECTION_MIXED 
GPU_COLLECTION_ALL_EVENTS 
GPU_COLLECTION_ALL_CC70 

Definition at line 331 of file cupti_common.c.

Function Documentation

◆ _devmask_events_get()

static int _devmask_events_get ( cuptiu_event_table_t *  evt_table,
gpu_occupancy_t *  bitmask 
)
static

Definition at line 607 of file cupti_common.c.

608{
609 int papi_errno = PAPI_OK, gpu_id;
610 long i;
611 gpu_occupancy_t acq_mask = 0;
612 cuptiu_event_t *evt_rec;
613 for (i = 0; i < evt_table->count; i++) {
614 papi_errno = cuptiu_event_table_get_item(evt_table, i, (cuptiu_event_t **) &evt_rec);
615 if (papi_errno != PAPI_OK) {
616 goto fn_exit;
617 }
618 papi_errno = event_name_get_gpuid(evt_rec->name, &gpu_id);
619 if (papi_errno != PAPI_OK) {
620 goto fn_exit;
621 }
622 acq_mask |= (1 << gpu_id);
623 }
624 *bitmask = acq_mask;
625fn_exit:
626 return papi_errno;
627}
int i
int64_t gpu_occupancy_t
Definition: cupti_common.c:580
static int event_name_get_gpuid(const char *name, int *gpuid)
Definition: cupti_common.c:583
int cuptiu_event_table_get_item(cuptiu_event_table_t *evt_table, int evt_idx, cuptiu_event_t **record)
Definition: cupti_utils.c:47
#define PAPI_OK
Definition: f90papi.h:73
char name[PAPI_2MAX_STR_LEN]
Definition: cupti_utils.h:13
unsigned int count
Definition: cupti_utils.h:22
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuptic_ctxarr_create()

int cuptic_ctxarr_create ( cuptic_info_t *  pinfo)

Definition at line 520 of file cupti_common.c.

521{
522 COMPDBG("Entering.\n");
523 int total_gpus;
524 int papi_errno = cuptic_device_get_count(&total_gpus);
525 if (papi_errno != PAPI_OK) {
526 return PAPI_EMISC;
527 }
528 cuptic_info_t cuCtx = (cuptic_info_t) papi_calloc (total_gpus, sizeof(*pinfo));
529 if (cuCtx == NULL) {
530 return PAPI_ENOMEM;
531 }
532 *pinfo = cuCtx;
533 return PAPI_OK;
534}
int cuptic_device_get_count(int *num_gpus)
Definition: cupti_common.c:303
#define PAPI_EMISC
Definition: f90papi.h:122
#define PAPI_ENOMEM
Definition: f90papi.h:16
#define COMPDBG(format, args...)
Definition: lcuda_debug.h:21
#define papi_calloc(a, b)
Definition: papi_memory.h:37
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuptic_ctxarr_destroy()

int cuptic_ctxarr_destroy ( cuptic_info_t *  pinfo)

Definition at line 572 of file cupti_common.c.

573{
574 papi_free(*pinfo);
575 *pinfo = NULL;
576 return PAPI_OK;
577}
#define papi_free(a)
Definition: papi_memory.h:35
Here is the caller graph for this function:

◆ cuptic_ctxarr_get_ctx()

int cuptic_ctxarr_get_ctx ( cuptic_info_t  info,
int  gpu_idx,
CUcontext *  ctx 
)

Definition at line 566 of file cupti_common.c.

567{
568 *ctx = info[gpu_idx].ctx;
569 return PAPI_OK;
570}
Here is the caller graph for this function:

◆ cuptic_ctxarr_update_current()

int cuptic_ctxarr_update_current ( cuptic_info_t  info)

Definition at line 536 of file cupti_common.c.

537{
538 int papi_errno, gpu_id;
539 CUcontext tempCtx;
540 papi_errno = cudaGetDevicePtr(&gpu_id);
541 if (papi_errno != cudaSuccess) {
542 return PAPI_EMISC;
543 }
544 papi_errno = cuCtxGetCurrentPtr(&tempCtx);
545 if (papi_errno != CUDA_SUCCESS) {
546 return PAPI_EMISC;
547 }
548 if (info[gpu_id].ctx == NULL) {
549 if (tempCtx != NULL) {
550 LOGDBG("Registering device = %d with ctx = %p.\n", gpu_id, tempCtx);
551 CUDA_CALL(cuCtxGetCurrentPtr(&info[gpu_id].ctx), return PAPI_EMISC);
552 }
553 else {
554 CUDART_CALL(cudaFreePtr(NULL), return PAPI_EMISC);
555 CUDA_CALL(cuCtxGetCurrentPtr(&info[gpu_id].ctx), return PAPI_EMISC);
556 LOGDBG("Using primary device context %p for device %d.\n", info[gpu_id].ctx, gpu_id);
557 }
558 }
559 /* If context has changed keep the first seen one but with warning */
560 else if (info[gpu_id].ctx != tempCtx) {
561 ERRDBG("Warning: cuda context for gpu %d has changed from %p to %p\n", gpu_id, info[gpu_id].ctx, tempCtx);
562 }
563 return PAPI_OK;
564}
cudaError_t(* cudaFreePtr)(void *)
Definition: cupti_common.c:46
CUresult(* cuCtxGetCurrentPtr)(CUcontext *)
Definition: cupti_common.c:23
cudaError_t(* cudaGetDevicePtr)(int *)
Definition: cupti_common.c:41
#define CUDA_CALL(call, handleerror)
Definition: cupti_common.h:58
#define CUDART_CALL(call, handleerror)
Definition: cupti_common.h:68
#define ERRDBG(format, args...)
Definition: lcuda_debug.h:30
#define LOGDBG(format, args...)
Definition: lcuda_debug.h:24
Here is the caller graph for this function:

◆ cuptic_device_acquire()

int cuptic_device_acquire ( cuptiu_event_table_t *  evt_table)

Definition at line 629 of file cupti_common.c.

630{
631 gpu_occupancy_t bitmask;
632 int papi_errno = _devmask_events_get(evt_table, &bitmask);
633 if (papi_errno != PAPI_OK)
634 return papi_errno;
635 if (bitmask & global_gpu_bitmask) {
636 return PAPI_ECNFLCT;
637 }
639 global_gpu_bitmask |= bitmask;
641 return PAPI_OK;
642}
static int _devmask_events_get(cuptiu_event_table_t *evt_table, gpu_occupancy_t *bitmask)
Definition: cupti_common.c:607
unsigned int _cuda_lock
Definition: cupti_common.c:21
static gpu_occupancy_t global_gpu_bitmask
Definition: cupti_common.c:581
#define PAPI_ECNFLCT
Definition: f90papi.h:234
inline_static int _papi_hwi_lock(int lck)
Definition: threads.h:69
inline_static int _papi_hwi_unlock(int lck)
Definition: threads.h:83
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuptic_device_get_count()

int cuptic_device_get_count ( int *  num_gpus)

Definition at line 303 of file cupti_common.c.

304{
305 cudaError_t cuda_errno = cudaGetDeviceCountPtr(num_gpus);
306 if (cuda_errno != cudaSuccess) {
308 return PAPI_EMISC;
309 }
310 return PAPI_OK;
311}
void cuptic_disabled_reason_set(const char *msg)
Definition: cupti_common.c:385
const char *(* cudaGetErrorStringPtr)(cudaError_t)
Definition: cupti_common.c:42
cudaError_t(* cudaGetDeviceCountPtr)(int *)
Definition: cupti_common.c:40
static int num_gpus
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuptic_device_release()

int cuptic_device_release ( cuptiu_event_table_t *  evt_table)

Definition at line 644 of file cupti_common.c.

645{
646 gpu_occupancy_t bitmask;
647 int papi_errno = _devmask_events_get(evt_table, &bitmask);
648 if (papi_errno != PAPI_OK) {
649 return papi_errno;
650 }
651 if ((bitmask & global_gpu_bitmask) != bitmask) {
652 return PAPI_EMISC;
653 }
655 global_gpu_bitmask ^= bitmask;
657 return PAPI_OK;
658}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuptic_disabled_reason_get()

void cuptic_disabled_reason_get ( const char **  pmsg)

Definition at line 390 of file cupti_common.c.

391{
393}
const char * cuptic_disabled_reason_g
Definition: cupti_common.c:383
Here is the caller graph for this function:

◆ cuptic_disabled_reason_set()

void cuptic_disabled_reason_set ( const char *  msg)

Definition at line 385 of file cupti_common.c.

386{
388}
Here is the caller graph for this function:

◆ cuptic_init()

int cuptic_init ( void  )

Definition at line 417 of file cupti_common.c.

418{
419 int papi_errno = get_user_cudart_path();
420 if (papi_errno == PAPI_OK) {
421 LOGDBG("Linked cudart root: %s\n", linked_cudart_path);
422 }
423 else {
424 LOGDBG("Target application not linked with cuda runtime libraries.\n");
425 }
426 papi_errno = util_load_cuda_sym();
427 if (papi_errno != PAPI_OK) {
428 cuptic_disabled_reason_set("Unable to load CUDA library functions.");
429 goto fn_exit;
430 }
431
432 gpu_collection_e kind;
433 papi_errno = util_gpu_collection_kind(&kind);
434 if (papi_errno != PAPI_OK) {
435 goto fn_exit;
436 }
437
438 if (kind == GPU_COLLECTION_MIXED) {
439 cuptic_disabled_reason_set("No support for systems with mixed compute capabilities, such as CC < 7.0 and CC > 7.0 GPUS.");
440 papi_errno = PAPI_ECMP;
441 goto fn_exit;
442 }
443fn_exit:
444 return papi_errno;
445}
static int util_gpu_collection_kind(gpu_collection_e *coll_kind)
Definition: cupti_common.c:333
static int util_load_cuda_sym(void)
Definition: cupti_common.c:259
static int get_user_cudart_path(void)
Definition: cupti_common.c:408
const char * linked_cudart_path
Definition: cupti_common.c:18
#define PAPI_ECMP
Definition: f90papi.h:214
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuptic_is_runtime_events_api()

int cuptic_is_runtime_events_api ( void  )

Definition at line 488 of file cupti_common.c.

489{
490 static int is_events_api = -1;
491 if (is_events_api != -1) {
492 goto fn_exit;
493 }
494
495 gpu_collection_e gpus_kind;
496 int papi_errno = util_gpu_collection_kind(&gpus_kind);
497 if (papi_errno != PAPI_OK) {
498 goto fn_exit;
499 }
500
501 /*
502 * See cupti_config.h: When NVIDIA removes the events API add a check in the following condition
503 * to check the `util_dylib_cupti_version()` is also <= CUPTI_EVENTS_API_MAX_SUPPORTED_VERSION.
504 */
505 if ((gpus_kind == GPU_COLLECTION_ALL_EVENTS || gpus_kind == GPU_COLLECTION_ALL_CC70)) {
506 is_events_api = 1;
507 goto fn_exit;
508 } else {
509 is_events_api = 0;
510 goto fn_exit;
511 }
512fn_exit:
513 return is_events_api;
514}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuptic_is_runtime_perfworks_api()

int cuptic_is_runtime_perfworks_api ( void  )

Definition at line 447 of file cupti_common.c.

448{
449 static int is_perfworks_api = -1;
450 if (is_perfworks_api != -1) {
451 goto fn_exit;
452 }
453 char *papi_cuda_110_cc70_perfworks_api = getenv("PAPI_CUDA_110_CC_70_PERFWORKS_API");
454
455 gpu_collection_e gpus_kind;
456 int papi_errno = util_gpu_collection_kind(&gpus_kind);
457 if (papi_errno != PAPI_OK) {
458 goto fn_exit;
459 }
460
461 unsigned int cuptiVersion = util_dylib_cupti_version();
462
463 if (gpus_kind == GPU_COLLECTION_ALL_CC70 &&
465 {
466 if (papi_cuda_110_cc70_perfworks_api != NULL) {
467 is_perfworks_api = 1;
468 goto fn_exit;
469 }
470 else {
471 is_perfworks_api = 0;
472 goto fn_exit;
473 }
474 }
475
476 if ((gpus_kind == GPU_COLLECTION_ALL_PERF || gpus_kind == GPU_COLLECTION_ALL_CC70) && cuptiVersion >= CUPTI_PROFILER_API_MIN_SUPPORTED_VERSION) {
477 is_perfworks_api = 1;
478 goto fn_exit;
479 } else {
480 is_perfworks_api = 0;
481 goto fn_exit;
482 }
483
484fn_exit:
485 return is_perfworks_api;
486}
static int util_dylib_cupti_version(void)
Definition: cupti_common.c:296
static int util_dylib_cu_runtime_version(void)
Definition: cupti_common.c:289
#define CUPTI_PROFILER_API_MIN_SUPPORTED_VERSION
Definition: cupti_config.h:12
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuptic_load_dynamic_syms()

void * cuptic_load_dynamic_syms ( const char *  parent_path,
const char *  dlname,
const char *  search_subpaths[] 
)

Definition at line 110 of file cupti_common.c.

111{
112 void *dl = NULL;
113 char lookup_path[PATH_MAX];
114 char *found_files[CUPTIU_MAX_FILES];
115 int i, count;
116 for (i = 0; search_subpaths[i] != NULL; i++) {
117 sprintf(lookup_path, search_subpaths[i], parent_path, dlname);
118 dl = dlopen(lookup_path, RTLD_NOW | RTLD_GLOBAL);
119 if (dl) {
120 return dl;
121 }
122 }
123 count = cuptiu_files_search_in_path(dlname, parent_path, found_files);
124 for (i = 0; i < count; i++) {
125 dl = dlopen(found_files[i], RTLD_NOW | RTLD_GLOBAL);
126 if (dl) {
127 break;
128 }
129 }
130 for (i = 0; i < count; i++) {
131 papi_free(found_files[i]);
132 }
133 return dl;
134}
static long count
int cuptiu_files_search_in_path(const char *file_name, const char *search_path, char **file_paths)
Definition: cupti_utils.c:176
#define CUPTIU_MAX_FILES
Definition: cupti_utils.h:38
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuptic_shutdown()

int cuptic_shutdown ( void  )

Definition at line 280 of file cupti_common.c.

281{
286 return PAPI_OK;
287}
static int unload_cupti_common_sym(void)
Definition: cupti_common.c:249
static int unload_cudart_sym(void)
Definition: cupti_common.c:187
static int unload_cuda_sym(void)
Definition: cupti_common.c:85
static void unload_linked_cudart_path(void)
Definition: cupti_common.c:272
Here is the call graph for this function:
Here is the caller graph for this function:

◆ dl_iterate_phdr_cb()

static int dl_iterate_phdr_cb ( struct dl_phdr_info *  info,
__attribute__((unused)) size_t  size,
__attribute__((unused)) void *  data 
)
static

Definition at line 395 of file cupti_common.c.

396{
397 const char *library_name = "libcudart.so";
398 char *library_path = strdup(info->dlpi_name);
399
400 if (library_path != NULL && strstr(library_path, library_name) != NULL) {
401 linked_cudart_path = strdup(dirname(dirname((char *) library_path)));
402 }
403
404 free(library_path);
405 return PAPI_OK;
406}
Here is the caller graph for this function:

◆ event_name_get_gpuid()

static int event_name_get_gpuid ( const char *  name,
int *  gpuid 
)
static

Definition at line 583 of file cupti_common.c.

584{
585 int papi_errno = PAPI_OK;
586 char *token;
587 char *copy = strdup(name);
588
589 token = strtok(copy, "=");
590 if (token == NULL) {
591 goto fn_fail;
592 }
593 token = strtok(NULL, "\0");
594 if (token == NULL) {
595 goto fn_fail;
596 }
597 *gpuid = strtol(token, NULL, 10);
598
599fn_exit:
600 papi_free(copy);
601 return papi_errno;
602fn_fail:
603 papi_errno = PAPI_EINVAL;
604 goto fn_exit;
605}
#define PAPI_EINVAL
Definition: f90papi.h:115
const char * name
Definition: rocs.c:225
Here is the caller graph for this function:

◆ get_gpu_compute_capability()

static int get_gpu_compute_capability ( int  dev_num,
int *  cc 
)
static

Definition at line 313 of file cupti_common.c.

314{
315 int cc_major, cc_minor;
316 cudaError_t cuda_errno;
317 cuda_errno = cudaDeviceGetAttributePtr(&cc_major, cudaDevAttrComputeCapabilityMajor, dev_num);
318 if (cuda_errno != cudaSuccess) {
320 return PAPI_EMISC;
321 }
322 cuda_errno = cudaDeviceGetAttributePtr(&cc_minor, cudaDevAttrComputeCapabilityMinor, dev_num);
323 if (cuda_errno != cudaSuccess) {
325 return PAPI_EMISC;
326 }
327 *cc = cc_major * 10 + cc_minor;
328 return PAPI_OK;
329}
cudaError_t(* cudaDeviceGetAttributePtr)(int *value, enum cudaDeviceAttr attr, int device)
Definition: cupti_common.c:45
Here is the call graph for this function:
Here is the caller graph for this function:

◆ get_user_cudart_path()

static int get_user_cudart_path ( void  )
static

Definition at line 408 of file cupti_common.c.

409{
410 dl_iterate_phdr(dl_iterate_phdr_cb, NULL);
411 if (NULL == linked_cudart_path) {
412 return PAPI_EMISC;
413 }
414 return PAPI_OK;
415}
static int dl_iterate_phdr_cb(struct dl_phdr_info *info, __attribute__((unused)) size_t size, __attribute__((unused)) void *data)
Definition: cupti_common.c:395
Here is the call graph for this function:
Here is the caller graph for this function:

◆ load_cuda_sym()

static int load_cuda_sym ( void  )
static

Definition at line 52 of file cupti_common.c.

53{
54 dl_drv = dlopen("libcuda.so", RTLD_NOW | RTLD_GLOBAL);
55 if (!dl_drv) {
56 ERRDBG("Loading installed libcuda.so failed. Check that cuda drivers are installed.\n");
57 goto fn_fail;
58 }
59
60 cuCtxSetCurrentPtr = DLSYM_AND_CHECK(dl_drv, "cuCtxSetCurrent");
61 cuCtxGetCurrentPtr = DLSYM_AND_CHECK(dl_drv, "cuCtxGetCurrent");
62 cuCtxDestroyPtr = DLSYM_AND_CHECK(dl_drv, "cuCtxDestroy");
63 cuCtxCreatePtr = DLSYM_AND_CHECK(dl_drv, "cuCtxCreate");
64 cuCtxGetDevicePtr = DLSYM_AND_CHECK(dl_drv, "cuCtxGetDevice");
65 cuDeviceGetPtr = DLSYM_AND_CHECK(dl_drv, "cuDeviceGet");
66 cuDeviceGetCountPtr = DLSYM_AND_CHECK(dl_drv, "cuDeviceGetCount");
67 cuDeviceGetNamePtr = DLSYM_AND_CHECK(dl_drv, "cuDeviceGetName");
68 cuDevicePrimaryCtxRetainPtr = DLSYM_AND_CHECK(dl_drv, "cuDevicePrimaryCtxRetain");
69 cuDevicePrimaryCtxReleasePtr = DLSYM_AND_CHECK(dl_drv, "cuDevicePrimaryCtxRelease");
70 cuInitPtr = DLSYM_AND_CHECK(dl_drv, "cuInit");
71 cuGetErrorStringPtr = DLSYM_AND_CHECK(dl_drv, "cuGetErrorString");
72 cuCtxPopCurrentPtr = DLSYM_AND_CHECK(dl_drv, "cuCtxPopCurrent");
73 cuCtxPushCurrentPtr = DLSYM_AND_CHECK(dl_drv, "cuCtxPushCurrent");
74 cuCtxSynchronizePtr = DLSYM_AND_CHECK(dl_drv, "cuCtxSynchronize");
75 cuDeviceGetAttributePtr = DLSYM_AND_CHECK(dl_drv, "cuDeviceGetAttribute");
76
77 Dl_info info;
78 dladdr(cuCtxSetCurrentPtr, &info);
79 LOGDBG("CUDA driver library loaded from %s\n", info.dli_fname);
80 return PAPI_OK;
81fn_fail:
82 return PAPI_EMISC;
83}
CUresult(* cuCtxGetDevicePtr)(CUdevice *)
Definition: cupti_common.c:27
CUresult(* cuGetErrorStringPtr)(CUresult error, const char **pStr)
Definition: cupti_common.c:34
CUresult(* cuDeviceGetCountPtr)(int *)
Definition: cupti_common.c:29
CUresult(* cuCtxSetCurrentPtr)(CUcontext)
Definition: cupti_common.c:24
CUresult(* cuDeviceGetAttributePtr)(int *, CUdevice_attribute, CUdevice)
Definition: cupti_common.c:38
CUresult(* cuDevicePrimaryCtxRetainPtr)(CUcontext *pctx, CUdevice)
Definition: cupti_common.c:31
static void * dl_drv
Definition: cupti_common.c:16
CUresult(* cuDeviceGetPtr)(CUdevice *, int)
Definition: cupti_common.c:28
CUresult(* cuCtxPopCurrentPtr)(CUcontext *pctx)
Definition: cupti_common.c:35
CUresult(* cuCtxCreatePtr)(CUcontext *pctx, unsigned int flags, CUdevice dev)
Definition: cupti_common.c:26
CUresult(* cuDeviceGetNamePtr)(char *, int, CUdevice)
Definition: cupti_common.c:30
CUresult(* cuCtxDestroyPtr)(CUcontext)
Definition: cupti_common.c:25
CUresult(* cuInitPtr)(unsigned int)
Definition: cupti_common.c:33
CUresult(* cuCtxSynchronizePtr)()
Definition: cupti_common.c:37
CUresult(* cuCtxPushCurrentPtr)(CUcontext pctx)
Definition: cupti_common.c:36
CUresult(* cuDevicePrimaryCtxReleasePtr)(CUdevice)
Definition: cupti_common.c:32
#define DLSYM_AND_CHECK(dllib, name)
Definition: cupti_common.h:52
Here is the caller graph for this function:

◆ load_cudart_sym()

static int load_cudart_sym ( void  )
static

Definition at line 136 of file cupti_common.c.

137{
138 char dlname[] = "libcudart.so";
139 char lookup_path[PATH_MAX];
140
141 char *papi_cuda_runtime = getenv("PAPI_CUDA_RUNTIME");
142 if (papi_cuda_runtime) {
143 sprintf(lookup_path, "%s/%s", papi_cuda_runtime, dlname);
144 dl_rt = dlopen(lookup_path, RTLD_NOW | RTLD_GLOBAL);
145 }
146
147 const char *standard_paths[] = {
148 "%s/lib64/%s",
149 NULL,
150 };
151
152 if (linked_cudart_path && !dl_rt) {
153 dl_rt = cuptic_load_dynamic_syms(linked_cudart_path, dlname, standard_paths);
154 }
155
156 char *papi_cuda_root = getenv("PAPI_CUDA_ROOT");
157 if (papi_cuda_root && !dl_rt) {
158 dl_rt = cuptic_load_dynamic_syms(papi_cuda_root, dlname, standard_paths);
159 }
160
161 if (!dl_rt) {
162 dl_rt = dlopen(dlname, RTLD_NOW | RTLD_GLOBAL);
163 if (!dl_rt) {
164 ERRDBG("Loading libcudart.so failed. Try setting PAPI_CUDA_ROOT\n");
165 goto fn_fail;
166 }
167 }
168
169 cudaGetDevicePtr = DLSYM_AND_CHECK(dl_rt, "cudaGetDevice");
170 cudaGetDeviceCountPtr = DLSYM_AND_CHECK(dl_rt, "cudaGetDeviceCount");
171 cudaGetDevicePropertiesPtr = DLSYM_AND_CHECK(dl_rt, "cudaGetDeviceProperties");
172 cudaGetErrorStringPtr = DLSYM_AND_CHECK(dl_rt, "cudaGetErrorString");
173 cudaDeviceGetAttributePtr = DLSYM_AND_CHECK(dl_rt, "cudaDeviceGetAttribute");
174 cudaSetDevicePtr = DLSYM_AND_CHECK(dl_rt, "cudaSetDevice");
175 cudaFreePtr = DLSYM_AND_CHECK(dl_rt, "cudaFree");
176 cudaDriverGetVersionPtr = DLSYM_AND_CHECK(dl_rt, "cudaDriverGetVersion");
177 cudaRuntimeGetVersionPtr = DLSYM_AND_CHECK(dl_rt, "cudaRuntimeGetVersion");
178
179 Dl_info info;
180 dladdr(cudaGetDevicePtr, &info);
181 LOGDBG("CUDA runtime library loaded from %s\n", info.dli_fname);
182 return PAPI_OK;
183fn_fail:
184 return PAPI_EMISC;
185}
void * cuptic_load_dynamic_syms(const char *parent_path, const char *dlname, const char *search_subpaths[])
Definition: cupti_common.c:110
cudaError_t(* cudaDriverGetVersionPtr)(int *)
Definition: cupti_common.c:47
static void * dl_rt
Definition: cupti_common.c:16
cudaError_t(* cudaGetDevicePropertiesPtr)(struct cudaDeviceProp *prop, int device)
Definition: cupti_common.c:44
cudaError_t(* cudaRuntimeGetVersionPtr)(int *)
Definition: cupti_common.c:48
cudaError_t(* cudaSetDevicePtr)(int)
Definition: cupti_common.c:43
Here is the call graph for this function:
Here is the caller graph for this function:

◆ load_cupti_common_sym()

static int load_cupti_common_sym ( void  )
static

Definition at line 205 of file cupti_common.c.

206{
207 char dlname[] = "libcupti.so";
208 char lookup_path[PATH_MAX];
209
210 char *papi_cuda_cupti = getenv("PAPI_CUDA_CUPTI");
211 if (papi_cuda_cupti) {
212 sprintf(lookup_path, "%s/%s", papi_cuda_cupti, dlname);
213 dl_cupti = dlopen(lookup_path, RTLD_NOW | RTLD_GLOBAL);
214 }
215
216 const char *standard_paths[] = {
217 "%s/extras/CUPTI/lib64/%s",
218 "%s/lib64/%s",
219 NULL,
220 };
221
223 dl_cupti = cuptic_load_dynamic_syms(linked_cudart_path, dlname, standard_paths);
224 }
225
226 char *papi_cuda_root = getenv("PAPI_CUDA_ROOT");
227 if (papi_cuda_root && !dl_cupti) {
228 dl_cupti = cuptic_load_dynamic_syms(papi_cuda_root, dlname, standard_paths);
229 }
230
231 if (!dl_cupti) {
232 dl_cupti = dlopen(dlname, RTLD_NOW | RTLD_GLOBAL);
233 if (!dl_cupti) {
234 ERRDBG("Loading libcupti.so failed. Try setting PAPI_CUDA_ROOT\n");
235 goto fn_fail;
236 }
237 }
238
239 cuptiGetVersionPtr = DLSYM_AND_CHECK(dl_cupti, "cuptiGetVersion");
240
241 Dl_info info;
242 dladdr(cuptiGetVersionPtr, &info);
243 LOGDBG("CUPTI library loaded from %s\n", info.dli_fname);
244 return PAPI_OK;
245fn_fail:
246 return PAPI_EMISC;
247}
CUptiResult(* cuptiGetVersionPtr)(uint32_t *)
Definition: cupti_common.c:50
void * dl_cupti
Definition: cupti_common.c:19
Here is the call graph for this function:
Here is the caller graph for this function:

◆ unload_cuda_sym()

static int unload_cuda_sym ( void  )
static

Definition at line 85 of file cupti_common.c.

86{
87 if (dl_drv) {
88 dlclose(dl_drv);
89 dl_drv = NULL;
90 }
91 cuCtxSetCurrentPtr = NULL;
92 cuCtxGetCurrentPtr = NULL;
93 cuCtxDestroyPtr = NULL;
94 cuCtxCreatePtr = NULL;
95 cuCtxGetDevicePtr = NULL;
96 cuDeviceGetPtr = NULL;
98 cuDeviceGetNamePtr = NULL;
101 cuInitPtr = NULL;
102 cuGetErrorStringPtr = NULL;
103 cuCtxPopCurrentPtr = NULL;
104 cuCtxPushCurrentPtr = NULL;
105 cuCtxSynchronizePtr = NULL;
107 return PAPI_OK;
108}
Here is the caller graph for this function:

◆ unload_cudart_sym()

static int unload_cudart_sym ( void  )
static

Definition at line 187 of file cupti_common.c.

188{
189 if (dl_rt) {
190 dlclose(dl_rt);
191 dl_rt = NULL;
192 }
193 cudaGetDevicePtr = NULL;
198 cudaSetDevicePtr = NULL;
199 cudaFreePtr = NULL;
202 return PAPI_OK;
203}
Here is the caller graph for this function:

◆ unload_cupti_common_sym()

static int unload_cupti_common_sym ( void  )
static

Definition at line 249 of file cupti_common.c.

250{
251 if (dl_cupti) {
252 dlclose(dl_cupti);
253 dl_cupti = NULL;
254 }
255 cuptiGetVersionPtr = NULL;
256 return PAPI_OK;
257}
Here is the caller graph for this function:

◆ unload_linked_cudart_path()

static void unload_linked_cudart_path ( void  )
static

Definition at line 272 of file cupti_common.c.

273{
274 if (linked_cudart_path) {
276 linked_cudart_path = NULL;
277 }
278}
Here is the caller graph for this function:

◆ util_dylib_cu_runtime_version()

static int util_dylib_cu_runtime_version ( void  )
static

Definition at line 289 of file cupti_common.c.

290{
291 int runtimeVersion;
292 CUDART_CALL(cudaRuntimeGetVersionPtr(&runtimeVersion), return PAPI_EMISC );
293 return runtimeVersion;
294}
Here is the caller graph for this function:

◆ util_dylib_cupti_version()

static int util_dylib_cupti_version ( void  )
static

Definition at line 296 of file cupti_common.c.

297{
298 unsigned int cuptiVersion;
299 CUPTI_CALL(cuptiGetVersionPtr(&cuptiVersion), return PAPI_EMISC );
300 return cuptiVersion;
301}
#define CUPTI_CALL(call, handleerror)
Definition: cupti_common.h:78
Here is the caller graph for this function:

◆ util_gpu_collection_kind()

static int util_gpu_collection_kind ( gpu_collection_e *  coll_kind)
static

Definition at line 333 of file cupti_common.c.

334{
335 int papi_errno = PAPI_OK;
337 if (kind != GPU_COLLECTION_UNKNOWN) {
338 goto fn_exit;
339 }
340
341 int total_gpus;
342 papi_errno = cuptic_device_get_count(&total_gpus);
343 if (papi_errno != PAPI_OK) {
344 goto fn_exit;
345 }
346
347 int i, cc;
348 int count_perf = 0, count_evt = 0, count_cc70 = 0;
349 for (i=0; i<total_gpus; i++) {
350 papi_errno = get_gpu_compute_capability(i, &cc);
351 if (papi_errno != PAPI_OK) {
352 return papi_errno;
353 }
354 if (cc == 70) {
355 ++count_cc70;
356 }
357 if (cc >= 70) {
358 ++count_perf;
359 }
360 if (cc <= 70) {
361 ++count_evt;
362 }
363 }
364 if (count_cc70 == total_gpus) {
366 goto fn_exit;
367 }
368 if (count_perf == total_gpus) {
370 goto fn_exit;
371 }
372 if (count_evt == total_gpus) {
374 goto fn_exit;
375 }
377
378fn_exit:
379 *coll_kind = kind;
380 return papi_errno;
381}
static int get_gpu_compute_capability(int dev_num, int *cc)
Definition: cupti_common.c:313
Here is the call graph for this function:
Here is the caller graph for this function:

◆ util_load_cuda_sym()

static int util_load_cuda_sym ( void  )
static

Definition at line 259 of file cupti_common.c.

260{
261 int papi_errno;
262 papi_errno = load_cuda_sym();
263 papi_errno += load_cudart_sym();
264 papi_errno += load_cupti_common_sym();
265 if (papi_errno != PAPI_OK) {
266 return PAPI_EMISC;
267 }
268 else
269 return PAPI_OK;
270}
static int load_cupti_common_sym(void)
Definition: cupti_common.c:205
static int load_cudart_sym(void)
Definition: cupti_common.c:136
static int load_cuda_sym(void)
Definition: cupti_common.c:52
Here is the call graph for this function:
Here is the caller graph for this function:

Variable Documentation

◆ _cuda_lock

unsigned int _cuda_lock

Definition at line 21 of file cupti_common.c.

◆ cuCtxCreatePtr

CUresult(* cuCtxCreatePtr) (CUcontext *pctx, unsigned int flags, CUdevice dev) ( CUcontext *  pctx,
unsigned int  flags,
CUdevice  dev 
)

Definition at line 26 of file cupti_common.c.

◆ cuCtxDestroyPtr

CUresult(* cuCtxDestroyPtr) (CUcontext) ( CUcontext  )

Definition at line 25 of file cupti_common.c.

◆ cuCtxGetCurrentPtr

CUresult(* cuCtxGetCurrentPtr) (CUcontext *) ( CUcontext *  )

Definition at line 23 of file cupti_common.c.

◆ cuCtxGetDevicePtr

CUresult(* cuCtxGetDevicePtr) (CUdevice *) ( CUdevice *  )

Definition at line 27 of file cupti_common.c.

◆ cuCtxPopCurrentPtr

CUresult(* cuCtxPopCurrentPtr) (CUcontext *pctx) ( CUcontext *  pctx)

Definition at line 35 of file cupti_common.c.

◆ cuCtxPushCurrentPtr

CUresult(* cuCtxPushCurrentPtr) (CUcontext pctx) ( CUcontext  pctx)

Definition at line 36 of file cupti_common.c.

◆ cuCtxSetCurrentPtr

CUresult(* cuCtxSetCurrentPtr) (CUcontext) ( CUcontext  )

Definition at line 24 of file cupti_common.c.

◆ cuCtxSynchronizePtr

CUresult(* cuCtxSynchronizePtr) () ( )

Definition at line 37 of file cupti_common.c.

◆ cudaDeviceGetAttributePtr

cudaError_t(* cudaDeviceGetAttributePtr) (int *value, enum cudaDeviceAttr attr, int device) ( int *  value,
enum cudaDeviceAttr  attr,
int  device 
)

Definition at line 45 of file cupti_common.c.

◆ cudaDriverGetVersionPtr

cudaError_t(* cudaDriverGetVersionPtr) (int *) ( int *  )

Definition at line 47 of file cupti_common.c.

◆ cudaFreePtr

cudaError_t(* cudaFreePtr) (void *) ( void *  )

Definition at line 46 of file cupti_common.c.

◆ cudaGetDeviceCountPtr

cudaError_t(* cudaGetDeviceCountPtr) (int *) ( int *  )

Definition at line 40 of file cupti_common.c.

◆ cudaGetDevicePropertiesPtr

cudaError_t(* cudaGetDevicePropertiesPtr) (struct cudaDeviceProp *prop, int device) ( struct cudaDeviceProp *  prop,
int  device 
)

Definition at line 44 of file cupti_common.c.

◆ cudaGetDevicePtr

cudaError_t(* cudaGetDevicePtr) (int *) ( int *  )

Definition at line 41 of file cupti_common.c.

◆ cudaGetErrorStringPtr

const char *(* cudaGetErrorStringPtr) (cudaError_t) ( cudaError_t  )

Definition at line 42 of file cupti_common.c.

◆ cudaRuntimeGetVersionPtr

cudaError_t(* cudaRuntimeGetVersionPtr) (int *) ( int *  )

Definition at line 48 of file cupti_common.c.

◆ cudaSetDevicePtr

cudaError_t(* cudaSetDevicePtr) (int) ( int  )

Definition at line 43 of file cupti_common.c.

◆ cuDeviceGetAttributePtr

CUresult(* cuDeviceGetAttributePtr) (int *, CUdevice_attribute, CUdevice) ( int *  ,
CUdevice_attribute  ,
CUdevice   
)

Definition at line 38 of file cupti_common.c.

◆ cuDeviceGetCountPtr

CUresult(* cuDeviceGetCountPtr) (int *) ( int *  )

Definition at line 29 of file cupti_common.c.

◆ cuDeviceGetNamePtr

CUresult(* cuDeviceGetNamePtr) (char *, int, CUdevice) ( char *  ,
int  ,
CUdevice   
)

Definition at line 30 of file cupti_common.c.

◆ cuDeviceGetPtr

CUresult(* cuDeviceGetPtr) (CUdevice *, int) ( CUdevice *  ,
int   
)

Definition at line 28 of file cupti_common.c.

◆ cuDevicePrimaryCtxReleasePtr

CUresult(* cuDevicePrimaryCtxReleasePtr) (CUdevice) ( CUdevice  )

Definition at line 32 of file cupti_common.c.

◆ cuDevicePrimaryCtxRetainPtr

CUresult(* cuDevicePrimaryCtxRetainPtr) (CUcontext *pctx, CUdevice) ( CUcontext *  pctx,
CUdevice   
)

Definition at line 31 of file cupti_common.c.

◆ cuGetErrorStringPtr

CUresult(* cuGetErrorStringPtr) (CUresult error, const char **pStr) ( CUresult  error,
const char **  pStr 
)

Definition at line 34 of file cupti_common.c.

◆ cuInitPtr

CUresult(* cuInitPtr) (unsigned int) ( unsigned int  )

Definition at line 33 of file cupti_common.c.

◆ cuptic_disabled_reason_g

const char* cuptic_disabled_reason_g

Definition at line 383 of file cupti_common.c.

◆ cuptiGetVersionPtr

CUptiResult(* cuptiGetVersionPtr) (uint32_t *) ( uint32_t *  )

Definition at line 50 of file cupti_common.c.

◆ dl_cupti

void* dl_cupti

Definition at line 19 of file cupti_common.c.

◆ dl_drv

void* dl_drv
static

Definition at line 16 of file cupti_common.c.

◆ dl_rt

void * dl_rt
static

Definition at line 16 of file cupti_common.c.

◆ global_gpu_bitmask

gpu_occupancy_t global_gpu_bitmask
static

Definition at line 581 of file cupti_common.c.

◆ linked_cudart_path

const char* linked_cudart_path

Definition at line 18 of file cupti_common.c.