PAPI 7.1.0.0
Loading...
Searching...
No Matches
cupti_common.h File Reference
Include dependency graph for cupti_common.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Macros

#define DLSYM_AND_CHECK(dllib, name)
 
#define CUDA_CALL(call, handleerror)
 
#define CUDART_CALL(call, handleerror)
 
#define CUPTI_CALL(call, handleerror)
 

Functions

void cuptic_disabled_reason_set (const char *msg)
 
void cuptic_disabled_reason_get (const char **pmsg)
 
void * cuptic_load_dynamic_syms (const char *parent_path, const char *dlname, const char *search_subpaths[])
 
int cuptic_shutdown (void)
 
int cuptic_device_get_count (int *num_gpus)
 
int cuptic_init (void)
 
int cuptic_is_runtime_perfworks_api (void)
 
int cuptic_is_runtime_events_api (void)
 
int cuptic_ctxarr_create (cuptic_info_t *pinfo)
 
int cuptic_ctxarr_update_current (cuptic_info_t info)
 
int cuptic_ctxarr_get_ctx (cuptic_info_t info, int gpu_idx, CUcontext *ctx)
 
int cuptic_ctxarr_destroy (cuptic_info_t *pinfo)
 
int cuptic_device_acquire (cuptiu_event_table_t *evt_table)
 
int cuptic_device_release (cuptiu_event_table_t *evt_table)
 

Variables

const char * linked_cudart_path
 
void * dl_cupti
 
unsigned int _cuda_lock
 
CUresult(* cuCtxGetCurrentPtr )(CUcontext *)
 
CUresult(* cuCtxSetCurrentPtr )(CUcontext)
 
CUresult(* cuCtxDestroyPtr )(CUcontext)
 
CUresult(* cuCtxCreatePtr )(CUcontext *pctx, unsigned int flags, CUdevice dev)
 
CUresult(* cuCtxGetDevicePtr )(CUdevice *)
 
CUresult(* cuDeviceGetPtr )(CUdevice *, int)
 
CUresult(* cuDeviceGetCountPtr )(int *)
 
CUresult(* cuDeviceGetNamePtr )(char *, int, CUdevice)
 
CUresult(* cuDevicePrimaryCtxRetainPtr )(CUcontext *pctx, CUdevice)
 
CUresult(* cuDevicePrimaryCtxReleasePtr )(CUdevice)
 
CUresult(* cuInitPtr )(unsigned int)
 
CUresult(* cuGetErrorStringPtr )(CUresult error, const char **pStr)
 
CUresult(* cuCtxPopCurrentPtr )(CUcontext *pctx)
 
CUresult(* cuCtxPushCurrentPtr )(CUcontext pctx)
 
CUresult(* cuCtxSynchronizePtr )()
 
CUresult(* cuDeviceGetAttributePtr )(int *, CUdevice_attribute, CUdevice)
 
cudaError_t(* cudaGetDeviceCountPtr )(int *)
 
cudaError_t(* cudaGetDevicePtr )(int *)
 
cudaError_t(* cudaSetDevicePtr )(int)
 
cudaError_t(* cudaGetDevicePropertiesPtr )(struct cudaDeviceProp *prop, int device)
 
cudaError_t(* cudaDeviceGetAttributePtr )(int *value, enum cudaDeviceAttr attr, int device)
 
cudaError_t(* cudaFreePtr )(void *)
 
cudaError_t(* cudaDriverGetVersionPtr )(int *)
 
cudaError_t(* cudaRuntimeGetVersionPtr )(int *)
 
CUptiResult(* cuptiGetVersionPtr )(uint32_t *)
 

Detailed Description

Macro Definition Documentation

◆ CUDA_CALL

#define CUDA_CALL (   call,
  handleerror 
)
Value:
do { \
CUresult _status = (call); \
LOGCUDACALL("\t" #call "\n"); \
if (_status != CUDA_SUCCESS) { \
ERRDBG("CUDA Error %d: Error in call to " #call "\n", _status); \
handleerror; \
} \
} while (0);
#define EXIT_OR_NOT
Definition: lcuda_debug.h:14

Definition at line 58 of file cupti_common.h.

◆ CUDART_CALL

#define CUDART_CALL (   call,
  handleerror 
)
Value:
do { \
cudaError_t _status = (call); \
LOGCUDACALL("\t" #call "\n"); \
if (_status != cudaSuccess) { \
ERRDBG("CUDART Error %d: Error in call to " #call "\n", _status); \
handleerror; \
} \
} while (0);

Definition at line 68 of file cupti_common.h.

◆ CUPTI_CALL

#define CUPTI_CALL (   call,
  handleerror 
)
Value:
do { \
CUptiResult _status = (call); \
LOGCUPTICALL("\t" #call "\n"); \
if (_status != CUPTI_SUCCESS) { \
ERRDBG("CUPTI Error %d: Error in call to " #call "\n", _status); \
handleerror; \
} \
} while (0);

Definition at line 78 of file cupti_common.h.

◆ DLSYM_AND_CHECK

#define DLSYM_AND_CHECK (   dllib,
  name 
)
Value:
dlsym( dllib, name ); \
if (dlerror() != NULL) { \
ERRDBG("A CUDA required function '%s' was not found in lib '%s'.\n", name, #dllib); \
return PAPI_EMISC; \
}
#define PAPI_EMISC
Definition: f90papi.h:122
const char * name
Definition: rocs.c:225

Definition at line 52 of file cupti_common.h.

Function Documentation

◆ cuptic_ctxarr_create()

int cuptic_ctxarr_create ( cuptic_info_t *  pinfo)

Definition at line 520 of file cupti_common.c.

521{
522 COMPDBG("Entering.\n");
523 int total_gpus;
524 int papi_errno = cuptic_device_get_count(&total_gpus);
525 if (papi_errno != PAPI_OK) {
526 return PAPI_EMISC;
527 }
528 cuptic_info_t cuCtx = (cuptic_info_t) papi_calloc (total_gpus, sizeof(*pinfo));
529 if (cuCtx == NULL) {
530 return PAPI_ENOMEM;
531 }
532 *pinfo = cuCtx;
533 return PAPI_OK;
534}
int cuptic_device_get_count(int *num_gpus)
Definition: cupti_common.c:303
#define PAPI_OK
Definition: f90papi.h:73
#define PAPI_ENOMEM
Definition: f90papi.h:16
#define COMPDBG(format, args...)
Definition: lcuda_debug.h:21
#define papi_calloc(a, b)
Definition: papi_memory.h:37
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuptic_ctxarr_destroy()

int cuptic_ctxarr_destroy ( cuptic_info_t *  pinfo)

Definition at line 572 of file cupti_common.c.

573{
574 papi_free(*pinfo);
575 *pinfo = NULL;
576 return PAPI_OK;
577}
#define papi_free(a)
Definition: papi_memory.h:35
Here is the caller graph for this function:

◆ cuptic_ctxarr_get_ctx()

int cuptic_ctxarr_get_ctx ( cuptic_info_t  info,
int  gpu_idx,
CUcontext *  ctx 
)

Definition at line 566 of file cupti_common.c.

567{
568 *ctx = info[gpu_idx].ctx;
569 return PAPI_OK;
570}
Here is the caller graph for this function:

◆ cuptic_ctxarr_update_current()

int cuptic_ctxarr_update_current ( cuptic_info_t  info)

Definition at line 536 of file cupti_common.c.

537{
538 int papi_errno, gpu_id;
539 CUcontext tempCtx;
540 papi_errno = cudaGetDevicePtr(&gpu_id);
541 if (papi_errno != cudaSuccess) {
542 return PAPI_EMISC;
543 }
544 papi_errno = cuCtxGetCurrentPtr(&tempCtx);
545 if (papi_errno != CUDA_SUCCESS) {
546 return PAPI_EMISC;
547 }
548 if (info[gpu_id].ctx == NULL) {
549 if (tempCtx != NULL) {
550 LOGDBG("Registering device = %d with ctx = %p.\n", gpu_id, tempCtx);
551 CUDA_CALL(cuCtxGetCurrentPtr(&info[gpu_id].ctx), return PAPI_EMISC);
552 }
553 else {
554 CUDART_CALL(cudaFreePtr(NULL), return PAPI_EMISC);
555 CUDA_CALL(cuCtxGetCurrentPtr(&info[gpu_id].ctx), return PAPI_EMISC);
556 LOGDBG("Using primary device context %p for device %d.\n", info[gpu_id].ctx, gpu_id);
557 }
558 }
559 /* If context has changed keep the first seen one but with warning */
560 else if (info[gpu_id].ctx != tempCtx) {
561 ERRDBG("Warning: cuda context for gpu %d has changed from %p to %p\n", gpu_id, info[gpu_id].ctx, tempCtx);
562 }
563 return PAPI_OK;
564}
cudaError_t(* cudaFreePtr)(void *)
Definition: cupti_common.c:46
CUresult(* cuCtxGetCurrentPtr)(CUcontext *)
Definition: cupti_common.c:23
cudaError_t(* cudaGetDevicePtr)(int *)
Definition: cupti_common.c:41
#define CUDA_CALL(call, handleerror)
Definition: cupti_common.h:58
#define CUDART_CALL(call, handleerror)
Definition: cupti_common.h:68
#define ERRDBG(format, args...)
Definition: lcuda_debug.h:30
#define LOGDBG(format, args...)
Definition: lcuda_debug.h:24
Here is the caller graph for this function:

◆ cuptic_device_acquire()

int cuptic_device_acquire ( cuptiu_event_table_t *  evt_table)

Definition at line 629 of file cupti_common.c.

630{
631 gpu_occupancy_t bitmask;
632 int papi_errno = _devmask_events_get(evt_table, &bitmask);
633 if (papi_errno != PAPI_OK)
634 return papi_errno;
635 if (bitmask & global_gpu_bitmask) {
636 return PAPI_ECNFLCT;
637 }
638 _papi_hwi_lock(_cuda_lock);
639 global_gpu_bitmask |= bitmask;
640 _papi_hwi_unlock(_cuda_lock);
641 return PAPI_OK;
642}
static int _devmask_events_get(cuptiu_event_table_t *evt_table, gpu_occupancy_t *bitmask)
Definition: cupti_common.c:607
unsigned int _cuda_lock
Definition: cupti_common.c:21
static gpu_occupancy_t global_gpu_bitmask
Definition: cupti_common.c:581
int64_t gpu_occupancy_t
Definition: cupti_common.c:580
#define PAPI_ECNFLCT
Definition: f90papi.h:234
inline_static int _papi_hwi_lock(int lck)
Definition: threads.h:69
inline_static int _papi_hwi_unlock(int lck)
Definition: threads.h:83
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuptic_device_get_count()

int cuptic_device_get_count ( int *  num_gpus)

Definition at line 303 of file cupti_common.c.

304{
305 cudaError_t cuda_errno = cudaGetDeviceCountPtr(num_gpus);
306 if (cuda_errno != cudaSuccess) {
307 cuptic_disabled_reason_set(cudaGetErrorStringPtr(cuda_errno));
308 return PAPI_EMISC;
309 }
310 return PAPI_OK;
311}
void cuptic_disabled_reason_set(const char *msg)
Definition: cupti_common.c:385
const char *(* cudaGetErrorStringPtr)(cudaError_t)
Definition: cupti_common.c:42
cudaError_t(* cudaGetDeviceCountPtr)(int *)
Definition: cupti_common.c:40
static int num_gpus
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuptic_device_release()

int cuptic_device_release ( cuptiu_event_table_t *  evt_table)

Definition at line 644 of file cupti_common.c.

645{
646 gpu_occupancy_t bitmask;
647 int papi_errno = _devmask_events_get(evt_table, &bitmask);
648 if (papi_errno != PAPI_OK) {
649 return papi_errno;
650 }
651 if ((bitmask & global_gpu_bitmask) != bitmask) {
652 return PAPI_EMISC;
653 }
654 _papi_hwi_lock(_cuda_lock);
655 global_gpu_bitmask ^= bitmask;
656 _papi_hwi_unlock(_cuda_lock);
657 return PAPI_OK;
658}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuptic_disabled_reason_get()

void cuptic_disabled_reason_get ( const char **  pmsg)

Definition at line 390 of file cupti_common.c.

391{
392 *pmsg = cuptic_disabled_reason_g;
393}
const char * cuptic_disabled_reason_g
Definition: cupti_common.c:383
Here is the caller graph for this function:

◆ cuptic_disabled_reason_set()

void cuptic_disabled_reason_set ( const char *  msg)

Definition at line 385 of file cupti_common.c.

386{
388}
Here is the caller graph for this function:

◆ cuptic_init()

int cuptic_init ( void  )

Definition at line 417 of file cupti_common.c.

418{
419 int papi_errno = get_user_cudart_path();
420 if (papi_errno == PAPI_OK) {
421 LOGDBG("Linked cudart root: %s\n", linked_cudart_path);
422 }
423 else {
424 LOGDBG("Target application not linked with cuda runtime libraries.\n");
425 }
426 papi_errno = util_load_cuda_sym();
427 if (papi_errno != PAPI_OK) {
428 cuptic_disabled_reason_set("Unable to load CUDA library functions.");
429 goto fn_exit;
430 }
431
432 gpu_collection_e kind;
433 papi_errno = util_gpu_collection_kind(&kind);
434 if (papi_errno != PAPI_OK) {
435 goto fn_exit;
436 }
437
438 if (kind == GPU_COLLECTION_MIXED) {
439 cuptic_disabled_reason_set("No support for systems with mixed compute capabilities, such as CC < 7.0 and CC > 7.0 GPUS.");
440 papi_errno = PAPI_ECMP;
441 goto fn_exit;
442 }
443fn_exit:
444 return papi_errno;
445}
gpu_collection_e
Definition: cupti_common.c:331
@ GPU_COLLECTION_MIXED
Definition: cupti_common.c:331
static int util_gpu_collection_kind(gpu_collection_e *coll_kind)
Definition: cupti_common.c:333
static int util_load_cuda_sym(void)
Definition: cupti_common.c:259
static int get_user_cudart_path(void)
Definition: cupti_common.c:408
const char * linked_cudart_path
Definition: cupti_common.c:18
#define PAPI_ECMP
Definition: f90papi.h:214
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuptic_is_runtime_events_api()

int cuptic_is_runtime_events_api ( void  )

Definition at line 488 of file cupti_common.c.

489{
490 static int is_events_api = -1;
491 if (is_events_api != -1) {
492 goto fn_exit;
493 }
494
495 gpu_collection_e gpus_kind;
496 int papi_errno = util_gpu_collection_kind(&gpus_kind);
497 if (papi_errno != PAPI_OK) {
498 goto fn_exit;
499 }
500
501 /*
502 * See cupti_config.h: When NVIDIA removes the events API add a check in the following condition
503 * to check the `util_dylib_cupti_version()` is also <= CUPTI_EVENTS_API_MAX_SUPPORTED_VERSION.
504 */
505 if ((gpus_kind == GPU_COLLECTION_ALL_EVENTS || gpus_kind == GPU_COLLECTION_ALL_CC70)) {
506 is_events_api = 1;
507 goto fn_exit;
508 } else {
509 is_events_api = 0;
510 goto fn_exit;
511 }
512fn_exit:
513 return is_events_api;
514}
@ GPU_COLLECTION_ALL_CC70
Definition: cupti_common.c:331
@ GPU_COLLECTION_ALL_EVENTS
Definition: cupti_common.c:331
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuptic_is_runtime_perfworks_api()

int cuptic_is_runtime_perfworks_api ( void  )

Definition at line 447 of file cupti_common.c.

448{
449 static int is_perfworks_api = -1;
450 if (is_perfworks_api != -1) {
451 goto fn_exit;
452 }
453 char *papi_cuda_110_cc70_perfworks_api = getenv("PAPI_CUDA_110_CC_70_PERFWORKS_API");
454
455 gpu_collection_e gpus_kind;
456 int papi_errno = util_gpu_collection_kind(&gpus_kind);
457 if (papi_errno != PAPI_OK) {
458 goto fn_exit;
459 }
460
461 unsigned int cuptiVersion = util_dylib_cupti_version();
462
463 if (gpus_kind == GPU_COLLECTION_ALL_CC70 &&
465 {
466 if (papi_cuda_110_cc70_perfworks_api != NULL) {
467 is_perfworks_api = 1;
468 goto fn_exit;
469 }
470 else {
471 is_perfworks_api = 0;
472 goto fn_exit;
473 }
474 }
475
476 if ((gpus_kind == GPU_COLLECTION_ALL_PERF || gpus_kind == GPU_COLLECTION_ALL_CC70) && cuptiVersion >= CUPTI_PROFILER_API_MIN_SUPPORTED_VERSION) {
477 is_perfworks_api = 1;
478 goto fn_exit;
479 } else {
480 is_perfworks_api = 0;
481 goto fn_exit;
482 }
483
484fn_exit:
485 return is_perfworks_api;
486}
@ GPU_COLLECTION_ALL_PERF
Definition: cupti_common.c:331
static int util_dylib_cupti_version(void)
Definition: cupti_common.c:296
static int util_dylib_cu_runtime_version(void)
Definition: cupti_common.c:289
#define CUPTI_PROFILER_API_MIN_SUPPORTED_VERSION
Definition: cupti_config.h:12
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuptic_load_dynamic_syms()

void * cuptic_load_dynamic_syms ( const char *  parent_path,
const char *  dlname,
const char *  search_subpaths[] 
)

Definition at line 110 of file cupti_common.c.

111{
112 void *dl = NULL;
113 char lookup_path[PATH_MAX];
114 char *found_files[CUPTIU_MAX_FILES];
115 int i, count;
116 for (i = 0; search_subpaths[i] != NULL; i++) {
117 sprintf(lookup_path, search_subpaths[i], parent_path, dlname);
118 dl = dlopen(lookup_path, RTLD_NOW | RTLD_GLOBAL);
119 if (dl) {
120 return dl;
121 }
122 }
123 count = cuptiu_files_search_in_path(dlname, parent_path, found_files);
124 for (i = 0; i < count; i++) {
125 dl = dlopen(found_files[i], RTLD_NOW | RTLD_GLOBAL);
126 if (dl) {
127 break;
128 }
129 }
130 for (i = 0; i < count; i++) {
131 papi_free(found_files[i]);
132 }
133 return dl;
134}
int i
static long count
int cuptiu_files_search_in_path(const char *file_name, const char *search_path, char **file_paths)
Definition: cupti_utils.c:176
#define CUPTIU_MAX_FILES
Definition: cupti_utils.h:38
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuptic_shutdown()

int cuptic_shutdown ( void  )

Definition at line 280 of file cupti_common.c.

281{
286 return PAPI_OK;
287}
static int unload_cupti_common_sym(void)
Definition: cupti_common.c:249
static int unload_cudart_sym(void)
Definition: cupti_common.c:187
static int unload_cuda_sym(void)
Definition: cupti_common.c:85
static void unload_linked_cudart_path(void)
Definition: cupti_common.c:272
Here is the call graph for this function:
Here is the caller graph for this function:

Variable Documentation

◆ _cuda_lock

unsigned int _cuda_lock
extern

Definition at line 21 of file cupti_common.c.

◆ cuCtxCreatePtr

CUresult(* cuCtxCreatePtr) (CUcontext *pctx, unsigned int flags, CUdevice dev) ( CUcontext *  pctx,
unsigned int  flags,
CUdevice  dev 
)
extern

Definition at line 26 of file cupti_common.c.

◆ cuCtxDestroyPtr

CUresult(* cuCtxDestroyPtr) (CUcontext) ( CUcontext  )
extern

Definition at line 25 of file cupti_common.c.

◆ cuCtxGetCurrentPtr

CUresult(* cuCtxGetCurrentPtr) (CUcontext *) ( CUcontext *  )
extern

Definition at line 23 of file cupti_common.c.

◆ cuCtxGetDevicePtr

CUresult(* cuCtxGetDevicePtr) (CUdevice *) ( CUdevice *  )
extern

Definition at line 27 of file cupti_common.c.

◆ cuCtxPopCurrentPtr

CUresult(* cuCtxPopCurrentPtr) (CUcontext *pctx) ( CUcontext *  pctx)
extern

Definition at line 35 of file cupti_common.c.

◆ cuCtxPushCurrentPtr

CUresult(* cuCtxPushCurrentPtr) (CUcontext pctx) ( CUcontext  pctx)
extern

Definition at line 36 of file cupti_common.c.

◆ cuCtxSetCurrentPtr

CUresult(* cuCtxSetCurrentPtr) (CUcontext) ( CUcontext  )
extern

Definition at line 24 of file cupti_common.c.

◆ cuCtxSynchronizePtr

CUresult(* cuCtxSynchronizePtr) () ( )
extern

Definition at line 37 of file cupti_common.c.

◆ cudaDeviceGetAttributePtr

cudaError_t(* cudaDeviceGetAttributePtr) (int *value, enum cudaDeviceAttr attr, int device) ( int *  value,
enum cudaDeviceAttr  attr,
int  device 
)
extern

Definition at line 45 of file cupti_common.c.

◆ cudaDriverGetVersionPtr

cudaError_t(* cudaDriverGetVersionPtr) (int *) ( int *  )
extern

Definition at line 47 of file cupti_common.c.

◆ cudaFreePtr

cudaError_t(* cudaFreePtr) (void *) ( void *  )
extern

Definition at line 46 of file cupti_common.c.

◆ cudaGetDeviceCountPtr

cudaError_t(* cudaGetDeviceCountPtr) (int *) ( int *  )
extern

Definition at line 40 of file cupti_common.c.

◆ cudaGetDevicePropertiesPtr

cudaError_t(* cudaGetDevicePropertiesPtr) (struct cudaDeviceProp *prop, int device) ( struct cudaDeviceProp *  prop,
int  device 
)
extern

Definition at line 44 of file cupti_common.c.

◆ cudaGetDevicePtr

cudaError_t(* cudaGetDevicePtr) (int *) ( int *  )
extern

Definition at line 41 of file cupti_common.c.

◆ cudaRuntimeGetVersionPtr

cudaError_t(* cudaRuntimeGetVersionPtr) (int *) ( int *  )
extern

Definition at line 48 of file cupti_common.c.

◆ cudaSetDevicePtr

cudaError_t(* cudaSetDevicePtr) (int) ( int  )
extern

Definition at line 43 of file cupti_common.c.

◆ cuDeviceGetAttributePtr

CUresult(* cuDeviceGetAttributePtr) (int *, CUdevice_attribute, CUdevice) ( int *  ,
CUdevice_attribute  ,
CUdevice   
)
extern

Definition at line 38 of file cupti_common.c.

◆ cuDeviceGetCountPtr

CUresult(* cuDeviceGetCountPtr) (int *) ( int *  )
extern

Definition at line 29 of file cupti_common.c.

◆ cuDeviceGetNamePtr

CUresult(* cuDeviceGetNamePtr) (char *, int, CUdevice) ( char *  ,
int  ,
CUdevice   
)
extern

Definition at line 30 of file cupti_common.c.

◆ cuDeviceGetPtr

CUresult(* cuDeviceGetPtr) (CUdevice *, int) ( CUdevice *  ,
int   
)
extern

Definition at line 28 of file cupti_common.c.

◆ cuDevicePrimaryCtxReleasePtr

CUresult(* cuDevicePrimaryCtxReleasePtr) (CUdevice) ( CUdevice  )
extern

Definition at line 32 of file cupti_common.c.

◆ cuDevicePrimaryCtxRetainPtr

CUresult(* cuDevicePrimaryCtxRetainPtr) (CUcontext *pctx, CUdevice) ( CUcontext *  pctx,
CUdevice   
)
extern

Definition at line 31 of file cupti_common.c.

◆ cuGetErrorStringPtr

CUresult(* cuGetErrorStringPtr) (CUresult error, const char **pStr) ( CUresult  error,
const char **  pStr 
)
extern

Definition at line 34 of file cupti_common.c.

◆ cuInitPtr

CUresult(* cuInitPtr) (unsigned int) ( unsigned int  )
extern

Definition at line 33 of file cupti_common.c.

◆ cuptiGetVersionPtr

CUptiResult(* cuptiGetVersionPtr) (uint32_t *) ( uint32_t *  )
extern

Definition at line 50 of file cupti_common.c.

◆ dl_cupti

void* dl_cupti
extern

Definition at line 19 of file cupti_common.c.

◆ linked_cudart_path

const char* linked_cudart_path
extern

Definition at line 18 of file cupti_common.c.