26CUresult ( *
cuCtxCreatePtr ) (CUcontext *pctx,
unsigned int flags, CUdevice dev);
42const char *( *cudaGetErrorStringPtr ) (cudaError_t);
54 dl_drv = dlopen(
"libcuda.so", RTLD_NOW | RTLD_GLOBAL);
56 ERRDBG(
"Loading installed libcuda.so failed. Check that cuda drivers are installed.\n");
79 LOGDBG(
"CUDA driver library loaded from %s\n", info.dli_fname);
113 char lookup_path[PATH_MAX];
116 for (
i = 0; search_subpaths[
i] != NULL;
i++) {
117 sprintf(lookup_path, search_subpaths[
i], parent_path, dlname);
118 dl = dlopen(lookup_path, RTLD_NOW | RTLD_GLOBAL);
125 dl = dlopen(found_files[
i], RTLD_NOW | RTLD_GLOBAL);
138 char dlname[] =
"libcudart.so";
139 char lookup_path[PATH_MAX];
141 char *papi_cuda_runtime = getenv(
"PAPI_CUDA_RUNTIME");
142 if (papi_cuda_runtime) {
143 sprintf(lookup_path,
"%s/%s", papi_cuda_runtime, dlname);
144 dl_rt = dlopen(lookup_path, RTLD_NOW | RTLD_GLOBAL);
147 const char *standard_paths[] = {
156 char *papi_cuda_root = getenv(
"PAPI_CUDA_ROOT");
157 if (papi_cuda_root && !
dl_rt) {
162 dl_rt = dlopen(dlname, RTLD_NOW | RTLD_GLOBAL);
164 ERRDBG(
"Loading libcudart.so failed. Try setting PAPI_CUDA_ROOT\n");
181 LOGDBG(
"CUDA runtime library loaded from %s\n", info.dli_fname);
207 char dlname[] =
"libcupti.so";
208 char lookup_path[PATH_MAX];
210 char *papi_cuda_cupti = getenv(
"PAPI_CUDA_CUPTI");
211 if (papi_cuda_cupti) {
212 sprintf(lookup_path,
"%s/%s", papi_cuda_cupti, dlname);
213 dl_cupti = dlopen(lookup_path, RTLD_NOW | RTLD_GLOBAL);
216 const char *standard_paths[] = {
217 "%s/extras/CUPTI/lib64/%s",
226 char *papi_cuda_root = getenv(
"PAPI_CUDA_ROOT");
232 dl_cupti = dlopen(dlname, RTLD_NOW | RTLD_GLOBAL);
234 ERRDBG(
"Loading libcupti.so failed. Try setting PAPI_CUDA_ROOT\n");
243 LOGDBG(
"CUPTI library loaded from %s\n", info.dli_fname);
293 return runtimeVersion;
298 unsigned int cuptiVersion;
306 if (cuda_errno != cudaSuccess) {
315 int cc_major, cc_minor;
316 cudaError_t cuda_errno;
318 if (cuda_errno != cudaSuccess) {
323 if (cuda_errno != cudaSuccess) {
327 *cc = cc_major * 10 + cc_minor;
348 int count_perf = 0, count_evt = 0, count_cc70 = 0;
349 for (
i=0;
i<total_gpus;
i++) {
364 if (count_cc70 == total_gpus) {
368 if (count_perf == total_gpus) {
372 if (count_evt == total_gpus) {
397 const char *library_name =
"libcudart.so";
398 char *library_path = strdup(info->dlpi_name);
400 if (library_path != NULL && strstr(library_path, library_name) != NULL) {
424 LOGDBG(
"Target application not linked with cuda runtime libraries.\n");
449 static int is_perfworks_api = -1;
450 if (is_perfworks_api != -1) {
453 char *papi_cuda_110_cc70_perfworks_api = getenv(
"PAPI_CUDA_110_CC_70_PERFWORKS_API");
466 if (papi_cuda_110_cc70_perfworks_api != NULL) {
467 is_perfworks_api = 1;
471 is_perfworks_api = 0;
477 is_perfworks_api = 1;
480 is_perfworks_api = 0;
485 return is_perfworks_api;
490 static int is_events_api = -1;
491 if (is_events_api != -1) {
513 return is_events_api;
528 cuptic_info_t cuCtx = (cuptic_info_t)
papi_calloc (total_gpus,
sizeof(*pinfo));
538 int papi_errno, gpu_id;
541 if (papi_errno != cudaSuccess) {
545 if (papi_errno != CUDA_SUCCESS) {
548 if (info[gpu_id].ctx == NULL) {
549 if (tempCtx != NULL) {
550 LOGDBG(
"Registering device = %d with ctx = %p.\n", gpu_id, tempCtx);
556 LOGDBG(
"Using primary device context %p for device %d.\n", info[gpu_id].ctx, gpu_id);
560 else if (info[gpu_id].ctx != tempCtx) {
561 ERRDBG(
"Warning: cuda context for gpu %d has changed from %p to %p\n", gpu_id, info[gpu_id].ctx, tempCtx);
568 *ctx = info[gpu_idx].ctx;
587 char *copy = strdup(
name);
589 token = strtok(copy,
"=");
593 token = strtok(NULL,
"\0");
597 *gpuid = strtol(token, NULL, 10);
609 int papi_errno =
PAPI_OK, gpu_id;
613 for (
i = 0;
i < evt_table->
count;
i++) {
622 acq_mask |= (1 << gpu_id);
@ GPU_COLLECTION_ALL_CC70
@ GPU_COLLECTION_ALL_PERF
@ GPU_COLLECTION_ALL_EVENTS
CUresult(* cuCtxGetDevicePtr)(CUdevice *)
void * cuptic_load_dynamic_syms(const char *parent_path, const char *dlname, const char *search_subpaths[])
CUresult(* cuGetErrorStringPtr)(CUresult error, const char **pStr)
void cuptic_disabled_reason_set(const char *msg)
static int unload_cupti_common_sym(void)
int cuptic_is_runtime_perfworks_api(void)
cudaError_t(* cudaFreePtr)(void *)
void cuptic_disabled_reason_get(const char **pmsg)
CUresult(* cuDeviceGetCountPtr)(int *)
CUresult(* cuCtxSetCurrentPtr)(CUcontext)
int cuptic_ctxarr_create(cuptic_info_t *pinfo)
int cuptic_device_release(cuptiu_event_table_t *evt_table)
cudaError_t(* cudaDriverGetVersionPtr)(int *)
CUresult(* cuDeviceGetAttributePtr)(int *, CUdevice_attribute, CUdevice)
static int util_gpu_collection_kind(gpu_collection_e *coll_kind)
CUresult(* cuDevicePrimaryCtxRetainPtr)(CUcontext *pctx, CUdevice)
CUptiResult(* cuptiGetVersionPtr)(uint32_t *)
int cuptic_ctxarr_get_ctx(cuptic_info_t info, int gpu_idx, CUcontext *ctx)
int cuptic_ctxarr_destroy(cuptic_info_t *pinfo)
int cuptic_is_runtime_events_api(void)
static int load_cupti_common_sym(void)
CUresult(* cuDeviceGetPtr)(CUdevice *, int)
static int _devmask_events_get(cuptiu_event_table_t *evt_table, gpu_occupancy_t *bitmask)
static int util_load_cuda_sym(void)
CUresult(* cuCtxPopCurrentPtr)(CUcontext *pctx)
static int unload_cudart_sym(void)
int cuptic_ctxarr_update_current(cuptic_info_t info)
static int unload_cuda_sym(void)
const char *(* cudaGetErrorStringPtr)(cudaError_t)
int cuptic_device_get_count(int *num_gpus)
CUresult(* cuCtxCreatePtr)(CUcontext *pctx, unsigned int flags, CUdevice dev)
static int load_cudart_sym(void)
CUresult(* cuDeviceGetNamePtr)(char *, int, CUdevice)
static int get_user_cudart_path(void)
CUresult(* cuCtxDestroyPtr)(CUcontext)
cudaError_t(* cudaGetDeviceCountPtr)(int *)
cudaError_t(* cudaDeviceGetAttributePtr)(int *value, enum cudaDeviceAttr attr, int device)
const char * cuptic_disabled_reason_g
static gpu_occupancy_t global_gpu_bitmask
const char * linked_cudart_path
CUresult(* cuCtxGetCurrentPtr)(CUcontext *)
static int load_cuda_sym(void)
static int dl_iterate_phdr_cb(struct dl_phdr_info *info, __attribute__((unused)) size_t size, __attribute__((unused)) void *data)
static void unload_linked_cudart_path(void)
cudaError_t(* cudaGetDevicePropertiesPtr)(struct cudaDeviceProp *prop, int device)
CUresult(* cuInitPtr)(unsigned int)
cudaError_t(* cudaRuntimeGetVersionPtr)(int *)
cudaError_t(* cudaGetDevicePtr)(int *)
int cuptic_device_acquire(cuptiu_event_table_t *evt_table)
CUresult(* cuCtxSynchronizePtr)()
static int get_gpu_compute_capability(int dev_num, int *cc)
static int event_name_get_gpuid(const char *name, int *gpuid)
cudaError_t(* cudaSetDevicePtr)(int)
static int util_dylib_cupti_version(void)
CUresult(* cuCtxPushCurrentPtr)(CUcontext pctx)
static int util_dylib_cu_runtime_version(void)
CUresult(* cuDevicePrimaryCtxReleasePtr)(CUdevice)
int cuptic_shutdown(void)
#define CUDA_CALL(call, handleerror)
#define CUDART_CALL(call, handleerror)
#define CUPTI_CALL(call, handleerror)
#define DLSYM_AND_CHECK(dllib, name)
#define CUPTI_PROFILER_API_MIN_SUPPORTED_VERSION
int cuptiu_event_table_get_item(cuptiu_event_table_t *evt_table, int evt_idx, cuptiu_event_t **record)
int cuptiu_files_search_in_path(const char *file_name, const char *search_path, char **file_paths)
#define ERRDBG(format, args...)
#define LOGDBG(format, args...)
#define COMPDBG(format, args...)
unsigned long AO_t __attribute__((__aligned__(4)))
Return codes and api definitions.
#define papi_calloc(a, b)
char name[PAPI_2MAX_STR_LEN]
inline_static int _papi_hwi_lock(int lck)
inline_static int _papi_hwi_unlock(int lck)