PAPI 7.1.0.0
Loading...
Searching...
No Matches
cupti_common.c
Go to the documentation of this file.
1
7#include <dlfcn.h>
8#include <link.h>
9#include <libgen.h>
10#include <papi.h>
11#include "papi_memory.h"
12
13#include "cupti_config.h"
14#include "cupti_common.h"
15
16static void *dl_drv, *dl_rt;
17
20
21unsigned int _cuda_lock;
22
23CUresult ( *cuCtxGetCurrentPtr ) (CUcontext *);
24CUresult ( *cuCtxSetCurrentPtr ) (CUcontext);
25CUresult ( *cuCtxDestroyPtr ) (CUcontext);
26CUresult ( *cuCtxCreatePtr ) (CUcontext *pctx, unsigned int flags, CUdevice dev);
27CUresult ( *cuCtxGetDevicePtr ) (CUdevice *);
28CUresult ( *cuDeviceGetPtr ) (CUdevice *, int);
29CUresult ( *cuDeviceGetCountPtr ) (int *);
30CUresult ( *cuDeviceGetNamePtr ) (char *, int, CUdevice);
31CUresult ( *cuDevicePrimaryCtxRetainPtr ) (CUcontext *pctx, CUdevice);
32CUresult ( *cuDevicePrimaryCtxReleasePtr ) (CUdevice);
33CUresult ( *cuInitPtr ) (unsigned int);
34CUresult ( *cuGetErrorStringPtr ) (CUresult error, const char** pStr);
35CUresult ( *cuCtxPopCurrentPtr ) (CUcontext * pctx);
36CUresult ( *cuCtxPushCurrentPtr ) (CUcontext pctx);
37CUresult ( *cuCtxSynchronizePtr ) ();
38CUresult ( *cuDeviceGetAttributePtr ) (int *, CUdevice_attribute, CUdevice);
39
40cudaError_t ( *cudaGetDeviceCountPtr ) (int *);
41cudaError_t ( *cudaGetDevicePtr ) (int *);
42const char *( *cudaGetErrorStringPtr ) (cudaError_t);
43cudaError_t ( *cudaSetDevicePtr ) (int);
44cudaError_t ( *cudaGetDevicePropertiesPtr ) (struct cudaDeviceProp* prop, int device);
45cudaError_t ( *cudaDeviceGetAttributePtr ) (int *value, enum cudaDeviceAttr attr, int device);
46cudaError_t ( *cudaFreePtr ) (void *);
47cudaError_t ( *cudaDriverGetVersionPtr ) (int *);
48cudaError_t ( *cudaRuntimeGetVersionPtr ) (int *);
49
50CUptiResult ( *cuptiGetVersionPtr ) (uint32_t* );
51
52static int load_cuda_sym(void)
53{
54 dl_drv = dlopen("libcuda.so", RTLD_NOW | RTLD_GLOBAL);
55 if (!dl_drv) {
56 ERRDBG("Loading installed libcuda.so failed. Check that cuda drivers are installed.\n");
57 goto fn_fail;
58 }
59
60 cuCtxSetCurrentPtr = DLSYM_AND_CHECK(dl_drv, "cuCtxSetCurrent");
61 cuCtxGetCurrentPtr = DLSYM_AND_CHECK(dl_drv, "cuCtxGetCurrent");
62 cuCtxDestroyPtr = DLSYM_AND_CHECK(dl_drv, "cuCtxDestroy");
63 cuCtxCreatePtr = DLSYM_AND_CHECK(dl_drv, "cuCtxCreate");
64 cuCtxGetDevicePtr = DLSYM_AND_CHECK(dl_drv, "cuCtxGetDevice");
65 cuDeviceGetPtr = DLSYM_AND_CHECK(dl_drv, "cuDeviceGet");
66 cuDeviceGetCountPtr = DLSYM_AND_CHECK(dl_drv, "cuDeviceGetCount");
67 cuDeviceGetNamePtr = DLSYM_AND_CHECK(dl_drv, "cuDeviceGetName");
68 cuDevicePrimaryCtxRetainPtr = DLSYM_AND_CHECK(dl_drv, "cuDevicePrimaryCtxRetain");
69 cuDevicePrimaryCtxReleasePtr = DLSYM_AND_CHECK(dl_drv, "cuDevicePrimaryCtxRelease");
70 cuInitPtr = DLSYM_AND_CHECK(dl_drv, "cuInit");
71 cuGetErrorStringPtr = DLSYM_AND_CHECK(dl_drv, "cuGetErrorString");
72 cuCtxPopCurrentPtr = DLSYM_AND_CHECK(dl_drv, "cuCtxPopCurrent");
73 cuCtxPushCurrentPtr = DLSYM_AND_CHECK(dl_drv, "cuCtxPushCurrent");
74 cuCtxSynchronizePtr = DLSYM_AND_CHECK(dl_drv, "cuCtxSynchronize");
75 cuDeviceGetAttributePtr = DLSYM_AND_CHECK(dl_drv, "cuDeviceGetAttribute");
76
77 Dl_info info;
78 dladdr(cuCtxSetCurrentPtr, &info);
79 LOGDBG("CUDA driver library loaded from %s\n", info.dli_fname);
80 return PAPI_OK;
81fn_fail:
82 return PAPI_EMISC;
83}
84
85static int unload_cuda_sym(void)
86{
87 if (dl_drv) {
88 dlclose(dl_drv);
89 dl_drv = NULL;
90 }
91 cuCtxSetCurrentPtr = NULL;
92 cuCtxGetCurrentPtr = NULL;
93 cuCtxDestroyPtr = NULL;
94 cuCtxCreatePtr = NULL;
95 cuCtxGetDevicePtr = NULL;
96 cuDeviceGetPtr = NULL;
98 cuDeviceGetNamePtr = NULL;
101 cuInitPtr = NULL;
102 cuGetErrorStringPtr = NULL;
103 cuCtxPopCurrentPtr = NULL;
104 cuCtxPushCurrentPtr = NULL;
105 cuCtxSynchronizePtr = NULL;
107 return PAPI_OK;
108}
109
110void *cuptic_load_dynamic_syms(const char *parent_path, const char *dlname, const char *search_subpaths[])
111{
112 void *dl = NULL;
113 char lookup_path[PATH_MAX];
114 char *found_files[CUPTIU_MAX_FILES];
115 int i, count;
116 for (i = 0; search_subpaths[i] != NULL; i++) {
117 sprintf(lookup_path, search_subpaths[i], parent_path, dlname);
118 dl = dlopen(lookup_path, RTLD_NOW | RTLD_GLOBAL);
119 if (dl) {
120 return dl;
121 }
122 }
123 count = cuptiu_files_search_in_path(dlname, parent_path, found_files);
124 for (i = 0; i < count; i++) {
125 dl = dlopen(found_files[i], RTLD_NOW | RTLD_GLOBAL);
126 if (dl) {
127 break;
128 }
129 }
130 for (i = 0; i < count; i++) {
131 papi_free(found_files[i]);
132 }
133 return dl;
134}
135
136static int load_cudart_sym(void)
137{
138 char dlname[] = "libcudart.so";
139 char lookup_path[PATH_MAX];
140
141 char *papi_cuda_runtime = getenv("PAPI_CUDA_RUNTIME");
142 if (papi_cuda_runtime) {
143 sprintf(lookup_path, "%s/%s", papi_cuda_runtime, dlname);
144 dl_rt = dlopen(lookup_path, RTLD_NOW | RTLD_GLOBAL);
145 }
146
147 const char *standard_paths[] = {
148 "%s/lib64/%s",
149 NULL,
150 };
151
152 if (linked_cudart_path && !dl_rt) {
153 dl_rt = cuptic_load_dynamic_syms(linked_cudart_path, dlname, standard_paths);
154 }
155
156 char *papi_cuda_root = getenv("PAPI_CUDA_ROOT");
157 if (papi_cuda_root && !dl_rt) {
158 dl_rt = cuptic_load_dynamic_syms(papi_cuda_root, dlname, standard_paths);
159 }
160
161 if (!dl_rt) {
162 dl_rt = dlopen(dlname, RTLD_NOW | RTLD_GLOBAL);
163 if (!dl_rt) {
164 ERRDBG("Loading libcudart.so failed. Try setting PAPI_CUDA_ROOT\n");
165 goto fn_fail;
166 }
167 }
168
169 cudaGetDevicePtr = DLSYM_AND_CHECK(dl_rt, "cudaGetDevice");
170 cudaGetDeviceCountPtr = DLSYM_AND_CHECK(dl_rt, "cudaGetDeviceCount");
171 cudaGetDevicePropertiesPtr = DLSYM_AND_CHECK(dl_rt, "cudaGetDeviceProperties");
172 cudaGetErrorStringPtr = DLSYM_AND_CHECK(dl_rt, "cudaGetErrorString");
173 cudaDeviceGetAttributePtr = DLSYM_AND_CHECK(dl_rt, "cudaDeviceGetAttribute");
174 cudaSetDevicePtr = DLSYM_AND_CHECK(dl_rt, "cudaSetDevice");
175 cudaFreePtr = DLSYM_AND_CHECK(dl_rt, "cudaFree");
176 cudaDriverGetVersionPtr = DLSYM_AND_CHECK(dl_rt, "cudaDriverGetVersion");
177 cudaRuntimeGetVersionPtr = DLSYM_AND_CHECK(dl_rt, "cudaRuntimeGetVersion");
178
179 Dl_info info;
180 dladdr(cudaGetDevicePtr, &info);
181 LOGDBG("CUDA runtime library loaded from %s\n", info.dli_fname);
182 return PAPI_OK;
183fn_fail:
184 return PAPI_EMISC;
185}
186
187static int unload_cudart_sym(void)
188{
189 if (dl_rt) {
190 dlclose(dl_rt);
191 dl_rt = NULL;
192 }
193 cudaGetDevicePtr = NULL;
198 cudaSetDevicePtr = NULL;
199 cudaFreePtr = NULL;
202 return PAPI_OK;
203}
204
205static int load_cupti_common_sym(void)
206{
207 char dlname[] = "libcupti.so";
208 char lookup_path[PATH_MAX];
209
210 char *papi_cuda_cupti = getenv("PAPI_CUDA_CUPTI");
211 if (papi_cuda_cupti) {
212 sprintf(lookup_path, "%s/%s", papi_cuda_cupti, dlname);
213 dl_cupti = dlopen(lookup_path, RTLD_NOW | RTLD_GLOBAL);
214 }
215
216 const char *standard_paths[] = {
217 "%s/extras/CUPTI/lib64/%s",
218 "%s/lib64/%s",
219 NULL,
220 };
221
223 dl_cupti = cuptic_load_dynamic_syms(linked_cudart_path, dlname, standard_paths);
224 }
225
226 char *papi_cuda_root = getenv("PAPI_CUDA_ROOT");
227 if (papi_cuda_root && !dl_cupti) {
228 dl_cupti = cuptic_load_dynamic_syms(papi_cuda_root, dlname, standard_paths);
229 }
230
231 if (!dl_cupti) {
232 dl_cupti = dlopen(dlname, RTLD_NOW | RTLD_GLOBAL);
233 if (!dl_cupti) {
234 ERRDBG("Loading libcupti.so failed. Try setting PAPI_CUDA_ROOT\n");
235 goto fn_fail;
236 }
237 }
238
239 cuptiGetVersionPtr = DLSYM_AND_CHECK(dl_cupti, "cuptiGetVersion");
240
241 Dl_info info;
242 dladdr(cuptiGetVersionPtr, &info);
243 LOGDBG("CUPTI library loaded from %s\n", info.dli_fname);
244 return PAPI_OK;
245fn_fail:
246 return PAPI_EMISC;
247}
248
250{
251 if (dl_cupti) {
252 dlclose(dl_cupti);
253 dl_cupti = NULL;
254 }
255 cuptiGetVersionPtr = NULL;
256 return PAPI_OK;
257}
258
259static int util_load_cuda_sym(void)
260{
261 int papi_errno;
262 papi_errno = load_cuda_sym();
263 papi_errno += load_cudart_sym();
264 papi_errno += load_cupti_common_sym();
265 if (papi_errno != PAPI_OK) {
266 return PAPI_EMISC;
267 }
268 else
269 return PAPI_OK;
270}
271
273{
274 if (linked_cudart_path) {
276 linked_cudart_path = NULL;
277 }
278}
279
281{
286 return PAPI_OK;
287}
288
290{
291 int runtimeVersion;
292 CUDART_CALL(cudaRuntimeGetVersionPtr(&runtimeVersion), return PAPI_EMISC );
293 return runtimeVersion;
294}
295
297{
298 unsigned int cuptiVersion;
299 CUPTI_CALL(cuptiGetVersionPtr(&cuptiVersion), return PAPI_EMISC );
300 return cuptiVersion;
301}
302
304{
305 cudaError_t cuda_errno = cudaGetDeviceCountPtr(num_gpus);
306 if (cuda_errno != cudaSuccess) {
308 return PAPI_EMISC;
309 }
310 return PAPI_OK;
311}
312
313static int get_gpu_compute_capability(int dev_num, int *cc)
314{
315 int cc_major, cc_minor;
316 cudaError_t cuda_errno;
317 cuda_errno = cudaDeviceGetAttributePtr(&cc_major, cudaDevAttrComputeCapabilityMajor, dev_num);
318 if (cuda_errno != cudaSuccess) {
320 return PAPI_EMISC;
321 }
322 cuda_errno = cudaDeviceGetAttributePtr(&cc_minor, cudaDevAttrComputeCapabilityMinor, dev_num);
323 if (cuda_errno != cudaSuccess) {
325 return PAPI_EMISC;
326 }
327 *cc = cc_major * 10 + cc_minor;
328 return PAPI_OK;
329}
330
332
334{
335 int papi_errno = PAPI_OK;
337 if (kind != GPU_COLLECTION_UNKNOWN) {
338 goto fn_exit;
339 }
340
341 int total_gpus;
342 papi_errno = cuptic_device_get_count(&total_gpus);
343 if (papi_errno != PAPI_OK) {
344 goto fn_exit;
345 }
346
347 int i, cc;
348 int count_perf = 0, count_evt = 0, count_cc70 = 0;
349 for (i=0; i<total_gpus; i++) {
350 papi_errno = get_gpu_compute_capability(i, &cc);
351 if (papi_errno != PAPI_OK) {
352 return papi_errno;
353 }
354 if (cc == 70) {
355 ++count_cc70;
356 }
357 if (cc >= 70) {
358 ++count_perf;
359 }
360 if (cc <= 70) {
361 ++count_evt;
362 }
363 }
364 if (count_cc70 == total_gpus) {
366 goto fn_exit;
367 }
368 if (count_perf == total_gpus) {
370 goto fn_exit;
371 }
372 if (count_evt == total_gpus) {
374 goto fn_exit;
375 }
377
378fn_exit:
379 *coll_kind = kind;
380 return papi_errno;
381}
382
384
385void cuptic_disabled_reason_set(const char *msg)
386{
388}
389
390void cuptic_disabled_reason_get(const char **pmsg)
391{
393}
394
395static int dl_iterate_phdr_cb(struct dl_phdr_info *info, __attribute__((unused)) size_t size, __attribute__((unused)) void *data)
396{
397 const char *library_name = "libcudart.so";
398 char *library_path = strdup(info->dlpi_name);
399
400 if (library_path != NULL && strstr(library_path, library_name) != NULL) {
401 linked_cudart_path = strdup(dirname(dirname((char *) library_path)));
402 }
403
404 free(library_path);
405 return PAPI_OK;
406}
407
408static int get_user_cudart_path(void)
409{
410 dl_iterate_phdr(dl_iterate_phdr_cb, NULL);
411 if (NULL == linked_cudart_path) {
412 return PAPI_EMISC;
413 }
414 return PAPI_OK;
415}
416
417int cuptic_init(void)
418{
419 int papi_errno = get_user_cudart_path();
420 if (papi_errno == PAPI_OK) {
421 LOGDBG("Linked cudart root: %s\n", linked_cudart_path);
422 }
423 else {
424 LOGDBG("Target application not linked with cuda runtime libraries.\n");
425 }
426 papi_errno = util_load_cuda_sym();
427 if (papi_errno != PAPI_OK) {
428 cuptic_disabled_reason_set("Unable to load CUDA library functions.");
429 goto fn_exit;
430 }
431
432 gpu_collection_e kind;
433 papi_errno = util_gpu_collection_kind(&kind);
434 if (papi_errno != PAPI_OK) {
435 goto fn_exit;
436 }
437
438 if (kind == GPU_COLLECTION_MIXED) {
439 cuptic_disabled_reason_set("No support for systems with mixed compute capabilities, such as CC < 7.0 and CC > 7.0 GPUS.");
440 papi_errno = PAPI_ECMP;
441 goto fn_exit;
442 }
443fn_exit:
444 return papi_errno;
445}
446
448{
449 static int is_perfworks_api = -1;
450 if (is_perfworks_api != -1) {
451 goto fn_exit;
452 }
453 char *papi_cuda_110_cc70_perfworks_api = getenv("PAPI_CUDA_110_CC_70_PERFWORKS_API");
454
455 gpu_collection_e gpus_kind;
456 int papi_errno = util_gpu_collection_kind(&gpus_kind);
457 if (papi_errno != PAPI_OK) {
458 goto fn_exit;
459 }
460
461 unsigned int cuptiVersion = util_dylib_cupti_version();
462
463 if (gpus_kind == GPU_COLLECTION_ALL_CC70 &&
465 {
466 if (papi_cuda_110_cc70_perfworks_api != NULL) {
467 is_perfworks_api = 1;
468 goto fn_exit;
469 }
470 else {
471 is_perfworks_api = 0;
472 goto fn_exit;
473 }
474 }
475
476 if ((gpus_kind == GPU_COLLECTION_ALL_PERF || gpus_kind == GPU_COLLECTION_ALL_CC70) && cuptiVersion >= CUPTI_PROFILER_API_MIN_SUPPORTED_VERSION) {
477 is_perfworks_api = 1;
478 goto fn_exit;
479 } else {
480 is_perfworks_api = 0;
481 goto fn_exit;
482 }
483
484fn_exit:
485 return is_perfworks_api;
486}
487
489{
490 static int is_events_api = -1;
491 if (is_events_api != -1) {
492 goto fn_exit;
493 }
494
495 gpu_collection_e gpus_kind;
496 int papi_errno = util_gpu_collection_kind(&gpus_kind);
497 if (papi_errno != PAPI_OK) {
498 goto fn_exit;
499 }
500
501 /*
502 * See cupti_config.h: When NVIDIA removes the events API add a check in the following condition
503 * to check the `util_dylib_cupti_version()` is also <= CUPTI_EVENTS_API_MAX_SUPPORTED_VERSION.
504 */
505 if ((gpus_kind == GPU_COLLECTION_ALL_EVENTS || gpus_kind == GPU_COLLECTION_ALL_CC70)) {
506 is_events_api = 1;
507 goto fn_exit;
508 } else {
509 is_events_api = 0;
510 goto fn_exit;
511 }
512fn_exit:
513 return is_events_api;
514}
515
517 CUcontext ctx;
518};
519
520int cuptic_ctxarr_create(cuptic_info_t *pinfo)
521{
522 COMPDBG("Entering.\n");
523 int total_gpus;
524 int papi_errno = cuptic_device_get_count(&total_gpus);
525 if (papi_errno != PAPI_OK) {
526 return PAPI_EMISC;
527 }
528 cuptic_info_t cuCtx = (cuptic_info_t) papi_calloc (total_gpus, sizeof(*pinfo));
529 if (cuCtx == NULL) {
530 return PAPI_ENOMEM;
531 }
532 *pinfo = cuCtx;
533 return PAPI_OK;
534}
535
536int cuptic_ctxarr_update_current(cuptic_info_t info)
537{
538 int papi_errno, gpu_id;
539 CUcontext tempCtx;
540 papi_errno = cudaGetDevicePtr(&gpu_id);
541 if (papi_errno != cudaSuccess) {
542 return PAPI_EMISC;
543 }
544 papi_errno = cuCtxGetCurrentPtr(&tempCtx);
545 if (papi_errno != CUDA_SUCCESS) {
546 return PAPI_EMISC;
547 }
548 if (info[gpu_id].ctx == NULL) {
549 if (tempCtx != NULL) {
550 LOGDBG("Registering device = %d with ctx = %p.\n", gpu_id, tempCtx);
551 CUDA_CALL(cuCtxGetCurrentPtr(&info[gpu_id].ctx), return PAPI_EMISC);
552 }
553 else {
554 CUDART_CALL(cudaFreePtr(NULL), return PAPI_EMISC);
555 CUDA_CALL(cuCtxGetCurrentPtr(&info[gpu_id].ctx), return PAPI_EMISC);
556 LOGDBG("Using primary device context %p for device %d.\n", info[gpu_id].ctx, gpu_id);
557 }
558 }
559 /* If context has changed keep the first seen one but with warning */
560 else if (info[gpu_id].ctx != tempCtx) {
561 ERRDBG("Warning: cuda context for gpu %d has changed from %p to %p\n", gpu_id, info[gpu_id].ctx, tempCtx);
562 }
563 return PAPI_OK;
564}
565
566int cuptic_ctxarr_get_ctx(cuptic_info_t info, int gpu_idx, CUcontext *ctx)
567{
568 *ctx = info[gpu_idx].ctx;
569 return PAPI_OK;
570}
571
572int cuptic_ctxarr_destroy(cuptic_info_t *pinfo)
573{
574 papi_free(*pinfo);
575 *pinfo = NULL;
576 return PAPI_OK;
577}
578
579/* Functions based on bitmasking to detect gpu exclusivity */
580typedef int64_t gpu_occupancy_t;
582
583static int event_name_get_gpuid(const char *name, int *gpuid)
584{
585 int papi_errno = PAPI_OK;
586 char *token;
587 char *copy = strdup(name);
588
589 token = strtok(copy, "=");
590 if (token == NULL) {
591 goto fn_fail;
592 }
593 token = strtok(NULL, "\0");
594 if (token == NULL) {
595 goto fn_fail;
596 }
597 *gpuid = strtol(token, NULL, 10);
598
599fn_exit:
600 papi_free(copy);
601 return papi_errno;
602fn_fail:
603 papi_errno = PAPI_EINVAL;
604 goto fn_exit;
605}
606
608{
609 int papi_errno = PAPI_OK, gpu_id;
610 long i;
611 gpu_occupancy_t acq_mask = 0;
612 cuptiu_event_t *evt_rec;
613 for (i = 0; i < evt_table->count; i++) {
614 papi_errno = cuptiu_event_table_get_item(evt_table, i, (cuptiu_event_t **) &evt_rec);
615 if (papi_errno != PAPI_OK) {
616 goto fn_exit;
617 }
618 papi_errno = event_name_get_gpuid(evt_rec->name, &gpu_id);
619 if (papi_errno != PAPI_OK) {
620 goto fn_exit;
621 }
622 acq_mask |= (1 << gpu_id);
623 }
624 *bitmask = acq_mask;
625fn_exit:
626 return papi_errno;
627}
628
630{
631 gpu_occupancy_t bitmask;
632 int papi_errno = _devmask_events_get(evt_table, &bitmask);
633 if (papi_errno != PAPI_OK)
634 return papi_errno;
635 if (bitmask & global_gpu_bitmask) {
636 return PAPI_ECNFLCT;
637 }
639 global_gpu_bitmask |= bitmask;
641 return PAPI_OK;
642}
643
645{
646 gpu_occupancy_t bitmask;
647 int papi_errno = _devmask_events_get(evt_table, &bitmask);
648 if (papi_errno != PAPI_OK) {
649 return papi_errno;
650 }
651 if ((bitmask & global_gpu_bitmask) != bitmask) {
652 return PAPI_EMISC;
653 }
655 global_gpu_bitmask ^= bitmask;
657 return PAPI_OK;
658}
int i
static long count
gpu_collection_e
Definition: cupti_common.c:331
@ GPU_COLLECTION_ALL_CC70
Definition: cupti_common.c:331
@ GPU_COLLECTION_ALL_PERF
Definition: cupti_common.c:331
@ GPU_COLLECTION_ALL_EVENTS
Definition: cupti_common.c:331
@ GPU_COLLECTION_MIXED
Definition: cupti_common.c:331
@ GPU_COLLECTION_UNKNOWN
Definition: cupti_common.c:331
CUresult(* cuCtxGetDevicePtr)(CUdevice *)
Definition: cupti_common.c:27
void * cuptic_load_dynamic_syms(const char *parent_path, const char *dlname, const char *search_subpaths[])
Definition: cupti_common.c:110
CUresult(* cuGetErrorStringPtr)(CUresult error, const char **pStr)
Definition: cupti_common.c:34
void cuptic_disabled_reason_set(const char *msg)
Definition: cupti_common.c:385
static int unload_cupti_common_sym(void)
Definition: cupti_common.c:249
int cuptic_is_runtime_perfworks_api(void)
Definition: cupti_common.c:447
cudaError_t(* cudaFreePtr)(void *)
Definition: cupti_common.c:46
void cuptic_disabled_reason_get(const char **pmsg)
Definition: cupti_common.c:390
CUresult(* cuDeviceGetCountPtr)(int *)
Definition: cupti_common.c:29
CUresult(* cuCtxSetCurrentPtr)(CUcontext)
Definition: cupti_common.c:24
int cuptic_ctxarr_create(cuptic_info_t *pinfo)
Definition: cupti_common.c:520
int cuptic_device_release(cuptiu_event_table_t *evt_table)
Definition: cupti_common.c:644
cudaError_t(* cudaDriverGetVersionPtr)(int *)
Definition: cupti_common.c:47
CUresult(* cuDeviceGetAttributePtr)(int *, CUdevice_attribute, CUdevice)
Definition: cupti_common.c:38
static int util_gpu_collection_kind(gpu_collection_e *coll_kind)
Definition: cupti_common.c:333
CUresult(* cuDevicePrimaryCtxRetainPtr)(CUcontext *pctx, CUdevice)
Definition: cupti_common.c:31
static void * dl_rt
Definition: cupti_common.c:16
CUptiResult(* cuptiGetVersionPtr)(uint32_t *)
Definition: cupti_common.c:50
int cuptic_ctxarr_get_ctx(cuptic_info_t info, int gpu_idx, CUcontext *ctx)
Definition: cupti_common.c:566
int cuptic_ctxarr_destroy(cuptic_info_t *pinfo)
Definition: cupti_common.c:572
int cuptic_is_runtime_events_api(void)
Definition: cupti_common.c:488
static void * dl_drv
Definition: cupti_common.c:16
static int load_cupti_common_sym(void)
Definition: cupti_common.c:205
CUresult(* cuDeviceGetPtr)(CUdevice *, int)
Definition: cupti_common.c:28
static int _devmask_events_get(cuptiu_event_table_t *evt_table, gpu_occupancy_t *bitmask)
Definition: cupti_common.c:607
static int util_load_cuda_sym(void)
Definition: cupti_common.c:259
CUresult(* cuCtxPopCurrentPtr)(CUcontext *pctx)
Definition: cupti_common.c:35
static int unload_cudart_sym(void)
Definition: cupti_common.c:187
void * dl_cupti
Definition: cupti_common.c:19
int cuptic_ctxarr_update_current(cuptic_info_t info)
Definition: cupti_common.c:536
static int unload_cuda_sym(void)
Definition: cupti_common.c:85
const char *(* cudaGetErrorStringPtr)(cudaError_t)
Definition: cupti_common.c:42
int cuptic_device_get_count(int *num_gpus)
Definition: cupti_common.c:303
CUresult(* cuCtxCreatePtr)(CUcontext *pctx, unsigned int flags, CUdevice dev)
Definition: cupti_common.c:26
unsigned int _cuda_lock
Definition: cupti_common.c:21
static int load_cudart_sym(void)
Definition: cupti_common.c:136
CUresult(* cuDeviceGetNamePtr)(char *, int, CUdevice)
Definition: cupti_common.c:30
static int get_user_cudart_path(void)
Definition: cupti_common.c:408
CUresult(* cuCtxDestroyPtr)(CUcontext)
Definition: cupti_common.c:25
cudaError_t(* cudaGetDeviceCountPtr)(int *)
Definition: cupti_common.c:40
cudaError_t(* cudaDeviceGetAttributePtr)(int *value, enum cudaDeviceAttr attr, int device)
Definition: cupti_common.c:45
const char * cuptic_disabled_reason_g
Definition: cupti_common.c:383
int cuptic_init(void)
Definition: cupti_common.c:417
static gpu_occupancy_t global_gpu_bitmask
Definition: cupti_common.c:581
const char * linked_cudart_path
Definition: cupti_common.c:18
CUresult(* cuCtxGetCurrentPtr)(CUcontext *)
Definition: cupti_common.c:23
static int load_cuda_sym(void)
Definition: cupti_common.c:52
static int dl_iterate_phdr_cb(struct dl_phdr_info *info, __attribute__((unused)) size_t size, __attribute__((unused)) void *data)
Definition: cupti_common.c:395
int64_t gpu_occupancy_t
Definition: cupti_common.c:580
static void unload_linked_cudart_path(void)
Definition: cupti_common.c:272
cudaError_t(* cudaGetDevicePropertiesPtr)(struct cudaDeviceProp *prop, int device)
Definition: cupti_common.c:44
CUresult(* cuInitPtr)(unsigned int)
Definition: cupti_common.c:33
cudaError_t(* cudaRuntimeGetVersionPtr)(int *)
Definition: cupti_common.c:48
cudaError_t(* cudaGetDevicePtr)(int *)
Definition: cupti_common.c:41
int cuptic_device_acquire(cuptiu_event_table_t *evt_table)
Definition: cupti_common.c:629
CUresult(* cuCtxSynchronizePtr)()
Definition: cupti_common.c:37
static int get_gpu_compute_capability(int dev_num, int *cc)
Definition: cupti_common.c:313
static int event_name_get_gpuid(const char *name, int *gpuid)
Definition: cupti_common.c:583
cudaError_t(* cudaSetDevicePtr)(int)
Definition: cupti_common.c:43
static int util_dylib_cupti_version(void)
Definition: cupti_common.c:296
CUresult(* cuCtxPushCurrentPtr)(CUcontext pctx)
Definition: cupti_common.c:36
static int util_dylib_cu_runtime_version(void)
Definition: cupti_common.c:289
CUresult(* cuDevicePrimaryCtxReleasePtr)(CUdevice)
Definition: cupti_common.c:32
int cuptic_shutdown(void)
Definition: cupti_common.c:280
#define CUDA_CALL(call, handleerror)
Definition: cupti_common.h:58
#define CUDART_CALL(call, handleerror)
Definition: cupti_common.h:68
#define CUPTI_CALL(call, handleerror)
Definition: cupti_common.h:78
#define DLSYM_AND_CHECK(dllib, name)
Definition: cupti_common.h:52
#define CUPTI_PROFILER_API_MIN_SUPPORTED_VERSION
Definition: cupti_config.h:12
static int num_gpus
int cuptiu_event_table_get_item(cuptiu_event_table_t *evt_table, int evt_idx, cuptiu_event_t **record)
Definition: cupti_utils.c:47
int cuptiu_files_search_in_path(const char *file_name, const char *search_path, char **file_paths)
Definition: cupti_utils.c:176
#define CUPTIU_MAX_FILES
Definition: cupti_utils.h:38
#define PAPI_OK
Definition: f90papi.h:73
#define PAPI_ECNFLCT
Definition: f90papi.h:234
#define PAPI_EINVAL
Definition: f90papi.h:115
#define PAPI_EMISC
Definition: f90papi.h:122
#define PAPI_ECMP
Definition: f90papi.h:214
#define PAPI_ENOMEM
Definition: f90papi.h:16
#define ERRDBG(format, args...)
Definition: lcuda_debug.h:30
#define LOGDBG(format, args...)
Definition: lcuda_debug.h:24
#define COMPDBG(format, args...)
Definition: lcuda_debug.h:21
unsigned long AO_t __attribute__((__aligned__(4)))
Definition: m68k.h:21
Return codes and api definitions.
#define papi_calloc(a, b)
Definition: papi_memory.h:37
#define papi_free(a)
Definition: papi_memory.h:35
const char * name
Definition: rocs.c:225
int
Definition: sde_internal.h:89
CUcontext ctx
Definition: cupti_common.c:517
char name[PAPI_2MAX_STR_LEN]
Definition: cupti_utils.h:13
unsigned int count
Definition: cupti_utils.h:22
inline_static int _papi_hwi_lock(int lck)
Definition: threads.h:69
inline_static int _papi_hwi_unlock(int lck)
Definition: threads.h:83