PAPI 7.1.0.0
Loading...
Searching...
No Matches
cupti_profiler.c
Go to the documentation of this file.
1
7#include <dlfcn.h>
8#include <papi.h>
9#include "papi_memory.h"
10
11#include <cupti_target.h>
12#include <cupti_profiler_target.h>
13#include <nvperf_host.h>
14#include <nvperf_cuda_host.h>
15#include <nvperf_target.h>
16
17#include "cupti_common.h"
18#include "cupti_profiler.h"
19#include "lcuda_debug.h"
20
21typedef struct byte_array_s byte_array_t;
22typedef struct cuptip_gpu_state_s cuptip_gpu_state_t;
23typedef struct list_metrics_s list_metrics_t;
25typedef NVPW_CUDA_MetricsContext_Create_Params MCCP_t;
28
29static void *dl_nvpw;
30static int num_gpus;
31static list_metrics_t *avail_events;
32
33static int load_cupti_perf_sym(void);
34static int unload_cupti_perf_sym(void);
35static int load_nvpw_sym(void);
36static int unload_nvpw_sym(void);
37static int initialize_cupti_profiler_api(void);
38static int finalize_cupti_profiler_api(void);
39static int initialize_perfworks_api(void);
40static int get_chip_name(int dev_num, char* chipName);
41static int init_all_metrics(void);
42static int find_same_chipname(int gpu_id);
43static void free_all_enumerated_metrics(void);
44static int event_name_tokenize(const char *name, char *nv_name, int *gpuid);
45static int retrieve_metric_details(NVPA_MetricsContext *pMetricsContext, const char *nv_name,
46 char *description, int *numDep, NVPA_RawMetricRequest **pRMR);
47static int check_num_passes(struct NVPA_RawMetricsConfig *pRawMetricsConfig, int rmr_count,
48 NVPA_RawMetricRequest *rmr, int *num_pass);
50
51static int nvpw_cuda_metricscontext_create(cuptip_control_t state);
52static int nvpw_cuda_metricscontext_destroy(cuptip_control_t state);
53static int add_events_per_gpu(cuptip_control_t state, cuptiu_event_table_t *event_names);
54static int control_state_validate(cuptip_control_t state);
55
56static int get_event_names_rmr(cuptip_gpu_state_t *gpu_ctl);
57static int get_counter_availability(cuptip_gpu_state_t *gpu_ctl);
58static int metric_get_config_image(cuptip_gpu_state_t *gpu_ctl);
59static int metric_get_counter_data_prefix_image(cuptip_gpu_state_t *gpu_ctl);
60static int create_counter_data_image(cuptip_gpu_state_t *gpu_ctl);
61static int reset_cupti_prof_config_images(cuptip_gpu_state_t *gpu_ctl);
62static int begin_profiling(cuptip_gpu_state_t *gpu_ctl);
63static int end_profiling(cuptip_gpu_state_t *gpu_ctl);
64static int get_measured_values(cuptip_gpu_state_t *gpu_ctl);
65
66NVPA_Status ( *NVPW_GetSupportedChipNamesPtr ) (NVPW_GetSupportedChipNames_Params* params);
67NVPA_Status ( *NVPW_CUDA_MetricsContext_CreatePtr ) (NVPW_CUDA_MetricsContext_Create_Params* params);
68NVPA_Status ( *NVPW_MetricsContext_DestroyPtr ) (NVPW_MetricsContext_Destroy_Params * params);
69NVPA_Status ( *NVPW_MetricsContext_GetMetricNames_BeginPtr ) (NVPW_MetricsContext_GetMetricNames_Begin_Params* params);
70NVPA_Status ( *NVPW_MetricsContext_GetMetricNames_EndPtr ) (NVPW_MetricsContext_GetMetricNames_End_Params* params);
71NVPA_Status ( *NVPW_InitializeHostPtr ) (NVPW_InitializeHost_Params* params);
72NVPA_Status ( *NVPW_MetricsContext_GetMetricProperties_BeginPtr ) (NVPW_MetricsContext_GetMetricProperties_Begin_Params* p);
73NVPA_Status ( *NVPW_MetricsContext_GetMetricProperties_EndPtr ) (NVPW_MetricsContext_GetMetricProperties_End_Params* p);
74NVPA_Status ( *NVPW_CUDA_RawMetricsConfig_CreatePtr ) (NVPW_CUDA_RawMetricsConfig_Create_Params*);
75
76NVPA_Status ( *NVPW_RawMetricsConfig_DestroyPtr ) (NVPW_RawMetricsConfig_Destroy_Params* params);
77NVPA_Status ( *NVPW_RawMetricsConfig_BeginPassGroupPtr ) (NVPW_RawMetricsConfig_BeginPassGroup_Params* params);
78NVPA_Status ( *NVPW_RawMetricsConfig_EndPassGroupPtr ) (NVPW_RawMetricsConfig_EndPassGroup_Params* params);
79NVPA_Status ( *NVPW_RawMetricsConfig_AddMetricsPtr ) (NVPW_RawMetricsConfig_AddMetrics_Params* params);
80NVPA_Status ( *NVPW_RawMetricsConfig_GenerateConfigImagePtr ) (NVPW_RawMetricsConfig_GenerateConfigImage_Params* params);
81NVPA_Status ( *NVPW_RawMetricsConfig_GetConfigImagePtr ) (NVPW_RawMetricsConfig_GetConfigImage_Params* params);
82NVPA_Status ( *NVPW_CounterDataBuilder_CreatePtr ) (NVPW_CounterDataBuilder_Create_Params* params);
83NVPA_Status ( *NVPW_CounterDataBuilder_DestroyPtr ) (NVPW_CounterDataBuilder_Destroy_Params* params);
84NVPA_Status ( *NVPW_CounterDataBuilder_AddMetricsPtr ) (NVPW_CounterDataBuilder_AddMetrics_Params* params);
85NVPA_Status ( *NVPW_CounterDataBuilder_GetCounterDataPrefixPtr ) (NVPW_CounterDataBuilder_GetCounterDataPrefix_Params* params);
86NVPA_Status ( *NVPW_CounterData_GetNumRangesPtr ) (NVPW_CounterData_GetNumRanges_Params* params);
87NVPA_Status ( *NVPW_Profiler_CounterData_GetRangeDescriptionsPtr ) (NVPW_Profiler_CounterData_GetRangeDescriptions_Params* params);
88NVPA_Status ( *NVPW_MetricsContext_SetCounterDataPtr ) (NVPW_MetricsContext_SetCounterData_Params* params);
89NVPA_Status ( *NVPW_MetricsContext_EvaluateToGpuValuesPtr ) (NVPW_MetricsContext_EvaluateToGpuValues_Params* params);
90NVPA_Status ( *NVPW_RawMetricsConfig_GetNumPassesPtr ) (NVPW_RawMetricsConfig_GetNumPasses_Params* params);
91NVPA_Status ( *NVPW_RawMetricsConfig_SetCounterAvailabilityPtr ) (NVPW_RawMetricsConfig_SetCounterAvailability_Params* params);
92NVPA_Status ( *NVPW_RawMetricsConfig_IsAddMetricsPossiblePtr ) (NVPW_RawMetricsConfig_IsAddMetricsPossible_Params* params);
93
94NVPA_Status ( *NVPW_MetricsContext_GetCounterNames_BeginPtr ) (NVPW_MetricsContext_GetCounterNames_Begin_Params* pParams);
95NVPA_Status ( *NVPW_MetricsContext_GetCounterNames_EndPtr ) (NVPW_MetricsContext_GetCounterNames_End_Params* pParams);
96
97CUptiResult ( *cuptiDeviceGetChipNamePtr ) (CUpti_Device_GetChipName_Params* params);
98CUptiResult ( *cuptiProfilerInitializePtr ) (CUpti_Profiler_Initialize_Params* params);
99CUptiResult ( *cuptiProfilerDeInitializePtr ) (CUpti_Profiler_DeInitialize_Params* params);
100CUptiResult ( *cuptiProfilerCounterDataImageCalculateSizePtr ) (CUpti_Profiler_CounterDataImage_CalculateSize_Params* params);
101CUptiResult ( *cuptiProfilerCounterDataImageInitializePtr ) (CUpti_Profiler_CounterDataImage_Initialize_Params* params);
102CUptiResult ( *cuptiProfilerCounterDataImageCalculateScratchBufferSizePtr ) (CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params* params);
103CUptiResult ( *cuptiProfilerCounterDataImageInitializeScratchBufferPtr ) (CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params* params);
104CUptiResult ( *cuptiProfilerBeginSessionPtr ) (CUpti_Profiler_BeginSession_Params* params);
105CUptiResult ( *cuptiProfilerSetConfigPtr ) (CUpti_Profiler_SetConfig_Params* params);
106CUptiResult ( *cuptiProfilerBeginPassPtr ) (CUpti_Profiler_BeginPass_Params* params);
107CUptiResult ( *cuptiProfilerEnableProfilingPtr ) (CUpti_Profiler_EnableProfiling_Params* params);
108CUptiResult ( *cuptiProfilerPushRangePtr ) (CUpti_Profiler_PushRange_Params* params);
109CUptiResult ( *cuptiProfilerPopRangePtr ) (CUpti_Profiler_PopRange_Params* params);
110CUptiResult ( *cuptiProfilerDisableProfilingPtr ) (CUpti_Profiler_DisableProfiling_Params* params);
111CUptiResult ( *cuptiProfilerEndPassPtr ) (CUpti_Profiler_EndPass_Params* params);
112CUptiResult ( *cuptiProfilerFlushCounterDataPtr ) (CUpti_Profiler_FlushCounterData_Params* params);
113CUptiResult ( *cuptiProfilerUnsetConfigPtr ) (CUpti_Profiler_UnsetConfig_Params* params);
114CUptiResult ( *cuptiProfilerEndSessionPtr ) (CUpti_Profiler_EndSession_Params* params);
115CUptiResult ( *cuptiProfilerGetCounterAvailabilityPtr ) (CUpti_Profiler_GetCounterAvailability_Params* params);
116CUptiResult ( *cuptiFinalizePtr ) (void);
117
/*
 * Evaluate a perfworks call once, log it, and run `handleerror` when the
 * returned status is not NVPA_STATUS_SUCCESS.
 *
 *   call        - expression yielding an NVPA_Status; its source text is also
 *                 embedded in the log and error messages.
 *   handleerror - statement(s) executed on failure, after ERRDBG and
 *                 EXIT_OR_NOT (e.g. `goto fn_fail` or `return PAPI_EMISC`).
 *
 * The macro expands to a single statement and deliberately has NO trailing
 * semicolon, so `NVPW_CALL(...);` is safe inside an unbraced if/else.
 */
#define NVPW_CALL( call, handleerror ) \
    do { \
        NVPA_Status _status = (call); \
        LOGCUPTICALL("\t" #call "\n"); \
        if (_status != NVPA_STATUS_SUCCESS) { \
            ERRDBG("NVPA Error %d: Error in call to " #call "\n", _status); \
            EXIT_OR_NOT; \
            handleerror; \
        } \
    } while (0)
128
129static int load_cupti_perf_sym(void)
130{
131 COMPDBG("Entering.\n");
132 int papi_errno = PAPI_OK;
133 if (dl_cupti == NULL) {
134 ERRDBG("libcupti.so should already be loaded.\n");
135 goto fn_fail;
136 }
137
138 cuptiDeviceGetChipNamePtr = DLSYM_AND_CHECK(dl_cupti, "cuptiDeviceGetChipName");
139 cuptiProfilerInitializePtr = DLSYM_AND_CHECK(dl_cupti, "cuptiProfilerInitialize");
140 cuptiProfilerDeInitializePtr = DLSYM_AND_CHECK(dl_cupti, "cuptiProfilerDeInitialize");
141 cuptiProfilerCounterDataImageCalculateSizePtr = DLSYM_AND_CHECK(dl_cupti, "cuptiProfilerCounterDataImageCalculateSize");
142 cuptiProfilerCounterDataImageInitializePtr = DLSYM_AND_CHECK(dl_cupti, "cuptiProfilerCounterDataImageInitialize");
143 cuptiProfilerCounterDataImageCalculateScratchBufferSizePtr = DLSYM_AND_CHECK(dl_cupti, "cuptiProfilerCounterDataImageCalculateScratchBufferSize");
144 cuptiProfilerCounterDataImageInitializeScratchBufferPtr = DLSYM_AND_CHECK(dl_cupti, "cuptiProfilerCounterDataImageInitializeScratchBuffer");
145 cuptiProfilerBeginSessionPtr = DLSYM_AND_CHECK(dl_cupti, "cuptiProfilerBeginSession");
146 cuptiProfilerSetConfigPtr = DLSYM_AND_CHECK(dl_cupti, "cuptiProfilerSetConfig");
147 cuptiProfilerBeginPassPtr = DLSYM_AND_CHECK(dl_cupti, "cuptiProfilerBeginPass");
148 cuptiProfilerEnableProfilingPtr = DLSYM_AND_CHECK(dl_cupti, "cuptiProfilerEnableProfiling");
149 cuptiProfilerPushRangePtr = DLSYM_AND_CHECK(dl_cupti, "cuptiProfilerPushRange");
150 cuptiProfilerPopRangePtr = DLSYM_AND_CHECK(dl_cupti, "cuptiProfilerPopRange");
151 cuptiProfilerDisableProfilingPtr = DLSYM_AND_CHECK(dl_cupti, "cuptiProfilerDisableProfiling");
152 cuptiProfilerEndPassPtr = DLSYM_AND_CHECK(dl_cupti, "cuptiProfilerEndPass");
153 cuptiProfilerFlushCounterDataPtr = DLSYM_AND_CHECK(dl_cupti, "cuptiProfilerFlushCounterData");
154 cuptiProfilerUnsetConfigPtr = DLSYM_AND_CHECK(dl_cupti, "cuptiProfilerUnsetConfig");
155 cuptiProfilerEndSessionPtr = DLSYM_AND_CHECK(dl_cupti, "cuptiProfilerEndSession");
156 cuptiProfilerGetCounterAvailabilityPtr = DLSYM_AND_CHECK(dl_cupti, "cuptiProfilerGetCounterAvailability");
157 cuptiFinalizePtr = DLSYM_AND_CHECK(dl_cupti, "cuptiFinalize");
158
159fn_exit:
160 return papi_errno;
161fn_fail:
162 papi_errno = PAPI_EMISC;
163 goto fn_exit;
164}
165
166static int unload_cupti_perf_sym(void)
167{
168 if (dl_cupti) {
169 dlclose(dl_cupti);
170 dl_cupti = NULL;
171 }
191 cuptiFinalizePtr = NULL;
192 return PAPI_OK;
193}
194
195static int load_nvpw_sym(void)
196{
197 COMPDBG("Entering.\n");
198 char dlname[] = "libnvperf_host.so";
199 char lookup_path[PATH_MAX];
200
201 char *papi_cuda_perfworks = getenv("PAPI_CUDA_PERFWORKS");
202 if (papi_cuda_perfworks) {
203 sprintf(lookup_path, "%s/%s", papi_cuda_perfworks, dlname);
204 dl_nvpw = dlopen(lookup_path, RTLD_NOW | RTLD_GLOBAL);
205 }
206
207 const char *standard_paths[] = {
208 "%s/extras/CUPTI/lib64/%s",
209 "%s/lib64/%s",
210 NULL,
211 };
212
213 if (linked_cudart_path && !dl_nvpw) {
214 dl_nvpw = cuptic_load_dynamic_syms(linked_cudart_path, dlname, standard_paths);
215 }
216
217 char *papi_cuda_root = getenv("PAPI_CUDA_ROOT");
218 if (papi_cuda_root && !dl_nvpw) {
219 dl_nvpw = cuptic_load_dynamic_syms(papi_cuda_root, dlname, standard_paths);
220 }
221
222 if (!dl_nvpw) {
223 dl_nvpw = dlopen(dlname, RTLD_NOW | RTLD_GLOBAL);
224 if (!dl_nvpw) {
225 ERRDBG("Loading libnvperf_host.so failed.\n");
226 goto fn_fail;
227 }
228 }
229
230 NVPW_GetSupportedChipNamesPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_GetSupportedChipNames");
231 NVPW_CUDA_MetricsContext_CreatePtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_CUDA_MetricsContext_Create");
232 NVPW_MetricsContext_DestroyPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsContext_Destroy");
233 NVPW_MetricsContext_GetMetricNames_BeginPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsContext_GetMetricNames_Begin");
234 NVPW_MetricsContext_GetMetricNames_EndPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsContext_GetMetricNames_End");
235 NVPW_InitializeHostPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_InitializeHost");
236 NVPW_MetricsContext_GetMetricProperties_BeginPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsContext_GetMetricProperties_Begin");
237 NVPW_MetricsContext_GetMetricProperties_EndPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsContext_GetMetricProperties_End");
238 NVPW_CUDA_RawMetricsConfig_CreatePtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_CUDA_RawMetricsConfig_Create");
239 NVPW_RawMetricsConfig_DestroyPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_RawMetricsConfig_Destroy");
240 NVPW_RawMetricsConfig_BeginPassGroupPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_RawMetricsConfig_BeginPassGroup");
241 NVPW_RawMetricsConfig_EndPassGroupPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_RawMetricsConfig_EndPassGroup");
242 NVPW_RawMetricsConfig_AddMetricsPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_RawMetricsConfig_AddMetrics");
243 NVPW_RawMetricsConfig_GenerateConfigImagePtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_RawMetricsConfig_GenerateConfigImage");
244 NVPW_RawMetricsConfig_GetConfigImagePtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_RawMetricsConfig_GetConfigImage");
245 NVPW_CounterDataBuilder_CreatePtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_CounterDataBuilder_Create");
246 NVPW_CounterDataBuilder_DestroyPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_CounterDataBuilder_Destroy");
247 NVPW_CounterDataBuilder_AddMetricsPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_CounterDataBuilder_AddMetrics");
248 NVPW_CounterDataBuilder_GetCounterDataPrefixPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_CounterDataBuilder_GetCounterDataPrefix");
249 NVPW_CounterData_GetNumRangesPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_CounterData_GetNumRanges");
250 NVPW_Profiler_CounterData_GetRangeDescriptionsPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_Profiler_CounterData_GetRangeDescriptions");
251 NVPW_MetricsContext_SetCounterDataPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsContext_SetCounterData");
252 NVPW_MetricsContext_EvaluateToGpuValuesPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsContext_EvaluateToGpuValues");
253 NVPW_RawMetricsConfig_GetNumPassesPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_RawMetricsConfig_GetNumPasses");
254 NVPW_RawMetricsConfig_SetCounterAvailabilityPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_RawMetricsConfig_SetCounterAvailability");
255 NVPW_RawMetricsConfig_IsAddMetricsPossiblePtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_RawMetricsConfig_IsAddMetricsPossible");
256 NVPW_MetricsContext_GetCounterNames_BeginPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsContext_GetCounterNames_Begin");
257 NVPW_MetricsContext_GetCounterNames_EndPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsContext_GetCounterNames_End");
258
259 Dl_info info;
260 dladdr(NVPW_GetSupportedChipNamesPtr, &info);
261 LOGDBG("NVPW library loaded from %s\n", info.dli_fname);
262 return PAPI_OK;
263fn_fail:
264 return PAPI_EMISC;
265}
266
267static int unload_nvpw_sym(void)
268{
269 if (dl_nvpw) {
270 dlclose(dl_nvpw);
271 dl_nvpw = NULL;
272 }
301 return PAPI_OK;
302}
303
305{
306 COMPDBG("Entering.\n");
307 int papi_errno;
308 CUpti_Profiler_Initialize_Params profilerInitializeParams = { CUpti_Profiler_Initialize_Params_STRUCT_SIZE, NULL };
309 papi_errno = cuptiProfilerInitializePtr(&profilerInitializeParams);
310 if (papi_errno != CUPTI_SUCCESS) {
311 ERRDBG("CUPTI error %d: cuptiProfilerInitialize failed.\n", papi_errno);
312 return PAPI_EMISC;
313 }
314 return PAPI_OK;
315}
316
318{
319 COMPDBG("Entering.\n");
320 int papi_errno;
321 CUpti_Profiler_DeInitialize_Params profilerDeInitializeParams = { CUpti_Profiler_DeInitialize_Params_STRUCT_SIZE, NULL };
322 papi_errno = cuptiProfilerDeInitializePtr(&profilerDeInitializeParams);
323 if (papi_errno != CUPTI_SUCCESS) {
324 ERRDBG("CUPTI Error %d: cuptiProfilerDeInitialize failed.\n", papi_errno);
325 return PAPI_EMISC;
326 }
327 return PAPI_OK;
328}
329
331{
332 COMPDBG("Entering.\n");
333 int papi_errno;
334 NVPW_InitializeHost_Params perfInitHostParams = { NVPW_InitializeHost_Params_STRUCT_SIZE, NULL };
335 papi_errno = NVPW_InitializeHostPtr(&perfInitHostParams);
336 if (papi_errno != NVPA_STATUS_SUCCESS) {
337 ERRDBG("NVPW Error %d: NVPW_InitializeHostPtr failed.\n", papi_errno);
338 return PAPI_EMISC;
339 }
340 return PAPI_OK;
341}
342
343static int get_chip_name(int dev_num, char* chipName)
344{
345 int papi_errno;
346 CUpti_Device_GetChipName_Params getChipName = {
347 .structSize = CUpti_Device_GetChipName_Params_STRUCT_SIZE,
348 .pPriv = NULL,
349 .deviceIndex = 0
350 };
351 getChipName.deviceIndex = dev_num;
352 papi_errno = cuptiDeviceGetChipNamePtr(&getChipName);
353 if (papi_errno != CUPTI_SUCCESS) {
354 ERRDBG("CUPTI error %d: Failed to get chip name for device %d\n", papi_errno, dev_num);
355 return PAPI_EMISC;
356 }
357 strcpy(chipName, getChipName.pChipName);
358 return PAPI_OK;
359}
360
/* A heap buffer together with its length in bytes. */
struct byte_array_s {
    int      size;   /* number of bytes held in `data` */
    uint8_t *data;   /* buffer contents */
};
365
370 NVPA_RawMetricRequest *rmr;
373 byte_array_t configImage;
374 byte_array_t counterDataImage;
377 CUpti_Profiler_CounterDataImageOptions counterDataImageOptions;
378 CUpti_Profiler_CounterDataImage_Initialize_Params initializeParams;
379 CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params initScratchBufferParams;
380};
381
383 cuptip_gpu_state_t *gpu_ctl;
386 cuptic_info_t info;
387};
388
390 char chip_name[32];
394};
395
396static int event_name_tokenize(const char *name, char *nv_name, int *gpuid)
397{
398 if (nv_name == NULL) {
399 return PAPI_EINVAL;
400 }
401
402 int numchars;
403 const char token[] = ":device=";
404 const int tok_len = 8;
405 char *rest;
406
407 char *getdevstr = strstr(name, token);
408 if (getdevstr == NULL) {
409 ERRDBG("Event name does not contain device number.\n");
410 return PAPI_EINVAL;
411 }
412 getdevstr += tok_len;
413 *gpuid = strtol(getdevstr, &rest, 10);
414 numchars = strlen(name) - strlen(getdevstr) - tok_len;
415 memcpy(nv_name, name, numchars);
416 nv_name[numchars] = '\0';
417
418 return PAPI_OK;
419}
420
422{
423 COMPDBG("Entering.\n");
424 int i, gpu_id, papi_errno = PAPI_OK;
425 char nvName[PAPI_MAX_STR_LEN];
426 cuptiu_event_t *evt_rec;
427 for (i=0; i < num_gpus; i++) {
428 papi_errno = cuptiu_event_table_create(sizeof(cuptiu_event_t), &(state->gpu_ctl[i].event_names));
429 if (papi_errno != PAPI_OK) {
430 goto fn_exit;
431 }
432 }
433 for (i = 0; i < (int) event_names->count; i++) {
434 papi_errno = cuptiu_event_table_get_item(event_names, i, &evt_rec);
435 if (papi_errno != PAPI_OK) {
436 goto fn_exit;
437 }
438 papi_errno = event_name_tokenize(evt_rec->name, (char*) &nvName, &gpu_id);
439 if (papi_errno != PAPI_OK) {
440 goto fn_exit;
441 }
442 if (gpu_id < 0 || gpu_id > num_gpus) {
443 papi_errno = PAPI_EINVAL;
444 goto fn_exit;
445 }
446 cuptiu_event_table_insert_record(state->gpu_ctl[gpu_id].event_names, evt_rec->name, evt_rec->evt_code, i);
447 LOGDBG("Adding event gpu %d name %s with code %d at pos %d\n", gpu_id, evt_rec->name, evt_rec->evt_code, i);
448 }
449fn_exit:
450 return papi_errno;
451}
452
453static int retrieve_metric_details(NVPA_MetricsContext *pMetricsContext, const char *nv_name,
454 char *description, int *numDep, NVPA_RawMetricRequest **pRMR)
455{
456 COMPDBG("Entering.\n");
457 int num_dep, i, len;
458 NVPA_Status nvpa_err;
459
460 if (nv_name == NULL || description == NULL) {
461 return PAPI_EINVAL;
462 }
463
464 NVPW_MetricsContext_GetMetricProperties_Begin_Params getMetricPropertiesBeginParams = {
465 .structSize = NVPW_MetricsContext_GetMetricProperties_Begin_Params_STRUCT_SIZE,
466 .pPriv = NULL,
467 .pMetricsContext = pMetricsContext,
468 .pMetricName = nv_name,
469 };
470 nvpa_err = NVPW_MetricsContext_GetMetricProperties_BeginPtr(&getMetricPropertiesBeginParams);
471
472 if (nvpa_err != NVPA_STATUS_SUCCESS || getMetricPropertiesBeginParams.ppRawMetricDependencies == NULL) {
473 strcpy(description, "Could not get description.");
474 return PAPI_EINVAL;
475 }
476
477 for (num_dep = 0; getMetricPropertiesBeginParams.ppRawMetricDependencies[num_dep] != NULL; num_dep++) {;}
478
479 NVPA_RawMetricRequest *rmr = (NVPA_RawMetricRequest *) papi_calloc(num_dep, sizeof(NVPA_RawMetricRequest));
480 if (rmr == NULL) {
481 return PAPI_ENOMEM;
482 }
483
484 for (i = 0; i < num_dep; i++) {
485 rmr[i].pMetricName = strdup(getMetricPropertiesBeginParams.ppRawMetricDependencies[i]);
486 rmr[i].isolated = 1;
487 rmr[i].keepInstances = 1;
488 rmr[i].structSize = NVPW_MetricsContext_GetMetricProperties_End_Params_STRUCT_SIZE;
489 }
490
491 len = snprintf(description, PAPI_2MAX_STR_LEN, "%s. Units=(%s)",
492 getMetricPropertiesBeginParams.pDescription,
493 getMetricPropertiesBeginParams.pDimUnits);
494 if (len > PAPI_2MAX_STR_LEN) {
495 ERRDBG("String formatting exceeded max string length.\n");
496 return PAPI_ENOMEM;
497 }
498 *numDep = num_dep;
499 *pRMR = rmr;
500 NVPW_MetricsContext_GetMetricProperties_End_Params getMetricPropertiesEndParams = {
501 .structSize = NVPW_MetricsContext_GetMetricProperties_End_Params_STRUCT_SIZE,
502 .pPriv = NULL,
503 .pMetricsContext = pMetricsContext,
504 };
505 NVPW_CALL( NVPW_MetricsContext_GetMetricProperties_EndPtr(&getMetricPropertiesEndParams), return PAPI_EMISC );
506 return PAPI_OK;
507}
508
509static int get_event_names_rmr(cuptip_gpu_state_t *gpu_ctl)
510{
511 COMPDBG("Entering.\n");
512 int papi_errno = PAPI_OK;
513 NVPA_RawMetricRequest *all_rmr=NULL;
514 int count_raw_metrics = 0;
515 unsigned int i;
516 int j, k, num_dep;
517 NVPA_RawMetricRequest *temp;
518 char nv_name[PAPI_MAX_STR_LEN];
519 int gpuid;
520 cuptiu_event_t *evt_rec;
521 for (i = 0; i < gpu_ctl->event_names->count; i++) {
522 papi_errno = cuptiu_event_table_get_item(gpu_ctl->event_names, i, &evt_rec);
523 if (papi_errno != PAPI_OK) {
524 goto fn_exit;
525 }
526 papi_errno = event_name_tokenize(evt_rec->name, (char *) &nv_name, &gpuid);
527
528 papi_errno = retrieve_metric_details(gpu_ctl->pmetricsContextCreateParams->pMetricsContext, nv_name, evt_rec->desc, &num_dep, &temp);
529 if (papi_errno != PAPI_OK) {
530 papi_errno = PAPI_ENOEVNT;
531 goto fn_exit;
532 }
533
534 all_rmr = (NVPA_RawMetricRequest *) papi_realloc(all_rmr, (count_raw_metrics + num_dep) * sizeof(NVPA_RawMetricRequest));
535 if (all_rmr == NULL) {
536 papi_errno = PAPI_ENOMEM;
537 goto fn_exit;
538 }
539 for (j = 0; j < num_dep; j++) {
540 k = j + count_raw_metrics;
541 all_rmr[k].structSize = temp[j].structSize;
542 all_rmr[k].pPriv = NULL;
543 all_rmr[k].pMetricName = strdup(temp[j].pMetricName);
544 all_rmr[k].keepInstances = 1;
545 all_rmr[k].isolated = 1;
546 papi_free((void *) temp[j].pMetricName);
547 }
548 count_raw_metrics += num_dep;
549 papi_free(temp);
550 }
551 gpu_ctl->rmr = all_rmr;
552 gpu_ctl->rmr_count = count_raw_metrics;
553fn_exit:
554 return papi_errno;
555}
556
557static int check_num_passes(struct NVPA_RawMetricsConfig *pRawMetricsConfig, int rmr_count, NVPA_RawMetricRequest *rmr, int *num_pass)
558{
559 COMPDBG("Entering.\n");
560 NVPW_RawMetricsConfig_BeginPassGroup_Params beginPassGroupParams = {
561 .structSize = NVPW_RawMetricsConfig_BeginPassGroup_Params_STRUCT_SIZE,
562 .pPriv = NULL,
563 .pRawMetricsConfig = pRawMetricsConfig,
564 .maxPassCount = 1,
565 };
566 NVPW_CALL( NVPW_RawMetricsConfig_BeginPassGroupPtr(&beginPassGroupParams), goto fn_fail );
567
568 NVPW_RawMetricsConfig_AddMetrics_Params addMetricsParams = {
569 .structSize = NVPW_RawMetricsConfig_AddMetrics_Params_STRUCT_SIZE,
570 .pPriv = NULL,
571 .pRawMetricsConfig = pRawMetricsConfig,
572 .pRawMetricRequests = rmr,
573 .numMetricRequests = rmr_count,
574 };
575 NVPW_CALL( NVPW_RawMetricsConfig_AddMetricsPtr(&addMetricsParams), goto fn_fail );
576
577 NVPW_RawMetricsConfig_EndPassGroup_Params endPassGroupParams = {
578 .structSize = NVPW_RawMetricsConfig_EndPassGroup_Params_STRUCT_SIZE,
579 .pPriv = NULL,
580 .pRawMetricsConfig = pRawMetricsConfig,
581 };
582 NVPW_CALL( NVPW_RawMetricsConfig_EndPassGroupPtr(&endPassGroupParams), goto fn_fail );
583
584 NVPW_RawMetricsConfig_GetNumPasses_Params rawMetricsConfigGetNumPassesParams = {
585 .structSize = NVPW_RawMetricsConfig_GetNumPasses_Params_STRUCT_SIZE,
586 .pPriv = NULL,
587 .pRawMetricsConfig = pRawMetricsConfig,
588 };
589 NVPW_CALL( NVPW_RawMetricsConfig_GetNumPassesPtr(&rawMetricsConfigGetNumPassesParams), goto fn_fail );
590
591 int numNestingLevels = 1, numIsolatedPasses, numPipelinedPasses;
592 numIsolatedPasses = rawMetricsConfigGetNumPassesParams.numIsolatedPasses;
593 numPipelinedPasses = rawMetricsConfigGetNumPassesParams.numPipelinedPasses;
594
595 *num_pass = numPipelinedPasses + numIsolatedPasses * numNestingLevels;
596
597 if (*num_pass > 1) {
598 ERRDBG("Metrics requested requires multiple passes to profile.\n");
599 return PAPI_EMULPASS;
600 }
601
602 return PAPI_OK;
603fn_fail:
604 return PAPI_EMISC;
605}
606
607static int nvpw_cuda_metricscontext_create(cuptip_control_t state)
608{
609 int gpu_id, found, papi_errno = PAPI_OK;
610 cuptip_gpu_state_t *gpu_ctl;
611
612 for (gpu_id = 0; gpu_id < num_gpus; gpu_id++) {
613 gpu_ctl = &(state->gpu_ctl[gpu_id]);
614 found = find_same_chipname(gpu_id);
615 if (found > -1) {
616 gpu_ctl->pmetricsContextCreateParams = state->gpu_ctl[found].pmetricsContextCreateParams;
617 continue;
618 }
619 MCCP_t *pMCCP = (MCCP_t *) papi_calloc(1, sizeof(MCCP_t));
620 if (pMCCP == NULL) {
621 papi_errno = PAPI_ENOMEM;
622 goto fn_exit;
623 }
624 pMCCP->structSize = NVPW_CUDA_MetricsContext_Create_Params_STRUCT_SIZE;
625 pMCCP->pChipName = avail_events[gpu_id].chip_name;
626 NVPW_CALL( NVPW_CUDA_MetricsContext_CreatePtr(pMCCP), goto fn_fail );
627 gpu_ctl->pmetricsContextCreateParams = pMCCP;
628 }
629fn_exit:
630 return papi_errno;
631fn_fail:
632 papi_errno = PAPI_EMISC;
633 goto fn_exit;
634}
635
636static int nvpw_cuda_metricscontext_destroy(cuptip_control_t state)
637{
638 int gpu_id, found, papi_errno = PAPI_OK;
639 cuptip_gpu_state_t *gpu_ctl;
640
641 for (gpu_id = 0; gpu_id < num_gpus; gpu_id++) {
642 gpu_ctl = &(state->gpu_ctl[gpu_id]);
643 found = find_same_chipname(gpu_id);
644 if (found > -1) {
645 gpu_ctl->pmetricsContextCreateParams = NULL;
646 continue;
647 }
648 if (gpu_ctl->pmetricsContextCreateParams->pMetricsContext) {
649 NVPW_MetricsContext_Destroy_Params mCDP = {
650 .structSize = NVPW_MetricsContext_Destroy_Params_STRUCT_SIZE,
651 .pPriv = NULL,
652 .pMetricsContext = gpu_ctl->pmetricsContextCreateParams->pMetricsContext,
653 };
654 NVPW_CALL( NVPW_MetricsContext_DestroyPtr(&mCDP), goto fn_fail );
655 papi_free(gpu_ctl->pmetricsContextCreateParams);
656 gpu_ctl->pmetricsContextCreateParams = NULL;
657 }
658 }
659fn_exit:
660 return papi_errno;
661fn_fail:
662 papi_errno = PAPI_EMISC;
663 goto fn_exit;
664}
665
666static int control_state_validate(cuptip_control_t state)
667{
668 COMPDBG("Entering.\n");
669 int gpu_id, papi_errno = PAPI_OK, passes;
670 cuptip_gpu_state_t *gpu_ctl;
671
672 for (gpu_id = 0; gpu_id < num_gpus; gpu_id++) {
673 gpu_ctl = &(state->gpu_ctl[gpu_id]);
674 if (gpu_ctl->event_names->count == 0) {
675 continue;
676 }
677
678 papi_errno = get_event_names_rmr(gpu_ctl);
679
680 if (papi_errno != PAPI_OK) {
681 goto fn_exit;
682 }
683 NVPW_CUDA_RawMetricsConfig_Create_Params nvpw_metricsConfigCreateParams = {
684 .structSize = NVPW_CUDA_RawMetricsConfig_Create_Params_STRUCT_SIZE,
685 .pPriv = NULL,
686 .activityKind = NVPA_ACTIVITY_KIND_PROFILER,
687 .pChipName = avail_events[gpu_id].chip_name,
688 };
689 NVPW_CALL( NVPW_CUDA_RawMetricsConfig_CreatePtr(&nvpw_metricsConfigCreateParams), goto fn_fail );
690
691 papi_errno = check_num_passes(nvpw_metricsConfigCreateParams.pRawMetricsConfig,
692 gpu_ctl->rmr_count, gpu_ctl->rmr, &passes);
693
694 NVPW_RawMetricsConfig_Destroy_Params rawMetricsConfigDestroyParams = {
695 .structSize = NVPW_RawMetricsConfig_Destroy_Params_STRUCT_SIZE,
696 .pPriv = NULL,
697 .pRawMetricsConfig = nvpw_metricsConfigCreateParams.pRawMetricsConfig,
698 };
699 NVPW_CALL( NVPW_RawMetricsConfig_DestroyPtr((NVPW_RawMetricsConfig_Destroy_Params *) &rawMetricsConfigDestroyParams), goto fn_fail );
700 if (papi_errno != PAPI_OK) {
701 goto fn_exit;
702 }
703 }
704fn_exit:
705 return papi_errno;
706fn_fail:
707 papi_errno = PAPI_EMISC;
708 goto fn_exit;
709}
710
711static int get_counter_availability(cuptip_gpu_state_t *gpu_ctl)
712{
713 int papi_errno;
714 /* Get size of counterAvailabilityImage - in first pass, GetCounterAvailability return size needed for data */
715 CUpti_Profiler_GetCounterAvailability_Params getCounterAvailabilityParams = {
716 .structSize = CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE,
717 .pPriv = NULL,
718 .ctx = NULL,
719 .pCounterAvailabilityImage = NULL,
720 };
721 papi_errno = cuptiProfilerGetCounterAvailabilityPtr(&getCounterAvailabilityParams);
722 if (papi_errno != CUPTI_SUCCESS) {
723 ERRDBG("CUPTI error %d: Failed to get size.\n", papi_errno);
724 return PAPI_EMISC;
725 }
726 /* Allocate sized counterAvailabilityImage */
727 gpu_ctl->counterAvailabilityImage.size = getCounterAvailabilityParams.counterAvailabilityImageSize;
728 gpu_ctl->counterAvailabilityImage.data = (uint8_t *) papi_malloc(gpu_ctl->counterAvailabilityImage.size);
729 if (gpu_ctl->counterAvailabilityImage.data == NULL) {
730 return PAPI_ENOMEM;
731 }
732 /* Initialize counterAvailabilityImage */
733 getCounterAvailabilityParams.pCounterAvailabilityImage = gpu_ctl->counterAvailabilityImage.data;
734 papi_errno = cuptiProfilerGetCounterAvailabilityPtr(&getCounterAvailabilityParams);
735 if (papi_errno != CUPTI_SUCCESS) {
736 ERRDBG("CUPTI error %d: Failed to get bytes.\n", papi_errno);
737 return PAPI_EMISC;
738 }
739 return PAPI_OK;
740}
741
742static int metric_get_config_image(cuptip_gpu_state_t *gpu_ctl)
743{
744 COMPDBG("Entering.\n");
745 NVPW_CUDA_RawMetricsConfig_Create_Params nvpw_metricsConfigCreateParams = {
746 .structSize = NVPW_CUDA_RawMetricsConfig_Create_Params_STRUCT_SIZE,
747 .pPriv = NULL,
748 .activityKind = NVPA_ACTIVITY_KIND_PROFILER,
749 .pChipName = avail_events[gpu_ctl->gpu_id].chip_name,
750 };
751 NVPW_CALL( NVPW_CUDA_RawMetricsConfig_CreatePtr(&nvpw_metricsConfigCreateParams), goto fn_fail );
752
753 if( gpu_ctl->counterAvailabilityImage.data != NULL) {
754 NVPW_RawMetricsConfig_SetCounterAvailability_Params setCounterAvailabilityParams = {
755 .structSize = NVPW_RawMetricsConfig_SetCounterAvailability_Params_STRUCT_SIZE,
756 .pPriv = NULL,
757 .pRawMetricsConfig = nvpw_metricsConfigCreateParams.pRawMetricsConfig,
758 .pCounterAvailabilityImage = gpu_ctl->counterAvailabilityImage.data,
759 };
760 NVPW_CALL( NVPW_RawMetricsConfig_SetCounterAvailabilityPtr(&setCounterAvailabilityParams), goto fn_fail );
761 };
762
763 NVPW_RawMetricsConfig_BeginPassGroup_Params beginPassGroupParams = {
764 .structSize = NVPW_RawMetricsConfig_BeginPassGroup_Params_STRUCT_SIZE,
765 .pPriv = NULL,
766 .pRawMetricsConfig = nvpw_metricsConfigCreateParams.pRawMetricsConfig,
767 .maxPassCount = 1,
768 };
769 NVPW_CALL( NVPW_RawMetricsConfig_BeginPassGroupPtr(&beginPassGroupParams), goto fn_fail );
770
771 NVPW_RawMetricsConfig_AddMetrics_Params addMetricsParams = {
772 .structSize = NVPW_RawMetricsConfig_AddMetrics_Params_STRUCT_SIZE,
773 .pPriv = NULL,
774 .pRawMetricsConfig = nvpw_metricsConfigCreateParams.pRawMetricsConfig,
775 .pRawMetricRequests = gpu_ctl->rmr,
776 .numMetricRequests = gpu_ctl->rmr_count,
777 };
778 NVPW_CALL( NVPW_RawMetricsConfig_AddMetricsPtr(&addMetricsParams), goto fn_fail );
779
780 NVPW_RawMetricsConfig_EndPassGroup_Params endPassGroupParams = {
781 .structSize = NVPW_RawMetricsConfig_EndPassGroup_Params_STRUCT_SIZE,
782 .pPriv = NULL,
783 .pRawMetricsConfig = nvpw_metricsConfigCreateParams.pRawMetricsConfig,
784 };
785 NVPW_CALL( NVPW_RawMetricsConfig_EndPassGroupPtr(&endPassGroupParams), goto fn_fail );
786
787 NVPW_RawMetricsConfig_GenerateConfigImage_Params generateConfigImageParams = {
788 .structSize = NVPW_RawMetricsConfig_GenerateConfigImage_Params_STRUCT_SIZE,
789 .pPriv = NULL,
790 .pRawMetricsConfig = nvpw_metricsConfigCreateParams.pRawMetricsConfig,
791 };
792 NVPW_CALL( NVPW_RawMetricsConfig_GenerateConfigImagePtr(&generateConfigImageParams), goto fn_fail );
793
794 NVPW_RawMetricsConfig_GetConfigImage_Params getConfigImageParams = {
795 .structSize = NVPW_RawMetricsConfig_GetConfigImage_Params_STRUCT_SIZE,
796 .pPriv = NULL,
797 .pRawMetricsConfig = nvpw_metricsConfigCreateParams.pRawMetricsConfig,
798 .bytesAllocated = 0,
799 .pBuffer = NULL,
800 };
801 NVPW_CALL( NVPW_RawMetricsConfig_GetConfigImagePtr(&getConfigImageParams), goto fn_fail );
802
803 gpu_ctl->configImage.size = getConfigImageParams.bytesCopied;
804 gpu_ctl->configImage.data = (uint8_t *) papi_calloc(gpu_ctl->configImage.size, sizeof(uint8_t));
805 if (gpu_ctl->configImage.data == NULL) {
806 ERRDBG("calloc gpu_ctl->configImage.data failed!");
807 return PAPI_ENOMEM;
808 }
809
810 getConfigImageParams.bytesAllocated = gpu_ctl->configImage.size;
811 getConfigImageParams.pBuffer = gpu_ctl->configImage.data;
812 NVPW_CALL( NVPW_RawMetricsConfig_GetConfigImagePtr(&getConfigImageParams), goto fn_fail );
813
814 NVPW_RawMetricsConfig_Destroy_Params rawMetricsConfigDestroyParams = {
815 .structSize = NVPW_RawMetricsConfig_Destroy_Params_STRUCT_SIZE,
816 .pPriv = NULL,
817 .pRawMetricsConfig = nvpw_metricsConfigCreateParams.pRawMetricsConfig,
818 };
819 NVPW_CALL( NVPW_RawMetricsConfig_DestroyPtr((NVPW_RawMetricsConfig_Destroy_Params *) &rawMetricsConfigDestroyParams), goto fn_fail );
820
821 return PAPI_OK;
822fn_fail:
823 return PAPI_EMISC;
824}
825
826static int metric_get_counter_data_prefix_image(cuptip_gpu_state_t *gpu_ctl)
827{
828 COMPDBG("Entering.\n");
829 NVPW_CounterDataBuilder_Create_Params counterDataBuilderCreateParams = {
830 .structSize = NVPW_CounterDataBuilder_Create_Params_STRUCT_SIZE,
831 .pPriv = NULL,
832 .pChipName = avail_events[gpu_ctl->gpu_id].chip_name,
833 };
834 NVPW_CALL( NVPW_CounterDataBuilder_CreatePtr(&counterDataBuilderCreateParams), goto fn_fail );
835
836 NVPW_CounterDataBuilder_AddMetrics_Params addMetricsParams = {
837 .structSize = NVPW_CounterDataBuilder_AddMetrics_Params_STRUCT_SIZE,
838 .pPriv = NULL,
839 .pCounterDataBuilder = counterDataBuilderCreateParams.pCounterDataBuilder,
840 .pRawMetricRequests = gpu_ctl->rmr,
841 .numMetricRequests = gpu_ctl->rmr_count,
842 };
843 NVPW_CALL( NVPW_CounterDataBuilder_AddMetricsPtr(&addMetricsParams), goto fn_fail );
844
845 NVPW_CounterDataBuilder_GetCounterDataPrefix_Params getCounterDataPrefixParams = {
846 .structSize = NVPW_CounterDataBuilder_GetCounterDataPrefix_Params_STRUCT_SIZE,
847 .pPriv = NULL,
848 .pCounterDataBuilder = counterDataBuilderCreateParams.pCounterDataBuilder,
849 .bytesAllocated = 0,
850 .pBuffer = NULL,
851 };
852 NVPW_CALL( NVPW_CounterDataBuilder_GetCounterDataPrefixPtr(&getCounterDataPrefixParams), goto fn_fail );
853
854 gpu_ctl->counterDataImagePrefix.size = getCounterDataPrefixParams.bytesCopied;
855 gpu_ctl->counterDataImagePrefix.data = (uint8_t *) papi_calloc(gpu_ctl->counterDataImagePrefix.size, sizeof(uint8_t));
856 if (gpu_ctl->counterDataImagePrefix.data == NULL) {
857 ERRDBG("calloc gpu_ctl->counterDataImagePrefix.data failed!");
858 return PAPI_ENOMEM;
859 }
860
861 getCounterDataPrefixParams.bytesAllocated = gpu_ctl->counterDataImagePrefix.size;
862 getCounterDataPrefixParams.pBuffer = gpu_ctl->counterDataImagePrefix.data;
863 NVPW_CALL( NVPW_CounterDataBuilder_GetCounterDataPrefixPtr(&getCounterDataPrefixParams), goto fn_fail );
864
865 NVPW_CounterDataBuilder_Destroy_Params counterDataBuilderDestroyParams = {
866 .structSize = NVPW_CounterDataBuilder_Destroy_Params_STRUCT_SIZE,
867 .pPriv = NULL,
868 .pCounterDataBuilder = counterDataBuilderCreateParams.pCounterDataBuilder,
869 };
870 NVPW_CALL( NVPW_CounterDataBuilder_DestroyPtr(&counterDataBuilderDestroyParams), goto fn_fail );
871
872 return PAPI_OK;
873fn_fail:
874 return PAPI_EMISC;
875}
876
877static int create_counter_data_image(cuptip_gpu_state_t *gpu_ctl)
878{
879 COMPDBG("Entering.\n");
880 gpu_ctl->counterDataImageOptions = (CUpti_Profiler_CounterDataImageOptions) {
881 .structSize = CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE,
882 .pPriv = NULL,
883 .pCounterDataPrefix = gpu_ctl->counterDataImagePrefix.data,
884 .counterDataPrefixSize = gpu_ctl->counterDataImagePrefix.size,
885 .maxNumRanges = 1,
886 .maxNumRangeTreeNodes = 1,
887 .maxRangeNameLength = 64,
888 };
889
890 CUpti_Profiler_CounterDataImage_CalculateSize_Params calculateSizeParams = {
891 .structSize = CUpti_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE,
892 .pPriv = NULL,
893 .sizeofCounterDataImageOptions = CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE,
894 .pOptions = &gpu_ctl->counterDataImageOptions,
895 };
896 CUPTI_CALL( cuptiProfilerCounterDataImageCalculateSizePtr(&calculateSizeParams), goto fn_fail );
897
898 gpu_ctl->initializeParams = (CUpti_Profiler_CounterDataImage_Initialize_Params) {
899 .structSize = CUpti_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE,
900 .pPriv = NULL,
901 .sizeofCounterDataImageOptions = CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE,
902 .pOptions = &gpu_ctl->counterDataImageOptions,
903 .counterDataImageSize = calculateSizeParams.counterDataImageSize,
904 };
905
906 gpu_ctl->counterDataImage.size = calculateSizeParams.counterDataImageSize;
907 gpu_ctl->counterDataImage.data = (uint8_t *) papi_calloc(gpu_ctl->counterDataImage.size, sizeof(uint8_t));
908 if (gpu_ctl->counterDataImage.data == NULL) {
909 ERRDBG("calloc gpu_ctl->counterDataImage.data failed!\n");
910 return PAPI_ENOMEM;
911 }
912
913 gpu_ctl->initializeParams.pCounterDataImage = gpu_ctl->counterDataImage.data;
914 CUPTI_CALL( cuptiProfilerCounterDataImageInitializePtr(&gpu_ctl->initializeParams), goto fn_fail );
915
916 CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params scratchBufferSizeParams = {
917 .structSize = CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE,
918 .pPriv = NULL,
919 .counterDataImageSize = calculateSizeParams.counterDataImageSize,
920 .pCounterDataImage = gpu_ctl->initializeParams.pCounterDataImage,
921 };
922 CUPTI_CALL( cuptiProfilerCounterDataImageCalculateScratchBufferSizePtr(&scratchBufferSizeParams), goto fn_fail );
923
924 gpu_ctl->counterDataScratchBuffer.size = scratchBufferSizeParams.counterDataScratchBufferSize;
925 gpu_ctl->counterDataScratchBuffer.data = (uint8_t *) papi_calloc(gpu_ctl->counterDataScratchBuffer.size, sizeof(uint8_t));
926 if (gpu_ctl->counterDataScratchBuffer.data == NULL) {
927 ERRDBG("calloc gpu_ctl->counterDataScratchBuffer.data failed!\n");
928 return PAPI_ENOMEM;
929 }
930
931 gpu_ctl->initScratchBufferParams = (CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params) {
932 .structSize = CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE,
933 .pPriv = NULL,
934 .counterDataImageSize = calculateSizeParams.counterDataImageSize,
935 .pCounterDataImage = gpu_ctl->initializeParams.pCounterDataImage,
936 .counterDataScratchBufferSize = gpu_ctl->counterDataScratchBuffer.size,
937 .pCounterDataScratchBuffer = gpu_ctl->counterDataScratchBuffer.data,
938 };
939 CUPTI_CALL( cuptiProfilerCounterDataImageInitializeScratchBufferPtr(&gpu_ctl->initScratchBufferParams), goto fn_fail );
940
941 return PAPI_OK;
942fn_fail:
943 return PAPI_EMISC;
944}
945
946static int reset_cupti_prof_config_images(cuptip_gpu_state_t *gpu_ctl)
947{
948 COMPDBG("Entering.\n");
949 papi_free(gpu_ctl->counterDataImagePrefix.data);
950 papi_free(gpu_ctl->configImage.data);
951 papi_free(gpu_ctl->counterDataImage.data);
952 papi_free(gpu_ctl->counterDataScratchBuffer.data);
953 papi_free(gpu_ctl->counterAvailabilityImage.data);
954 gpu_ctl->counterDataImagePrefix.data = NULL;
955 gpu_ctl->configImage.data = NULL;
956 gpu_ctl->counterDataImage.data = NULL;
957 gpu_ctl->counterDataScratchBuffer.data = NULL;
958 gpu_ctl->counterAvailabilityImage.data = NULL;
959 gpu_ctl->counterDataImagePrefix.size = 0;
960 gpu_ctl->configImage.size = 0;
961 gpu_ctl->counterDataImage.size = 0;
962 gpu_ctl->counterDataScratchBuffer.size = 0;
963 gpu_ctl->counterAvailabilityImage.size = 0;
964 return PAPI_OK;
965}
966
967static int begin_profiling(cuptip_gpu_state_t *gpu_ctl)
968{
969 COMPDBG("Entering.\n");
970 byte_array_t *configImage = &(gpu_ctl->configImage);
971 byte_array_t *counterDataScratchBuffer = &(gpu_ctl->counterDataScratchBuffer);
972 byte_array_t *counterDataImage = &(gpu_ctl->counterDataImage);
973
974 CUpti_Profiler_BeginSession_Params beginSessionParams = {
975 .structSize = CUpti_Profiler_BeginSession_Params_STRUCT_SIZE,
976 .pPriv = NULL,
977 .ctx = NULL,
978 .counterDataImageSize = counterDataImage->size,
979 .pCounterDataImage = counterDataImage->data,
980 .counterDataScratchBufferSize = counterDataScratchBuffer->size,
981 .pCounterDataScratchBuffer = counterDataScratchBuffer->data,
982 .range = CUPTI_UserRange,
983 .replayMode = CUPTI_UserReplay,
984 .maxRangesPerPass = 1,
985 .maxLaunchesPerPass = 1,
986 };
987 CUPTI_CALL( cuptiProfilerBeginSessionPtr(&beginSessionParams), goto fn_fail );
988
989 CUpti_Profiler_SetConfig_Params setConfigParams = {
990 .structSize = CUpti_Profiler_SetConfig_Params_STRUCT_SIZE,
991 .pPriv = NULL,
992 .ctx = NULL,
993 .pConfig = configImage->data,
994 .configSize = configImage->size,
995 .minNestingLevel = 1,
996 .numNestingLevels = 1,
997 .passIndex = 0,
998 .targetNestingLevel = 1,
999 };
1000 CUPTI_CALL( cuptiProfilerSetConfigPtr(&setConfigParams), goto fn_fail );
1001
1002 CUpti_Profiler_BeginPass_Params beginPassParams = {
1003 .structSize = CUpti_Profiler_BeginPass_Params_STRUCT_SIZE,
1004 .pPriv = NULL,
1005 .ctx = NULL,
1006 };
1007 CUPTI_CALL( cuptiProfilerBeginPassPtr(&beginPassParams), goto fn_fail );
1008
1009 CUpti_Profiler_EnableProfiling_Params enableProfilingParams = {
1010 .structSize = CUpti_Profiler_EnableProfiling_Params_STRUCT_SIZE,
1011 .pPriv = NULL,
1012 .ctx = NULL,
1013 };
1014 CUPTI_CALL( cuptiProfilerEnableProfilingPtr(&enableProfilingParams), goto fn_fail );
1015
1016 char rangeName[64];
1017 sprintf(rangeName, "PAPI_Range_%d", gpu_ctl->gpu_id);
1018 CUpti_Profiler_PushRange_Params pushRangeParams = {
1019 .structSize = CUpti_Profiler_PushRange_Params_STRUCT_SIZE,
1020 .pPriv = NULL,
1021 .ctx = NULL,
1022 .pRangeName = (const char*) &rangeName,
1023 .rangeNameLength = 100,
1024 };
1025 CUPTI_CALL( cuptiProfilerPushRangePtr(&pushRangeParams), goto fn_fail );
1026
1027 return PAPI_OK;
1028fn_fail:
1029 return PAPI_EMISC;
1030}
1031
1032static int end_profiling(cuptip_gpu_state_t *gpu_ctl)
1033{
1034
1035 COMPDBG("EndProfiling. dev = %d\n", gpu_ctl->gpu_id);
1036 (void) gpu_ctl;
1037
1038 CUpti_Profiler_DisableProfiling_Params disableProfilingParams = {
1039 .structSize = CUpti_Profiler_DisableProfiling_Params_STRUCT_SIZE,
1040 .pPriv = NULL,
1041 .ctx = NULL,
1042 };
1043 CUPTI_CALL( cuptiProfilerDisableProfilingPtr(&disableProfilingParams), goto fn_fail );
1044
1045 CUpti_Profiler_PopRange_Params popRangeParams = {
1046 .structSize = CUpti_Profiler_PopRange_Params_STRUCT_SIZE,
1047 .pPriv = NULL,
1048 .ctx = NULL,
1049 };
1050 CUPTI_CALL( cuptiProfilerPopRangePtr(&popRangeParams), goto fn_fail );
1051
1052 CUpti_Profiler_EndPass_Params endPassParams = {
1053 .structSize = CUpti_Profiler_EndPass_Params_STRUCT_SIZE,
1054 .pPriv = NULL,
1055 .ctx = NULL,
1056 };
1057 CUPTI_CALL( cuptiProfilerEndPassPtr(&endPassParams), goto fn_fail );
1058
1059 CUpti_Profiler_FlushCounterData_Params flushCounterDataParams = {
1060 .structSize = CUpti_Profiler_FlushCounterData_Params_STRUCT_SIZE,
1061 .pPriv = NULL,
1062 .ctx = NULL,
1063 };
1064 CUPTI_CALL( cuptiProfilerFlushCounterDataPtr(&flushCounterDataParams), goto fn_fail );
1065
1066 CUpti_Profiler_UnsetConfig_Params unsetConfigParams = {
1067 .structSize = CUpti_Profiler_UnsetConfig_Params_STRUCT_SIZE,
1068 .pPriv = NULL,
1069 .ctx = NULL,
1070 };
1071 CUPTI_CALL( cuptiProfilerUnsetConfigPtr(&unsetConfigParams), goto fn_fail );
1072
1073 CUpti_Profiler_EndSession_Params endSessionParams = {
1074 .structSize = CUpti_Profiler_EndSession_Params_STRUCT_SIZE,
1075 .pPriv = NULL,
1076 .ctx = NULL,
1077 };
1078 CUPTI_CALL( cuptiProfilerEndSessionPtr(&endSessionParams), goto fn_fail );
1079
1080 return PAPI_OK;
1081fn_fail:
1082 return PAPI_EMISC;
1083}
1084
1085static int get_measured_values(cuptip_gpu_state_t *gpu_ctl)
1086{
1087 COMPDBG("eval_metric_values. dev = %d\n", gpu_ctl->gpu_id);
1088 if (!gpu_ctl->counterDataImage.size) {
1089 ERRDBG("Counter Data Image is empty!\n");
1090 return PAPI_EINVAL;
1091 }
1092 int i, papi_errno = PAPI_OK;
1093 int numMetrics = gpu_ctl->event_names->count;
1094
1095 int dummy;
1096 char **metricNames = (char**) papi_calloc(numMetrics, sizeof(char *));
1097 if (metricNames == NULL) {
1098 ERRDBG("calloc metricNames failed.\n");
1099 return PAPI_ENOMEM;
1100 }
1101 cuptiu_event_t *evt_rec;
1102 for (i = 0; i < numMetrics; i++) {
1103 papi_errno = cuptiu_event_table_get_item(gpu_ctl->event_names, i, &evt_rec);
1104 if (papi_errno != PAPI_OK) {
1105 goto fn_exit;
1106 }
1107 papi_errno = event_name_tokenize(evt_rec->name, evt_rec->desc, &dummy);
1108 if (papi_errno != PAPI_OK) {
1109 goto fn_exit;
1110 }
1111 metricNames[i] = (char *) &(evt_rec->desc);
1112 LOGDBG("Setting metric name %s\n", metricNames[i]);
1113 }
1114
1115 double *gpuValues = (double*) papi_malloc(numMetrics * sizeof(double));
1116 if (gpuValues == NULL) {
1117 ERRDBG("malloc gpuValues failed.\n");
1118 return PAPI_ENOMEM;
1119 }
1120
1121 NVPW_MetricsContext_SetCounterData_Params setCounterDataParams = {
1122 .structSize = NVPW_MetricsContext_SetCounterData_Params_STRUCT_SIZE,
1123 .pPriv = NULL,
1124 .pMetricsContext = gpu_ctl->pmetricsContextCreateParams->pMetricsContext,
1125 .pCounterDataImage = gpu_ctl->counterDataImage.data,
1126 .rangeIndex = 0,
1127 .isolated = 1,
1128 };
1129 NVPW_CALL( NVPW_MetricsContext_SetCounterDataPtr(&setCounterDataParams), goto fn_fail );
1130 NVPW_MetricsContext_EvaluateToGpuValues_Params evalToGpuParams = {
1131 .structSize = NVPW_MetricsContext_EvaluateToGpuValues_Params_STRUCT_SIZE,
1132 .pPriv = NULL,
1133 .pMetricsContext = gpu_ctl->pmetricsContextCreateParams->pMetricsContext,
1134 .numMetrics = numMetrics,
1135 .ppMetricNames = (const char* const*) metricNames,
1136 .pMetricValues = gpuValues,
1137 };
1138 NVPW_CALL( NVPW_MetricsContext_EvaluateToGpuValuesPtr(&evalToGpuParams), goto fn_fail );
1139 papi_free(metricNames);
1140 for (i = 0; i < (int) gpu_ctl->event_names->count; i++) {
1141 papi_errno = cuptiu_event_table_get_item(gpu_ctl->event_names, i, &evt_rec);
1142 if (papi_errno != PAPI_OK) {
1143 papi_free(gpuValues);
1144 goto fn_exit;
1145 }
1146 evt_rec->value = gpuValues[i];
1147 }
1148 papi_free(gpuValues);
1149fn_exit:
1150 return papi_errno;
1151fn_fail:
1152 return PAPI_EMISC;
1153}
1154
1155/* List metrics API */
1156static int find_same_chipname(int gpu_id)
1157{
1158 int i;
1159 for (i = 0; i < gpu_id; i++) {
1160 if (!strcmp(avail_events[gpu_id].chip_name, avail_events[i].chip_name)) {
1161 return i;
1162 }
1163 }
1164 return -1;
1165}
1166
1167static int init_all_metrics(void)
1168{
1169 int gpu_id, papi_errno = PAPI_OK;
1170 avail_events = (list_metrics_t *) papi_calloc(num_gpus, sizeof(list_metrics_t));
1171 if (avail_events == NULL) {
1172 papi_errno = PAPI_ENOMEM;
1173 goto fn_exit;
1174 }
1175 for (gpu_id = 0; gpu_id < num_gpus; gpu_id++) {
1176 papi_errno = get_chip_name(gpu_id, avail_events[gpu_id].chip_name);
1177 if (papi_errno != PAPI_OK) {
1178 goto fn_exit;
1179 }
1180 }
1181 int found;
1182 for (gpu_id = 0; gpu_id < num_gpus; gpu_id++) {
1183 found = find_same_chipname(gpu_id);
1184 if (found > -1) {
1185 avail_events[gpu_id].pmetricsContextCreateParams = avail_events[found].pmetricsContextCreateParams;
1186 continue;
1187 }
1188 MCCP_t *pMCCP = (MCCP_t *) papi_calloc(1, sizeof(MCCP_t));
1189 if (pMCCP == NULL) {
1190 papi_errno = PAPI_ENOMEM;
1191 goto fn_exit;
1192 }
1193 pMCCP->structSize = NVPW_CUDA_MetricsContext_Create_Params_STRUCT_SIZE;
1194 pMCCP->pChipName = avail_events[gpu_id].chip_name;
1195 NVPW_CALL( NVPW_CUDA_MetricsContext_CreatePtr(pMCCP), goto fn_fail );
1196
1197 avail_events[gpu_id].pmetricsContextCreateParams = pMCCP;
1198 }
1199fn_exit:
1200 return papi_errno;
1201fn_fail:
1202 papi_errno = PAPI_EMISC;
1203 goto fn_exit;
1204}
1205
1207{
1208 int gpu_id, i, found, listsubmetrics = 1, papi_errno = PAPI_OK;
1209 if (avail_events[0].nv_metrics != NULL) {
1210 /* Already enumerated for 1st device? Then exit... */
1211 goto fn_exit;
1212 }
1213 for (gpu_id = 0; gpu_id < num_gpus; gpu_id++) {
1214 LOGDBG("Getting metric names for gpu %d\n", gpu_id);
1215 found = find_same_chipname(gpu_id);
1216 if (found > -1) {
1217 avail_events[gpu_id].num_metrics = avail_events[found].num_metrics;
1218 avail_events[gpu_id].nv_metrics = avail_events[found].nv_metrics;
1219 continue;
1220 }
1221 /* If same chip_name not found, get all the details for this gpu */
1222
1223 NVPW_MetricsContext_GetMetricNames_Begin_Params getMetricNameBeginParams = {
1224 .structSize = NVPW_MetricsContext_GetMetricNames_Begin_Params_STRUCT_SIZE,
1225 .pPriv = NULL,
1226 .pMetricsContext = avail_events[gpu_id].pmetricsContextCreateParams->pMetricsContext,
1227 .hidePeakSubMetrics = !listsubmetrics,
1228 .hidePerCycleSubMetrics = !listsubmetrics,
1229 .hidePctOfPeakSubMetrics = !listsubmetrics,
1230 };
1231 NVPW_CALL( NVPW_MetricsContext_GetMetricNames_BeginPtr(&getMetricNameBeginParams), goto fn_fail );
1232
1233 avail_events[gpu_id].num_metrics = getMetricNameBeginParams.numMetrics;
1234
1235 papi_errno = cuptiu_event_table_create_init_capacity(avail_events[gpu_id].num_metrics, sizeof(cuptiu_event_t), &(avail_events[gpu_id].nv_metrics));
1236 if (papi_errno != PAPI_OK) {
1237 goto fn_exit;
1238 }
1239 for (i = 0; i < avail_events[gpu_id].num_metrics; i++) {
1240 papi_errno = cuptiu_event_table_insert_record(avail_events[gpu_id].nv_metrics,
1241 getMetricNameBeginParams.ppMetricNames[i],
1242 i, 0);
1243 if (papi_errno != PAPI_OK) {
1244 goto fn_exit;
1245 }
1246 }
1247
1248 NVPW_MetricsContext_GetMetricNames_End_Params getMetricNameEndParams = {
1249 .structSize = NVPW_MetricsContext_GetMetricNames_End_Params_STRUCT_SIZE,
1250 .pPriv = NULL,
1251 .pMetricsContext = avail_events[gpu_id].pmetricsContextCreateParams->pMetricsContext,
1252 };
1253 NVPW_CALL( NVPW_MetricsContext_GetMetricNames_EndPtr((NVPW_MetricsContext_GetMetricNames_End_Params *) &getMetricNameEndParams), goto fn_fail );
1254
1255 }
1257 cuptiu_event_t *find = NULL;
1258 cuptiu_event_t *evt_rec = NULL;
1259 int len;
1260 unsigned int curr = all_evt_names->count;
1261 for (gpu_id = 0; gpu_id < num_gpus; gpu_id++) {
1262 for (i = 0; i < avail_events[gpu_id].num_metrics; i++) {
1263 papi_errno = cuptiu_event_table_get_item(avail_events[gpu_id].nv_metrics, i, &evt_rec);
1264 if (papi_errno != PAPI_OK) {
1265 goto fn_exit;
1266 }
1267 len = snprintf(evt_name, PAPI_2MAX_STR_LEN, "%s:device=%d", evt_rec->name, gpu_id);
1268 if (len > PAPI_2MAX_STR_LEN) {
1269 ERRDBG("String formatting exceeded maximum length.\n");
1270 papi_errno = PAPI_ENOMEM;
1271 goto fn_exit;
1272 }
1273 if (cuptiu_event_table_find_name(all_evt_names, evt_name, &find) == PAPI_ENOEVNT) {
1274 papi_errno = cuptiu_event_table_insert_record(all_evt_names, evt_name, curr, 0);
1275 if (papi_errno != PAPI_OK) {
1276 goto fn_exit;
1277 }
1278 ++curr;
1279 }
1280 }
1281 }
1282fn_exit:
1283 return papi_errno;
1284fn_fail:
1285 papi_errno = PAPI_EMISC;
1286 goto fn_exit;
1287}
1288
1289int cuptip_event_name_to_descr(const char *evt_name, char *description)
1290{
1291 int papi_errno, numdep, gpu_id, passes;
1292 char nv_name[PAPI_MAX_STR_LEN];
1293 cuptiu_event_t *evt_rec = NULL;
1294 NVPA_RawMetricRequest *temp;
1295 papi_errno = event_name_tokenize(evt_name, nv_name, &gpu_id);
1296 if (papi_errno != PAPI_OK) {
1297 goto fn_exit;
1298 }
1299 papi_errno = cuptiu_event_table_find_name(avail_events[gpu_id].nv_metrics, nv_name, &evt_rec);
1300 if (papi_errno != PAPI_OK) {
1301 ERRDBG("Event name not found in avail_events array.\n");
1302 goto fn_exit;
1303 }
1304 char *desc = evt_rec->desc;
1305 if (desc[0] == '\0') {
1306 papi_errno = retrieve_metric_details(avail_events[gpu_id].pmetricsContextCreateParams->pMetricsContext,
1307 nv_name, desc, &numdep, &temp);
1308 if (papi_errno == PAPI_OK) {
1309 NVPW_CUDA_RawMetricsConfig_Create_Params nvpw_metricsConfigCreateParams = {
1310 .structSize = NVPW_CUDA_RawMetricsConfig_Create_Params_STRUCT_SIZE,
1311 .pPriv = NULL,
1312 .activityKind = NVPA_ACTIVITY_KIND_PROFILER,
1313 .pChipName = avail_events[gpu_id].chip_name,
1314 };
1315 NVPW_CALL( NVPW_CUDA_RawMetricsConfig_CreatePtr(&nvpw_metricsConfigCreateParams), goto fn_fail );
1316
1317 papi_errno = check_num_passes(nvpw_metricsConfigCreateParams.pRawMetricsConfig,
1318 numdep, temp, &passes);
1319
1320 NVPW_RawMetricsConfig_Destroy_Params rawMetricsConfigDestroyParams = {
1321 .structSize = NVPW_RawMetricsConfig_Destroy_Params_STRUCT_SIZE,
1322 .pPriv = NULL,
1323 .pRawMetricsConfig = nvpw_metricsConfigCreateParams.pRawMetricsConfig,
1324 };
1325 NVPW_CALL( NVPW_RawMetricsConfig_DestroyPtr((NVPW_RawMetricsConfig_Destroy_Params *) &rawMetricsConfigDestroyParams), goto fn_fail );
1326
1327 snprintf(desc + strlen(desc), PAPI_2MAX_STR_LEN - strlen(desc), " Numpass=%d", passes);
1328 if (passes > 1) {
1329 snprintf(desc + strlen(desc), PAPI_2MAX_STR_LEN - strlen(desc), " (multi-pass not supported)");
1330 }
1331
1332 const char *token_sw_evt = "sass";
1333 if (strstr(nv_name, token_sw_evt) != NULL) {
1334 snprintf(desc + strlen(desc), PAPI_2MAX_STR_LEN - strlen(desc), " (SW event)");
1335 }
1336 }
1337 papi_free(temp);
1338 }
1339 strcpy(description, desc);
1340fn_exit:
1341 return papi_errno;
1342fn_fail:
1343 papi_errno = PAPI_EMISC;
1344 goto fn_exit;
1345}
1346
1348{
1349 COMPDBG("Entering.\n");
1350 int gpu_id, found;
1351 NVPW_MetricsContext_Destroy_Params metricsContextDestroyParams;
1352 if (avail_events == NULL) {
1353 return;
1354 }
1355 for (gpu_id = 0; gpu_id < num_gpus; gpu_id++) {
1356 found = find_same_chipname(gpu_id);
1357 if (found > -1) {
1358 avail_events[gpu_id].num_metrics = 0;
1359 avail_events[gpu_id].nv_metrics = NULL;
1360 avail_events[gpu_id].pmetricsContextCreateParams = NULL;
1361 continue;
1362 }
1363 if (avail_events[gpu_id].pmetricsContextCreateParams->pMetricsContext) {
1364 metricsContextDestroyParams = (NVPW_MetricsContext_Destroy_Params) {
1365 .structSize = NVPW_MetricsContext_Destroy_Params_STRUCT_SIZE,
1366 .pPriv = NULL,
1367 .pMetricsContext = avail_events[gpu_id].pmetricsContextCreateParams->pMetricsContext,
1368 };
1369 NVPW_CALL(NVPW_MetricsContext_DestroyPtr(&metricsContextDestroyParams), );
1370 }
1371 papi_free(avail_events[gpu_id].pmetricsContextCreateParams);
1372 avail_events[gpu_id].pmetricsContextCreateParams = NULL;
1373
1374 if (avail_events[gpu_id].nv_metrics) {
1375 cuptiu_event_table_destroy( &(avail_events[gpu_id].nv_metrics) );
1376 }
1377 }
1379 avail_events = NULL;
1380}
1381
1382/* CUPTI Profiler component API functions */
1384{
1385 COMPDBG("Entering.\n");
1386 int papi_errno = PAPI_OK;
1387
1388 papi_errno = load_cupti_perf_sym();
1389 papi_errno += load_nvpw_sym();
1390 if (papi_errno != PAPI_OK) {
1391 cuptic_disabled_reason_set("Unable to load CUDA library functions.");
1392 goto fn_fail;
1393 }
1394
1395 papi_errno = cuptic_device_get_count(&num_gpus);
1396 if (papi_errno != PAPI_OK) {
1397 goto fn_fail;
1398 }
1399
1400 if (num_gpus <= 0) {
1401 cuptic_disabled_reason_set("No GPUs found on system.");
1402 goto fn_fail;
1403 }
1404
1405 papi_errno = initialize_cupti_profiler_api();
1406 papi_errno += initialize_perfworks_api();
1407 if (papi_errno != PAPI_OK) {
1408 cuptic_disabled_reason_set("Unable to initialize CUPTI profiler libraries.");
1409 goto fn_fail;
1410 }
1411 papi_errno = init_all_metrics();
1412 if (papi_errno != PAPI_OK) {
1413 goto fn_fail;
1414 }
1415 papi_errno = cuInitPtr(0);
1416 if (papi_errno != CUDA_SUCCESS) {
1417 cuptic_disabled_reason_set("Failed to initialize CUDA driver API.");
1418 goto fn_fail;
1419 }
1420 return PAPI_OK;
1421fn_fail:
1422 return PAPI_EMISC;
1423}
1424
1425int cuptip_control_create(cuptiu_event_table_t *event_names, cuptic_info_t thr_info, cuptip_control_t *pstate)
1426{
1427 COMPDBG("Entering.\n");
1428 int papi_errno = PAPI_OK, gpu_id;
1429 cuptip_control_t state = (cuptip_control_t) papi_calloc (1, sizeof(struct cuptip_control_s));
1430 if (state == NULL) {
1431 return PAPI_ENOMEM;
1432 }
1433 state->gpu_ctl = (cuptip_gpu_state_t *) papi_calloc(num_gpus, sizeof(cuptip_gpu_state_t));
1434 if (state->gpu_ctl == NULL) {
1435 return PAPI_ENOMEM;
1436 }
1437 for (gpu_id = 0; gpu_id < num_gpus; gpu_id++) {
1438 state->gpu_ctl[gpu_id].gpu_id = gpu_id;
1439 }
1440
1441 /* Register the user created cuda context for the current gpu if not already known */
1442 papi_errno = cuptic_ctxarr_update_current(thr_info);
1443 if (papi_errno != PAPI_OK) {
1444 goto fn_exit;
1445 }
1447 if (papi_errno != PAPI_OK) {
1448 goto fn_exit;
1449 }
1450 papi_errno = add_events_per_gpu(state, event_names);
1451 if (papi_errno != PAPI_OK) {
1452 goto fn_exit;
1453 }
1454 papi_errno = control_state_validate(state);
1455 state->info = thr_info;
1456
1457fn_exit:
1458 *pstate = state;
1459 return papi_errno;
1460}
1461
1462int cuptip_control_destroy(cuptip_control_t *pstate)
1463{
1464 COMPDBG("Entering.\n");
1465 cuptip_control_t state = *pstate;
1466 int i, j;
1467 int papi_errno = nvpw_cuda_metricscontext_destroy(state);
1468 for (i = 0; i < num_gpus; i++) {
1469 reset_cupti_prof_config_images( &(state->gpu_ctl[i]) );
1470 cuptiu_event_table_destroy( &(state->gpu_ctl[i].event_names) );
1471 for (j = 0; j < state->gpu_ctl[i].rmr_count; j++) {
1472 papi_free((void *) state->gpu_ctl[i].rmr[j].pMetricName);
1473 }
1474 papi_free(state->gpu_ctl[i].rmr);
1475 }
1476 papi_free(state->gpu_ctl);
1478 *pstate = NULL;
1479 return papi_errno;
1480}
1481
1482int cuptip_control_start(cuptip_control_t state)
1483{
1484 COMPDBG("Entering.\n");
1485 cuptip_gpu_state_t *gpu_ctl;
1486 CUcontext userCtx, ctx;
1487 CUDA_CALL( cuCtxGetCurrentPtr(&userCtx), goto fn_fail_misc );
1488 if (userCtx == NULL) {
1489 CUDART_CALL( cudaFreePtr(NULL), goto fn_fail_misc );
1490 CUDA_CALL( cuCtxGetCurrentPtr(&userCtx), goto fn_fail_misc );
1491 }
1492 int gpu_id;
1493 int papi_errno = PAPI_OK;
1494 for (gpu_id = 0; gpu_id < num_gpus; gpu_id++) {
1495 gpu_ctl = &(state->gpu_ctl[gpu_id]);
1496 if (gpu_ctl->event_names->count == 0) {
1497 continue;
1498 }
1499 LOGDBG("Device num %d: event_count %d, rmr count %d\n", gpu_id, gpu_ctl->event_names->count, gpu_ctl->rmr_count);
1500 papi_errno = cuptic_device_acquire(state->gpu_ctl[gpu_id].event_names);
1501 if (papi_errno != PAPI_OK) {
1502 ERRDBG("Profiling same gpu from multiple event sets not allowed.\n");
1503 return papi_errno;
1504 }
1505 papi_errno = cuptic_ctxarr_get_ctx(state->info, gpu_id, &ctx);
1506 CUDA_CALL( cuCtxSetCurrentPtr(ctx), goto fn_fail_misc );
1507 papi_errno = get_counter_availability(gpu_ctl);
1508 if (papi_errno != PAPI_OK) {
1509 ERRDBG("Error getting counter availability image.\n");
1510 return papi_errno;
1511 }
1512 /* CUPTI profiler host configuration */
1513 papi_errno = metric_get_config_image(gpu_ctl);
1514 papi_errno += metric_get_counter_data_prefix_image(gpu_ctl);
1515 papi_errno += create_counter_data_image(gpu_ctl);
1516 if (papi_errno != PAPI_OK) {
1517 ERRDBG("Failed to create CUPTI profiler state for gpu %d\n", gpu_id);
1518 goto fn_fail;
1519 }
1520 papi_errno = begin_profiling(gpu_ctl);
1521 if (papi_errno != PAPI_OK) {
1522 ERRDBG("Failed to start profiling for gpu %d\n", gpu_id);
1523 goto fn_fail;
1524 }
1525 }
1526 state->running = True;
1527fn_exit:
1528 CUDA_CALL( cuCtxSetCurrentPtr(userCtx), goto fn_fail_misc );
1529 return papi_errno;
1530fn_fail:
1531 papi_errno = PAPI_ECMP;
1532 goto fn_exit;
1533fn_fail_misc:
1534 papi_errno = PAPI_EMISC;
1535 goto fn_exit;
1536}
1537
1538int cuptip_control_stop(cuptip_control_t state)
1539{
1540 COMPDBG("Entering.\n");
1541 cuptip_gpu_state_t *gpu_ctl;
1542 CUcontext userCtx = NULL, ctx = NULL;
1543 CUDA_CALL( cuCtxGetCurrentPtr(&userCtx), goto fn_fail_misc );
1544 if (userCtx == NULL) {
1545 CUDART_CALL( cudaFreePtr(NULL), goto fn_fail_misc );
1546 CUDA_CALL( cuCtxGetCurrentPtr(&userCtx), goto fn_fail_misc );
1547 }
1548 int gpu_id;
1549 int papi_errno = PAPI_OK;
1550 if (state->running == False) {
1551 ERRDBG("Profiler is already stopped.\n");
1552 papi_errno = PAPI_EINVAL;
1553 goto fn_fail;
1554 }
1555 for (gpu_id=0; gpu_id<num_gpus; gpu_id++) {
1556 gpu_ctl = &(state->gpu_ctl[gpu_id]);
1557 if (gpu_ctl->event_names->count == 0) {
1558 continue;
1559 }
1560 papi_errno = cuptic_ctxarr_get_ctx(state->info, gpu_id, &ctx);
1561 CUDA_CALL( cuCtxSetCurrentPtr(ctx), goto fn_fail_misc );
1562 papi_errno = end_profiling(gpu_ctl);
1563 if (papi_errno != PAPI_OK) {
1564 ERRDBG("Failed to stop profiling on gpu %d\n", gpu_id);
1565 goto fn_fail;
1566 }
1567 papi_errno = cuptic_device_release(state->gpu_ctl[gpu_id].event_names);
1568 if (papi_errno != PAPI_OK) {
1569 goto fn_fail;
1570 }
1571 }
1572 state->running = False;
1573fn_exit:
1574 CUDA_CALL( cuCtxSetCurrentPtr(userCtx), goto fn_fail_misc );
1575 return papi_errno;
1576fn_fail:
1577 goto fn_exit;
1578fn_fail_misc:
1579 papi_errno = PAPI_EMISC;
1580 goto fn_exit;
1581}
1582
1584{
1585 if (strstr(evt_name, ".sum") != NULL) {
1586 return RunningSum;
1587 }
1588 else if (strstr(evt_name, ".min") != NULL) {
1589 return RunningMin;
1590 }
1591 else if (strstr(evt_name, ".max") != NULL) {
1592 return RunningMax;
1593 }
1594 else {
1595 return SpotValue;
1596 }
1597}
1598
1599int cuptip_control_read(cuptip_control_t state, long long *values)
1600{
1601 COMPDBG("Entering.\n");
1602 int papi_errno, gpu_id, i;
1603 cuptip_gpu_state_t *gpu_ctl = NULL;
1604 CUcontext userCtx = NULL, ctx = NULL;
1605 CUDA_CALL( cuCtxGetCurrentPtr(&userCtx), goto fn_fail_misc);
1606 if (userCtx == NULL) {
1607 CUDART_CALL( cudaFreePtr(NULL), goto fn_fail_misc );
1608 CUDART_CALL( cuCtxGetCurrentPtr(&userCtx), goto fn_fail_misc );
1609 }
1610 unsigned int evt_pos;
1611 long long val;
1612 cuptiu_event_t *evt_rec = NULL;
1613 for (gpu_id = 0; gpu_id < num_gpus; gpu_id++) {
1614 gpu_ctl = &(state->gpu_ctl[gpu_id]);
1615 if (gpu_ctl->event_names->count == 0) {
1616 continue;
1617 }
1618
1619 papi_errno = cuptic_ctxarr_get_ctx(state->info, gpu_id, &ctx);
1620 CUDA_CALL( cuCtxSetCurrentPtr(ctx), goto fn_fail_misc );
1621
1622 CUpti_Profiler_PopRange_Params popRangeParams = {
1623 .structSize = CUpti_Profiler_PopRange_Params_STRUCT_SIZE,
1624 .pPriv = NULL,
1625 .ctx = NULL,
1626 };
1627 CUPTI_CALL( cuptiProfilerPopRangePtr(&popRangeParams), goto fn_fail_misc );
1628
1629 CUpti_Profiler_EndPass_Params endPassParams = {
1630 .structSize = CUpti_Profiler_EndPass_Params_STRUCT_SIZE,
1631 .pPriv = NULL,
1632 .ctx = NULL,
1633 };
1634 CUPTI_CALL( cuptiProfilerEndPassPtr(&endPassParams), goto fn_fail_misc );
1635
1636 CUpti_Profiler_FlushCounterData_Params flushCounterDataParams = {
1637 .structSize = CUpti_Profiler_FlushCounterData_Params_STRUCT_SIZE,
1638 .pPriv = NULL,
1639 .ctx = NULL,
1640 };
1641 CUPTI_CALL( cuptiProfilerFlushCounterDataPtr(&flushCounterDataParams), goto fn_fail_misc );
1642
1643 papi_errno = get_measured_values(gpu_ctl);
1644 if (papi_errno != PAPI_OK) {
1645 goto fn_exit;
1646 }
1647 for (i = 0; i < (int) gpu_ctl->event_names->count; i++) {
1648 papi_errno = cuptiu_event_table_get_item(gpu_ctl->event_names, i, &evt_rec);
1649 if (papi_errno != PAPI_OK) {
1650 goto fn_exit;
1651 }
1652 evt_pos = evt_rec->evt_pos;
1653 val = (long long) evt_rec->value;
1654
1655 if (state->read_count == 0) {
1656 values[evt_pos] = val;
1657 }
1658 else {
1659 switch (get_event_collection_method(evt_rec->name)) {
1660 case RunningSum:
1661 values[evt_pos] += val;
1662 break;
1663 case RunningMin:
1664 values[evt_pos] = values[evt_pos] < val ? values[evt_pos] : val;
1665 break;
1666 case RunningMax:
1667 values[evt_pos] = values[evt_pos] > val ? values[evt_pos] : val;
1668 break;
1669 default:
1670 values[evt_pos] = val;
1671 break;
1672 }
1673 }
1674 }
1675
1676 CUPTI_CALL( cuptiProfilerCounterDataImageInitializePtr(&gpu_ctl->initializeParams), goto fn_fail_misc );
1677 CUPTI_CALL( cuptiProfilerCounterDataImageInitializeScratchBufferPtr(&gpu_ctl->initScratchBufferParams), goto fn_fail_misc );
1678
1679 CUpti_Profiler_BeginPass_Params beginPassParams = {
1680 .structSize = CUpti_Profiler_BeginPass_Params_STRUCT_SIZE,
1681 .pPriv = NULL,
1682 .ctx = NULL,
1683 };
1684 CUPTI_CALL( cuptiProfilerBeginPassPtr(&beginPassParams), goto fn_fail_misc );
1685
1686 char rangeName[64];
1687 sprintf(rangeName, "PAPI_Range_%d", gpu_ctl->gpu_id);
1688 CUpti_Profiler_PushRange_Params pushRangeParams = {
1689 .structSize = CUpti_Profiler_PushRange_Params_STRUCT_SIZE,
1690 .pPriv = NULL,
1691 .ctx = NULL,
1692 .pRangeName = (const char*) &rangeName,
1693 .rangeNameLength = 100,
1694 };
1695 CUPTI_CALL( cuptiProfilerPushRangePtr(&pushRangeParams), goto fn_fail_misc );
1696
1697 }
1698 state->read_count++;
1699fn_exit:
1700 CUDA_CALL( cuCtxSetCurrentPtr(userCtx), );
1701 return papi_errno;
1702fn_fail_misc:
1703 papi_errno = PAPI_EMISC;
1704 goto fn_exit;
1705}
1706
1707int cuptip_control_reset(cuptip_control_t state)
1708{
1709 COMPDBG("Entering.\n");
1710 state->read_count = 0;
1711 return PAPI_OK;
1712}
1713
1715{
1716 COMPDBG("Entering.\n");
1721 return PAPI_OK;
1722}
static const char * event_names[2]
Definition: Gamum.c:27
int i
void * cuptic_load_dynamic_syms(const char *parent_path, const char *dlname, const char *search_subpaths[])
Definition: cupti_common.c:110
void cuptic_disabled_reason_set(const char *msg)
Definition: cupti_common.c:385
cudaError_t(* cudaFreePtr)(void *)
Definition: cupti_common.c:46
CUresult(* cuCtxSetCurrentPtr)(CUcontext)
Definition: cupti_common.c:24
int cuptic_device_release(cuptiu_event_table_t *evt_table)
Definition: cupti_common.c:644
int cuptic_ctxarr_get_ctx(cuptic_info_t info, int gpu_idx, CUcontext *ctx)
Definition: cupti_common.c:566
void * dl_cupti
Definition: cupti_common.c:19
int cuptic_ctxarr_update_current(cuptic_info_t info)
Definition: cupti_common.c:536
int cuptic_device_get_count(int *num_gpus)
Definition: cupti_common.c:303
const char * linked_cudart_path
Definition: cupti_common.c:18
CUresult(* cuCtxGetCurrentPtr)(CUcontext *)
Definition: cupti_common.c:23
CUresult(* cuInitPtr)(unsigned int)
Definition: cupti_common.c:33
int cuptic_device_acquire(cuptiu_event_table_t *evt_table)
Definition: cupti_common.c:629
#define CUDA_CALL(call, handleerror)
Definition: cupti_common.h:58
#define CUDART_CALL(call, handleerror)
Definition: cupti_common.h:68
#define CUPTI_CALL(call, handleerror)
Definition: cupti_common.h:78
#define DLSYM_AND_CHECK(dllib, name)
Definition: cupti_common.h:52
static int nvpw_cuda_metricscontext_create(cuptip_control_t state)
static void free_all_enumerated_metrics(void)
NVPA_Status(* NVPW_RawMetricsConfig_EndPassGroupPtr)(NVPW_RawMetricsConfig_EndPassGroup_Params *params)
static int load_nvpw_sym(void)
NVPA_Status(* NVPW_RawMetricsConfig_SetCounterAvailabilityPtr)(NVPW_RawMetricsConfig_SetCounterAvailability_Params *params)
NVPA_Status(* NVPW_MetricsContext_GetCounterNames_EndPtr)(NVPW_MetricsContext_GetCounterNames_End_Params *pParams)
static int init_all_metrics(void)
static int get_measured_values(cuptip_gpu_state_t *gpu_ctl)
NVPA_Status(* NVPW_RawMetricsConfig_AddMetricsPtr)(NVPW_RawMetricsConfig_AddMetrics_Params *params)
static int initialize_perfworks_api(void)
NVPA_Status(* NVPW_MetricsContext_GetMetricProperties_EndPtr)(NVPW_MetricsContext_GetMetricProperties_End_Params *p)
CUptiResult(* cuptiProfilerCounterDataImageCalculateScratchBufferSizePtr)(CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params *params)
static int get_event_names_rmr(cuptip_gpu_state_t *gpu_ctl)
static int reset_cupti_prof_config_images(cuptip_gpu_state_t *gpu_ctl)
NVPA_Status(* NVPW_MetricsContext_EvaluateToGpuValuesPtr)(NVPW_MetricsContext_EvaluateToGpuValues_Params *params)
NVPA_Status(* NVPW_RawMetricsConfig_DestroyPtr)(NVPW_RawMetricsConfig_Destroy_Params *params)
int cuptip_control_reset(cuptip_control_t state)
CUptiResult(* cuptiProfilerPopRangePtr)(CUpti_Profiler_PopRange_Params *params)
NVPA_Status(* NVPW_CUDA_RawMetricsConfig_CreatePtr)(NVPW_CUDA_RawMetricsConfig_Create_Params *)
NVPA_Status(* NVPW_GetSupportedChipNamesPtr)(NVPW_GetSupportedChipNames_Params *params)
CUptiResult(* cuptiProfilerDisableProfilingPtr)(CUpti_Profiler_DisableProfiling_Params *params)
int cuptip_init(void)
CUptiResult(* cuptiDeviceGetChipNamePtr)(CUpti_Device_GetChipName_Params *params)
int cuptip_control_read(cuptip_control_t state, long long *values)
NVPA_Status(* NVPW_MetricsContext_DestroyPtr)(NVPW_MetricsContext_Destroy_Params *params)
NVPA_Status(* NVPW_RawMetricsConfig_GetConfigImagePtr)(NVPW_RawMetricsConfig_GetConfigImage_Params *params)
static list_metrics_t * avail_events
static int nvpw_cuda_metricscontext_destroy(cuptip_control_t state)
static void * dl_nvpw
static int end_profiling(cuptip_gpu_state_t *gpu_ctl)
int cuptip_event_enum(cuptiu_event_table_t *all_evt_names)
NVPA_Status(* NVPW_MetricsContext_GetMetricNames_EndPtr)(NVPW_MetricsContext_GetMetricNames_End_Params *params)
NVPA_Status(* NVPW_RawMetricsConfig_IsAddMetricsPossiblePtr)(NVPW_RawMetricsConfig_IsAddMetricsPossible_Params *params)
CUptiResult(* cuptiProfilerDeInitializePtr)(CUpti_Profiler_DeInitialize_Params *params)
int cuptip_control_create(cuptiu_event_table_t *event_names, cuptic_info_t thr_info, cuptip_control_t *pstate)
static int initialize_cupti_profiler_api(void)
CUptiResult(* cuptiFinalizePtr)(void)
static enum collection_method_e get_event_collection_method(const char *evt_name)
int cuptip_control_stop(cuptip_control_t state)
NVPA_Status(* NVPW_MetricsContext_GetMetricNames_BeginPtr)(NVPW_MetricsContext_GetMetricNames_Begin_Params *params)
CUptiResult(* cuptiProfilerBeginSessionPtr)(CUpti_Profiler_BeginSession_Params *params)
struct NVPA_MetricsContext NVPA_MetricsContext
CUptiResult(* cuptiProfilerInitializePtr)(CUpti_Profiler_Initialize_Params *params)
static int create_counter_data_image(cuptip_gpu_state_t *gpu_ctl)
CUptiResult(* cuptiProfilerEndPassPtr)(CUpti_Profiler_EndPass_Params *params)
int cuptip_control_start(cuptip_control_t state)
static int num_gpus
NVPA_Status(* NVPW_CounterDataBuilder_DestroyPtr)(NVPW_CounterDataBuilder_Destroy_Params *params)
CUptiResult(* cuptiProfilerPushRangePtr)(CUpti_Profiler_PushRange_Params *params)
NVPA_Status(* NVPW_InitializeHostPtr)(NVPW_InitializeHost_Params *params)
CUptiResult(* cuptiProfilerCounterDataImageCalculateSizePtr)(CUpti_Profiler_CounterDataImage_CalculateSize_Params *params)
int cuptip_event_name_to_descr(const char *evt_name, char *description)
static int begin_profiling(cuptip_gpu_state_t *gpu_ctl)
static int unload_nvpw_sym(void)
static int add_events_per_gpu(cuptip_control_t state, cuptiu_event_table_t *event_names)
static int get_counter_availability(cuptip_gpu_state_t *gpu_ctl)
NVPA_Status(* NVPW_RawMetricsConfig_GetNumPassesPtr)(NVPW_RawMetricsConfig_GetNumPasses_Params *params)
NVPA_Status(* NVPW_CUDA_MetricsContext_CreatePtr)(NVPW_CUDA_MetricsContext_Create_Params *params)
static int get_chip_name(int dev_num, char *chipName)
NVPA_Status(* NVPW_MetricsContext_GetCounterNames_BeginPtr)(NVPW_MetricsContext_GetCounterNames_Begin_Params *pParams)
static int check_num_passes(struct NVPA_RawMetricsConfig *pRawMetricsConfig, int rmr_count, NVPA_RawMetricRequest *rmr, int *num_pass)
CUptiResult(* cuptiProfilerFlushCounterDataPtr)(CUpti_Profiler_FlushCounterData_Params *params)
static int load_cupti_perf_sym(void)
NVPW_CUDA_MetricsContext_Create_Params MCCP_t
NVPA_Status(* NVPW_RawMetricsConfig_BeginPassGroupPtr)(NVPW_RawMetricsConfig_BeginPassGroup_Params *params)
static int event_name_tokenize(const char *name, char *nv_name, int *gpuid)
collection_method_e
@ SpotValue
@ RunningSum
@ RunningMax
@ RunningMin
CUptiResult(* cuptiProfilerCounterDataImageInitializePtr)(CUpti_Profiler_CounterDataImage_Initialize_Params *params)
CUptiResult(* cuptiProfilerEndSessionPtr)(CUpti_Profiler_EndSession_Params *params)
static int retrieve_metric_details(NVPA_MetricsContext *pMetricsContext, const char *nv_name, char *description, int *numDep, NVPA_RawMetricRequest **pRMR)
CUptiResult(* cuptiProfilerSetConfigPtr)(CUpti_Profiler_SetConfig_Params *params)
static int finalize_cupti_profiler_api(void)
NVPA_Status(* NVPW_CounterDataBuilder_GetCounterDataPrefixPtr)(NVPW_CounterDataBuilder_GetCounterDataPrefix_Params *params)
#define NVPW_CALL(call, handleerror)
CUptiResult(* cuptiProfilerUnsetConfigPtr)(CUpti_Profiler_UnsetConfig_Params *params)
NVPA_Status(* NVPW_RawMetricsConfig_GenerateConfigImagePtr)(NVPW_RawMetricsConfig_GenerateConfigImage_Params *params)
NVPA_Status(* NVPW_MetricsContext_SetCounterDataPtr)(NVPW_MetricsContext_SetCounterData_Params *params)
static int metric_get_config_image(cuptip_gpu_state_t *gpu_ctl)
CUptiResult(* cuptiProfilerEnableProfilingPtr)(CUpti_Profiler_EnableProfiling_Params *params)
CUptiResult(* cuptiProfilerCounterDataImageInitializeScratchBufferPtr)(CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params *params)
NVPA_Status(* NVPW_MetricsContext_GetMetricProperties_BeginPtr)(NVPW_MetricsContext_GetMetricProperties_Begin_Params *p)
NVPA_Status(* NVPW_CounterData_GetNumRangesPtr)(NVPW_CounterData_GetNumRanges_Params *params)
running_e
@ True
@ False
static int metric_get_counter_data_prefix_image(cuptip_gpu_state_t *gpu_ctl)
static int control_state_validate(cuptip_control_t state)
CUptiResult(* cuptiProfilerBeginPassPtr)(CUpti_Profiler_BeginPass_Params *params)
static int find_same_chipname(int gpu_id)
NVPA_Status(* NVPW_Profiler_CounterData_GetRangeDescriptionsPtr)(NVPW_Profiler_CounterData_GetRangeDescriptions_Params *params)
int cuptip_control_destroy(cuptip_control_t *pstate)
NVPA_Status(* NVPW_CounterDataBuilder_AddMetricsPtr)(NVPW_CounterDataBuilder_AddMetrics_Params *params)
static int unload_cupti_perf_sym(void)
NVPA_Status(* NVPW_CounterDataBuilder_CreatePtr)(NVPW_CounterDataBuilder_Create_Params *params)
CUptiResult(* cuptiProfilerGetCounterAvailabilityPtr)(CUpti_Profiler_GetCounterAvailability_Params *params)
int cuptip_shutdown(void)
void cuptiu_event_table_destroy(cuptiu_event_table_t **pevt_table)
Definition: cupti_utils.c:159
int cuptiu_event_table_get_item(cuptiu_event_table_t *evt_table, int evt_idx, cuptiu_event_t **record)
Definition: cupti_utils.c:47
int cuptiu_event_table_create_init_capacity(int capacity, int sizeof_rec, cuptiu_event_table_t **pevt_table)
Definition: cupti_utils.c:16
int cuptiu_event_table_insert_record(cuptiu_event_table_t *evt_table, const char *evt_name, unsigned int evt_code, int evt_pos)
Definition: cupti_utils.c:88
int cuptiu_event_table_find_name(cuptiu_event_table_t *evt_table, const char *evt_name, cuptiu_event_t **found_rec)
Definition: cupti_utils.c:147
int cuptiu_event_table_create(int sizeof_rec, cuptiu_event_table_t **pevt_table)
Definition: cupti_utils.c:42
void dummy(void *array)
Definition: do_loops.c:306
char * evt_name(evstock *stock, int index)
Definition: eventstock.c:193
#define PAPI_OK
Definition: f90papi.h:73
#define PAPI_EMULPASS
Definition: f90papi.h:40
#define PAPI_ENOEVNT
Definition: f90papi.h:139
#define PAPI_EINVAL
Definition: f90papi.h:115
#define PAPI_MAX_STR_LEN
Definition: f90papi.h:77
#define PAPI_EMISC
Definition: f90papi.h:122
#define PAPI_2MAX_STR_LEN
Definition: f90papi.h:180
#define PAPI_ECMP
Definition: f90papi.h:214
#define PAPI_ENOMEM
Definition: f90papi.h:16
static long long values[NUM_EVENTS]
Definition: init_fini.c:10
#define ERRDBG(format, args...)
Definition: lcuda_debug.h:30
#define LOGDBG(format, args...)
Definition: lcuda_debug.h:24
#define COMPDBG(format, args...)
Definition: lcuda_debug.h:21
Return codes and api definitions.
bool state
Definition: papi_hl.c:155
#define papi_calloc(a, b)
Definition: papi_memory.h:37
#define papi_free(a)
Definition: papi_memory.h:35
#define papi_malloc(a)
Definition: papi_memory.h:34
#define papi_realloc(a, b)
Definition: papi_memory.h:36
if(file==NULL) goto out
const char * name
Definition: rocs.c:225
int
Definition: sde_internal.h:89
long long int long long
Definition: sde_internal.h:85
uint8_t * data
cuptic_info_t info
enum running_e running
cuptip_gpu_state_t * gpu_ctl
CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params initScratchBufferParams
byte_array_t counterDataScratchBuffer
NVPA_RawMetricRequest * rmr
byte_array_t configImage
byte_array_t counterDataImagePrefix
CUpti_Profiler_CounterDataImage_Initialize_Params initializeParams
cuptiu_event_table_t * event_names
byte_array_t counterAvailabilityImage
MCCP_t * pmetricsContextCreateParams
byte_array_t counterDataImage
CUpti_Profiler_CounterDataImageOptions counterDataImageOptions
char name[PAPI_2MAX_STR_LEN]
Definition: cupti_utils.h:13
char desc[PAPI_2MAX_STR_LEN]
Definition: cupti_utils.h:17
unsigned int evt_pos
Definition: cupti_utils.h:15
unsigned int evt_code
Definition: cupti_utils.h:14
unsigned int count
Definition: cupti_utils.h:22
char chip_name[32]
cuptiu_event_table_t * nv_metrics
MCCP_t * pmetricsContextCreateParams