PAPI 7.1.0.0
Loading...
Searching...
No Matches
linux-cuda.c File Reference

This file implements a PAPI component that enables PAPI-C to access hardware monitoring counters for NVIDIA GPU devices through the CUPTI library. More...

Include dependency graph for linux-cuda.c:

Go to the source code of this file.

Data Structures

struct  cuda_ctl_t
 

Macros

#define PAPI_CUDA_MPX_COUNTERS   512
 
#define PAPI_CUDA_MAX_COUNTERS   30
 

Functions

static int cuda_init_component (int cidx)
 
static int cuda_shutdown_component (void)
 
static int cuda_init_thread (hwd_context_t *ctx)
 
static int cuda_shutdown_thread (hwd_context_t *ctx)
 
static int cuda_ntv_enum_events (unsigned int *event_code, int modifier)
 
static int cuda_ntv_code_to_name (unsigned int event_code, char *name, int len)
 
static int cuda_ntv_name_to_code (const char *name, unsigned int *event_code)
 
static int cuda_ntv_code_to_descr (unsigned int event_code, char *descr, int len)
 
static int cuda_init_control_state (hwd_control_state_t *ctl)
 
static int cuda_set_domain (hwd_control_state_t *ctrl, int domain)
 
static int cuda_update_control_state (hwd_control_state_t *ctl, NativeInfo_t *ntv_info, int ntv_count, hwd_context_t *ctx)
 
static int cuda_cleanup_eventset (hwd_control_state_t *ctl)
 
static int cuda_start (hwd_context_t *ctx, hwd_control_state_t *ctl)
 
static int cuda_stop (hwd_context_t *ctx, hwd_control_state_t *ctl)
 
static int cuda_read (hwd_context_t *ctx, hwd_control_state_t *ctl, long long **val, int flags)
 
static int cuda_reset (hwd_context_t *ctx, hwd_control_state_t *ctl)
 
static int cuda_init_private (void)
 
static int check_n_initialize (void)
 
static int cuda_ntv_code_to_descr (unsigned int event_code, char *descr, int __attribute__((unused)) len)
 
static int cuda_init_thread (hwd_context_t __attribute__((unused)) *ctx)
 
static int cuda_shutdown_thread (hwd_context_t __attribute__((unused)) *ctx)
 
static int cuda_init_control_state (hwd_control_state_t __attribute__((unused)) *ctl)
 
static int cuda_set_domain (hwd_control_state_t __attribute__((unused)) *ctrl, int domain)
 
static int cuda_update_control_state (hwd_control_state_t *ctl, NativeInfo_t *ntv_info, int ntv_count, __attribute__((unused)) hwd_context_t *ctx)
 
static int cuda_start (hwd_context_t __attribute__((unused)) *ctx, hwd_control_state_t *ctl)
 
int cuda_stop (hwd_context_t __attribute__((unused)) *ctx, hwd_control_state_t *ctl)
 
static int cuda_read (hwd_context_t __attribute__((unused)) *ctx, hwd_control_state_t *ctl, long long **val, int __attribute__((unused)) flags)
 
static int cuda_reset (hwd_context_t __attribute__((unused)) *ctx, hwd_control_state_t *ctl)
 

Variables

papi_vector_t _cuda_vector
 
ntv_event_table_t global_event_names
 

Detailed Description

Author
Anustuv Pal anustuv@icl.utk.edu (updated in 2023, redesigned with multi-threading support.)
Tony Castaldo tonycastaldo@icl.utk.edu (updated in 08/2019, to make counters accumulate.)
Tony Castaldo tonycastaldo@icl.utk.edu (updated in 2018, to use batch reads and support nvlink metrics.)
Asim YarKhan yarkhan@icl.utk.edu (updated in 2017 to support CUDA metrics)
Asim YarKhan yarkhan@icl.utk.edu (updated in 2015 for multiple CUDA contexts/devices)
Heike Jagode (First version, in collaboration with Robert Dietrich, TU Dresden) jagode@icl.utk.edu

The open source software license for PAPI conforms to the BSD License template.

Definition in file linux-cuda.c.

Macro Definition Documentation

◆ PAPI_CUDA_MAX_COUNTERS

#define PAPI_CUDA_MAX_COUNTERS   30

Definition at line 58 of file linux-cuda.c.

◆ PAPI_CUDA_MPX_COUNTERS

#define PAPI_CUDA_MPX_COUNTERS   512

Definition at line 57 of file linux-cuda.c.

Function Documentation

◆ check_n_initialize()

static int check_n_initialize ( void  )
static

Definition at line 170 of file linux-cuda.c.

171{
173 int papi_errno = PAPI_OK;
176 ) {
177 papi_errno = cuda_init_private();
178 }
179
181 return papi_errno;
182}
#define PAPI_OK
Definition: f90papi.h:73
#define PAPI_EDELAY_INIT
Definition: f90papi.h:271
papi_vector_t _cuda_vector
Definition: linux-cuda.c:30
static int cuda_init_private(void)
Definition: linux-cuda.c:144
#define COMPONENT_LOCK
Definition: papi_internal.h:90
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
inline_static int _papi_hwi_lock(int lck)
Definition: threads.h:69
inline_static int _papi_hwi_unlock(int lck)
Definition: threads.h:83
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuda_cleanup_eventset()

static int cuda_cleanup_eventset ( hwd_control_state_t *  ctl)
static

Definition at line 360 of file linux-cuda.c.

361{
362 COMPDBG("Entering.\n");
363 cuda_ctl_t *control = (cuda_ctl_t *) ctl;
364 int papi_errno = PAPI_OK;
365 if (control->cupti_ctl) {
366 papi_errno += cuptid_control_destroy(&(control->cupti_ctl));
367 }
368 if (control->info) {
369 papi_errno += cuptid_thread_info_destroy(&(control->info));
370 }
371 if (papi_errno != PAPI_OK) {
372 return PAPI_ECMP;
373 }
374 return PAPI_OK;
375}
int cuptid_control_destroy(cuptid_ctl_t *pcupti_ctl)
int cuptid_thread_info_destroy(cuptid_info_t *info)
#define PAPI_ECMP
Definition: f90papi.h:214
#define COMPDBG(format, args...)
Definition: lcuda_debug.h:21
cuptid_ctl_t cupti_ctl
Definition: linux-cuda.c:65
cuptid_info_t info
Definition: linux-cuda.c:64
Here is the call graph for this function:

◆ cuda_init_component()

static int cuda_init_component ( int  cidx)
static

Definition at line 114 of file linux-cuda.c.

115{
116 COMPDBG("Entering with component idx: %d\n", cidx);
117
121
125 "Not initialized. Access component events to initialize it.");
126 return PAPI_EDELAY_INIT;
127}
unsigned int _cuda_lock
Definition: cupti_common.c:21
#define PAPI_NUM_LOCK
Definition: f90papi.h:80
#define NUM_INNER_LOCK
Definition: papi_lock.h:17
static int cidx
char disabled_reason[PAPI_HUGE_STR_LEN]
Definition: papi.h:634

◆ cuda_init_control_state() [1/2]

static int cuda_init_control_state ( hwd_control_state_t *  ctl)
static

◆ cuda_init_control_state() [2/2]

static int cuda_init_control_state ( hwd_control_state_t __attribute__((unused)) *  ctl)
static

Definition at line 294 of file linux-cuda.c.

295{
296 COMPDBG("Entering.\n");
297 return PAPI_OK;
298}

◆ cuda_init_private()

static int cuda_init_private ( void  )
static

Definition at line 144 of file linux-cuda.c.

145{
146 int papi_errno = PAPI_OK;
147 const char *disabled_reason;
148 COMPDBG("Entering.\n");
149
151 if (papi_errno != PAPI_OK) {
152 goto fn_exit;
153 }
154
155 papi_errno = cuptid_init();
156 if (papi_errno != PAPI_OK) {
157 cuptid_disabled_reason_get(&disabled_reason);
158 sprintf(_cuda_vector.cmp_info.disabled_reason, disabled_reason);
159 _cuda_vector.cmp_info.disabled = papi_errno;
160 goto fn_exit;
161 }
162
165
166fn_exit:
167 return papi_errno;
168}
int cuptid_event_table_create(ntv_event_table_t *evt_table)
int cuptid_init(void)
void cuptid_disabled_reason_get(const char **msg)
ntv_event_table_t global_event_names
Definition: linux-cuda.c:32
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuda_init_thread() [1/2]

static int cuda_init_thread ( hwd_context_t *  ctx)
static

◆ cuda_init_thread() [2/2]

static int cuda_init_thread ( hwd_context_t __attribute__((unused)) *  ctx)
static

Definition at line 284 of file linux-cuda.c.

285{
286 return PAPI_OK;
287}

◆ cuda_ntv_code_to_descr() [1/2]

static int cuda_ntv_code_to_descr ( unsigned int  event_code,
char *  descr,
int __attribute__((unused))  len 
)
static

Definition at line 259 of file linux-cuda.c.

260{
262 int papi_errno;
263 papi_errno = check_n_initialize();
264 if (papi_errno != PAPI_OK) {
265 goto fn_exit;
266 }
267
271 if (papi_errno != PAPI_OK) {
272 goto fn_exit;
273 }
274
275 papi_errno = cuda_ntv_code_to_name(event_code, evt_name, PAPI_2MAX_STR_LEN);
276 if (papi_errno != PAPI_OK) {
277 goto fn_exit;
278 }
280fn_exit:
281 return papi_errno;
282}
int cuptid_event_enum(cuptiu_event_table_t *all_evt_names)
int cuptid_event_name_to_descr(char *evt_name, char *descr)
char * evt_name(evstock *stock, int index)
Definition: eventstock.c:193
#define PAPI_2MAX_STR_LEN
Definition: f90papi.h:180
static int check_n_initialize(void)
Definition: linux-cuda.c:170
static int cuda_ntv_code_to_name(unsigned int event_code, char *name, int len)
Definition: linux-cuda.c:244
char * descr
Here is the call graph for this function:

◆ cuda_ntv_code_to_descr() [2/2]

static int cuda_ntv_code_to_descr ( unsigned int  event_code,
char *  descr,
int  len 
)
static

◆ cuda_ntv_code_to_name()

static int cuda_ntv_code_to_name ( unsigned int  event_code,
char *  name,
int  len 
)
static

Definition at line 244 of file linux-cuda.c.

245{
246 int papi_errno = check_n_initialize();
247 if (papi_errno != PAPI_OK) {
248 return papi_errno;
249 }
250 ntv_event_t evt_rec;
251 papi_errno = cuptid_event_table_get_item(global_event_names, event_code, &evt_rec);
252 if (papi_errno != PAPI_OK) {
253 return PAPI_ENOEVNT;
254 }
255 strncpy(name, evt_rec->name, len);
256 return PAPI_OK;
257}
int cuptid_event_table_get_item(ntv_event_table_t evt_table, unsigned int evt_idx, ntv_event_t *record)
#define PAPI_ENOEVNT
Definition: f90papi.h:139
const char * name
Definition: rocs.c:225
char * name
Definition: roc_profiler.c:43
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuda_ntv_enum_events()

static int cuda_ntv_enum_events ( unsigned int *  event_code,
int  modifier 
)
static

Definition at line 184 of file linux-cuda.c.

185{
186 int papi_errno = check_n_initialize();
187 if (papi_errno != PAPI_OK) {
188 goto fn_exit;
189 }
190
192 LOCKDBG("Locked COMPONENT_LOCK to enumerate all events.\n");
195 LOCKDBG("Unlocked COMPONENT_LOCK.\n");
196 if (papi_errno != PAPI_OK) {
197 goto fn_exit;
198 }
199
201 switch (modifier) {
202 case PAPI_ENUM_FIRST:
203 *event_code = 0;
204 papi_errno = PAPI_OK;
205 break;
206 case PAPI_ENUM_EVENTS:
207 if (global_event_names->count == 0) {
208 papi_errno = PAPI_ENOEVNT;
209 } else if (*event_code < global_event_names->count - 1) {
210 *event_code = *event_code + 1;
211 papi_errno = PAPI_OK;
212 } else {
213 papi_errno = PAPI_ENOEVNT;
214 }
215 break;
216 default:
217 papi_errno = PAPI_EINVAL;
218 }
219fn_exit:
220 return papi_errno;
221}
static long count
#define PAPI_ENUM_EVENTS
Definition: f90papi.h:224
#define PAPI_ENUM_FIRST
Definition: f90papi.h:85
#define PAPI_EINVAL
Definition: f90papi.h:115
#define LOCKDBG(format, args...)
Definition: lcuda_debug.h:27
Here is the call graph for this function:

◆ cuda_ntv_name_to_code()

static int cuda_ntv_name_to_code ( const char *  name,
unsigned int *  event_code
)
static

Definition at line 223 of file linux-cuda.c.

224{
225 int papi_errno = check_n_initialize();
226 if (papi_errno != PAPI_OK) {
227 goto fn_exit;
228 }
229 ntv_event_t evt_rec;
231 if (papi_errno == PAPI_OK) {
232 *event_code = evt_rec->evt_code;
233 }
234 else {
236 *event_code = global_event_names->count;
239 }
240fn_exit:
241 return papi_errno;
242}
int cuptid_event_table_insert_record(ntv_event_table_t evt_table, const char *evt_name, unsigned int evt_code, int evt_pos)
int cuptid_event_table_find_name(ntv_event_table_t evt_table, const char *evt_name, ntv_event_t *found_rec)
Here is the call graph for this function:

◆ cuda_read() [1/2]

static int cuda_read ( hwd_context_t *  ctx,
hwd_control_state_t *  ctl,
long long **  val,
int  flags 
)
static

◆ cuda_read() [2/2]

static int cuda_read ( hwd_context_t __attribute__((unused)) *  ctx,
hwd_control_state_t *  ctl,
long long **  val,
int __attribute__((unused))  flags 
)
static

Definition at line 417 of file linux-cuda.c.

418{
419 COMPDBG("Entering.\n");
420 cuda_ctl_t *control = (cuda_ctl_t *) ctl;
421 int papi_errno;
422
423 papi_errno = cuptid_control_read( control->cupti_ctl, (long long *) &(control->values) );
424 if (papi_errno != PAPI_OK) {
425 goto fn_exit;
426 }
427 *val = control->values;
428
429fn_exit:
430 return papi_errno;
431}
int cuptid_control_read(cuptid_ctl_t cupti_ctl, long long *values)
long long values[PAPI_CUDA_MAX_COUNTERS]
Definition: linux-cuda.c:63
Here is the call graph for this function:

◆ cuda_reset() [1/2]

static int cuda_reset ( hwd_context_t *  ctx,
hwd_control_state_t *  ctl
)
static

◆ cuda_reset() [2/2]

static int cuda_reset ( hwd_context_t __attribute__((unused)) *  ctx,
hwd_control_state_t *  ctl
)
static

Definition at line 433 of file linux-cuda.c.

434{
435 cuda_ctl_t *control = (cuda_ctl_t *) ctl;
436 int i;
437 for (i = 0; i < control->events_count; i++) {
438 control->values[i] = 0;
439 }
440 return cuptid_control_reset( control->cupti_ctl );
441}
int i
int cuptid_control_reset(cuptid_ctl_t cupti_ctl)
int events_count
Definition: linux-cuda.c:61
Here is the call graph for this function:

◆ cuda_set_domain() [1/2]

static int cuda_set_domain ( hwd_control_state_t *  ctrl,
int  domain 
)
static

◆ cuda_set_domain() [2/2]

static int cuda_set_domain ( hwd_control_state_t __attribute__((unused)) *  ctrl,
int  domain 
)
static

Definition at line 300 of file linux-cuda.c.

301{
302 COMPDBG("Entering\n");
303 if((PAPI_DOM_USER & domain) || (PAPI_DOM_KERNEL & domain) || (PAPI_DOM_OTHER & domain) || (PAPI_DOM_ALL & domain))
304 return (PAPI_OK);
305 else
306 return (PAPI_EINVAL);
307}
#define PAPI_DOM_USER
Definition: f90papi.h:174
#define PAPI_DOM_OTHER
Definition: f90papi.h:21
#define PAPI_DOM_KERNEL
Definition: f90papi.h:254
#define PAPI_DOM_ALL
Definition: f90papi.h:261

◆ cuda_shutdown_component()

static int cuda_shutdown_component ( void  )
static

Definition at line 129 of file linux-cuda.c.

130{
131 COMPDBG("Entering.\n");
133
136 return PAPI_OK;
137 }
138
140
141 return cuptid_shutdown();
142}
void cuptid_event_table_destroy(ntv_event_table_t *evt_table)
int cuptid_shutdown(void)
Here is the call graph for this function:

◆ cuda_shutdown_thread() [1/2]

static int cuda_shutdown_thread ( hwd_context_t *  ctx)
static

◆ cuda_shutdown_thread() [2/2]

static int cuda_shutdown_thread ( hwd_context_t __attribute__((unused)) *  ctx)
static

Definition at line 289 of file linux-cuda.c.

290{
291 return PAPI_OK;
292}

◆ cuda_start() [1/2]

static int cuda_start ( hwd_context_t *  ctx,
hwd_control_state_t *  ctl
)
static

◆ cuda_start() [2/2]

static int cuda_start ( hwd_context_t __attribute__((unused)) *  ctx,
hwd_control_state_t *  ctl
)
static

Definition at line 377 of file linux-cuda.c.

378{
379 COMPDBG("Entering.\n");
380 int papi_errno, i;
381
382 cuda_ctl_t *control = (cuda_ctl_t *) ctl;
383 for (i = 0; i < control->events_count; i++) {
384 control->values[i] = 0;
385 }
386 ntv_event_table_t select_names;
387 papi_errno = cuptid_event_table_select_by_idx(global_event_names, control->events_count, control->events_id, &select_names);
388 if (papi_errno != PAPI_OK) {
389 goto fn_exit;
390 }
391 papi_errno = cuptid_control_create(select_names, control->info, &(control->cupti_ctl));
392 if (papi_errno != PAPI_OK) {
393 goto fn_exit;
394 }
395
396 papi_errno = cuptid_control_start( control->cupti_ctl );
397
398fn_exit:
399 cuptid_event_table_destroy(&select_names);
400 return papi_errno;
401}
int cuptid_control_create(ntv_event_table_t event_names, cuptid_info_t info, cuptid_ctl_t *pcupti_ctl)
int cuptid_control_start(cuptid_ctl_t cupti_ctl)
int cuptid_event_table_select_by_idx(ntv_event_table_t evt_table, int count, int *idcs, ntv_event_table_t *pevt_names)
int events_id[PAPI_CUDA_MAX_COUNTERS]
Definition: linux-cuda.c:62
Here is the call graph for this function:

◆ cuda_stop() [1/2]

static int cuda_stop ( hwd_context_t *  ctx,
hwd_control_state_t *  ctl
)
static

◆ cuda_stop() [2/2]

int cuda_stop ( hwd_context_t __attribute__((unused)) *  ctx,
hwd_control_state_t *  ctl
)

Definition at line 403 of file linux-cuda.c.

404{
405 COMPDBG("Entering.\n");
406 cuda_ctl_t *control = (cuda_ctl_t *) ctl;
407 int papi_errno;
408 papi_errno = cuptid_control_stop( control->cupti_ctl );
409 if (papi_errno != PAPI_OK) {
410 goto fn_exit;
411 }
412 papi_errno = cuptid_control_destroy( &(control->cupti_ctl) );
413fn_exit:
414 return papi_errno;
415}
int cuptid_control_stop(cuptid_ctl_t cupti_ctl)
Here is the call graph for this function:

◆ cuda_update_control_state() [1/2]

static int cuda_update_control_state ( hwd_control_state_t *  ctl,
NativeInfo_t *  ntv_info,
int  ntv_count,
__attribute__((unused)) hwd_context_t *  ctx
)
static

Definition at line 309 of file linux-cuda.c.

312 {
313 COMPDBG("Entering with events_count %d.\n", ntv_count);
314 int i, papi_errno;
315 papi_errno = check_n_initialize();
316 if (papi_errno != PAPI_OK) {
317 return papi_errno;
318 }
319 if (ntv_count == 0) {
320 return PAPI_OK;
321 }
322
323 cuda_ctl_t *control = (cuda_ctl_t *) ctl;
324 if (control->info == NULL) {
325 papi_errno = cuptid_thread_info_create(&(control->info));
326 if (papi_errno != PAPI_OK) {
327 goto fn_exit;
328 }
329 }
330 control->events_count = ntv_count;
331
332 if (ntv_count > PAPI_CUDA_MAX_COUNTERS) {
333 ERRDBG("Too many events added.\n");
334 papi_errno = PAPI_ECMP;
335 goto fn_exit;
336 }
337 for (i=0; i<ntv_count; i++) {
338 control->events_id[i] = ntv_info[i].ni_event;
339 ntv_info[i].ni_position = i;
340 }
341
342 void *tmp_context = NULL;
343 ntv_event_table_t select_names;
344 papi_errno = cuptid_event_table_select_by_idx(global_event_names, control->events_count, control->events_id, &select_names);
345 if (papi_errno != PAPI_OK) {
346 goto fn_exit;
347 }
348 papi_errno = cuptid_control_create(select_names, control->info, &tmp_context);
349 if (papi_errno != PAPI_OK) {
350 cuptid_control_destroy(&tmp_context);
351 goto fn_exit;
352 }
353 papi_errno = cuptid_control_destroy(&tmp_context);
354
355fn_exit:
356 cuptid_event_table_destroy(&select_names);
357 return papi_errno;
358}
int cuptid_thread_info_create(cuptid_info_t *info)
#define ERRDBG(format, args...)
Definition: lcuda_debug.h:30
#define PAPI_CUDA_MAX_COUNTERS
Definition: linux-cuda.c:58
Here is the call graph for this function:

◆ cuda_update_control_state() [2/2]

static int cuda_update_control_state ( hwd_control_state_t *  ctl,
NativeInfo_t *  ntv_info,
int  ntv_count,
hwd_context_t *  ctx
)
static

Variable Documentation

◆ _cuda_vector

papi_vector_t _cuda_vector

Definition at line 30 of file linux-cuda.c.

◆ global_event_names

ntv_event_table_t global_event_names

Definition at line 32 of file linux-cuda.c.