PAPI 7.1.0.0
Loading...
Searching...
No Matches
roc_profiler.c
Go to the documentation of this file.
1
8#include <rocprofiler.h>
9#include "roc_profiler.h"
10#include "roc_common.h"
11#include "htable.h"
12
/*
 * Bit layout of a 64-bit native event identifier, as implied by the field
 * widths and shifts defined below (most significant field first):
 *
 *   bits 63..28  unused           (UNUSED_WIDTH = 64 - 7 - 7 - 2 - 12 = 36)
 *   bits 27..21  device index     (DEVICE_WIDTH =  7, DEVICE_SHIFT = 21)
 *   bits 20..14  instance index   (INSTAN_WIDTH =  7, INSTAN_SHIFT = 14)
 *   bits 13..12  qualifier flags  (QLMASK_WIDTH =  2, QLMASK_SHIFT = 12)
 *   bits 11..0   name id          (NAMEID_WIDTH = 12, NAMEID_SHIFT =  0)
 */
25#define EVENTS_WIDTH (sizeof(uint64_t) * 8)
26#define DEVICE_WIDTH ( 7)
27#define INSTAN_WIDTH ( 7)
28#define QLMASK_WIDTH ( 2)
29#define NAMEID_WIDTH (12)
30#define UNUSED_WIDTH (EVENTS_WIDTH - DEVICE_WIDTH - INSTAN_WIDTH - QLMASK_WIDTH - NAMEID_WIDTH)
/* Each shift is the bit position of the least significant bit of its field. */
31#define DEVICE_SHIFT (EVENTS_WIDTH - UNUSED_WIDTH - DEVICE_WIDTH)
32#define INSTAN_SHIFT (DEVICE_SHIFT - INSTAN_WIDTH)
33#define QLMASK_SHIFT (INSTAN_SHIFT - QLMASK_WIDTH)
34#define NAMEID_SHIFT (QLMASK_SHIFT - NAMEID_WIDTH)
/* Each mask is <width> one-bits (all-ones shifted right) moved up to the
 * field's position, i.e. it selects the field in place. */
35#define DEVICE_MASK ((0xFFFFFFFFFFFFFFFF >> (EVENTS_WIDTH - DEVICE_WIDTH)) << DEVICE_SHIFT)
36#define INSTAN_MASK ((0xFFFFFFFFFFFFFFFF >> (EVENTS_WIDTH - INSTAN_WIDTH)) << INSTAN_SHIFT)
37#define QLMASK_MASK ((0xFFFFFFFFFFFFFFFF >> (EVENTS_WIDTH - QLMASK_WIDTH)) << QLMASK_SHIFT)
38#define NAMEID_MASK ((0xFFFFFFFFFFFFFFFF >> (EVENTS_WIDTH - NAMEID_WIDTH)) << NAMEID_SHIFT)
/* Bit values stored in the qualifier-flags field: which qualifiers
 * (device, instance) are attached to the event. */
39#define DEVICE_FLAG (0x2)
40#define INSTAN_FLAG (0x1)
41
42typedef struct {
43 char *name;
44 char *descr;
48
49typedef struct ntv_event_table {
51 int count;
53
54struct rocd_ctx {
55 union {
56 struct {
57 int state;
58 uint64_t *events_id;
59 long long *counters;
64 struct {
65 int state;
66 uint64_t *events_id;
67 long long *counters;
68 rocprofiler_feature_t *features;
69 int feature_count;
70 rocprofiler_t **contexts;
72 rocprofiler_properties_t *ctx_prop;
74 } u;
75};
76
77typedef struct {
78 int device;
80 int flags;
81 int nameid;
83
84unsigned int rocm_prof_mode;
85unsigned int _rocm_lock;
86
87/* rocprofiler function pointers */
88static hsa_status_t (*rocp_get_info_p)(const hsa_agent_t *, rocprofiler_info_kind_t, void *);
89static hsa_status_t (*rocp_iterate_info_p)(const hsa_agent_t *, rocprofiler_info_kind_t, hsa_status_t (*)(const rocprofiler_info_data_t, void *), void *);
90static hsa_status_t (*rocp_error_string_p)(const char **);
91
92/* for sampling mode */
93static hsa_status_t (*rocp_open_p)(hsa_agent_t, rocprofiler_feature_t *, uint32_t, rocprofiler_t **, uint32_t, rocprofiler_properties_t *);
94static hsa_status_t (*rocp_close_p)(rocprofiler_t *);
95static hsa_status_t (*rocp_group_count_p)(const rocprofiler_t *, uint32_t *);
96static hsa_status_t (*rocp_start_p)(rocprofiler_t *, uint32_t);
97static hsa_status_t (*rocp_read_p)(rocprofiler_t *, uint32_t);
98static hsa_status_t (*rocp_stop_p)(rocprofiler_t *, uint32_t);
99static hsa_status_t (*rocp_get_group_p)(rocprofiler_t *, uint32_t, rocprofiler_group_t *);
100static hsa_status_t (*rocp_get_data_p)(rocprofiler_t *, uint32_t);
101static hsa_status_t (*rocp_group_get_data_p)(rocprofiler_group_t *);
102static hsa_status_t (*rocp_get_metrics_p)(const rocprofiler_t *);
103static hsa_status_t (*rocp_reset_p)(rocprofiler_t *, uint32_t);
104
105/* for intercept mode */
106static hsa_status_t (*rocp_pool_open_p)(hsa_agent_t, rocprofiler_feature_t *, uint32_t, rocprofiler_pool_t **, uint32_t, rocprofiler_pool_properties_t *);
107static hsa_status_t (*rocp_pool_close_p)(rocprofiler_pool_t *);
108static hsa_status_t (*rocp_pool_fetch_p)(rocprofiler_pool_t *, rocprofiler_pool_entry_t *);
109static hsa_status_t (*rocp_pool_flush_p)(rocprofiler_pool_t *);
110static hsa_status_t (*rocp_set_queue_cbs_p)(rocprofiler_queue_callbacks_t, void *);
111static hsa_status_t (*rocp_start_queue_cbs_p)(void);
112static hsa_status_t (*rocp_stop_queue_cbs_p)(void);
113static hsa_status_t (*rocp_remove_queue_cbs_p)(void);
114
119static int load_rocp_sym(void);
120static int init_rocp_env(void);
121static int init_event_table(void);
122static int unload_rocp_sym(void);
123static int sampling_ctx_open(uint64_t *, int, rocp_ctx_t *);
124static int intercept_ctx_open(uint64_t *, int, rocp_ctx_t *);
125static int sampling_ctx_close(rocp_ctx_t);
126static int intercept_ctx_close(rocp_ctx_t);
127static int sampling_ctx_start(rocp_ctx_t);
128static int intercept_ctx_start(rocp_ctx_t);
129static int sampling_ctx_stop(rocp_ctx_t);
130static int intercept_ctx_stop(rocp_ctx_t);
131static int sampling_ctx_read(rocp_ctx_t, long long **);
132static int intercept_ctx_read(rocp_ctx_t, long long **);
133static int sampling_ctx_reset(rocp_ctx_t);
134static int intercept_ctx_reset(rocp_ctx_t);
135static int sampling_shutdown(void);
136static int intercept_shutdown(void);
137static int evt_code_to_name(uint64_t event_code, char *name, int len);
138static int evt_id_create(event_info_t *info, uint64_t *event_id);
139static int evt_id_to_info(uint64_t event_id, event_info_t *info);
140static int evt_name_to_device(const char *name, int *device);
141static int evt_name_to_instance(const char *name, int *instance);
142static int evt_name_to_basename(const char *name, char *base, int len);
143
144static void *rocp_dlp = NULL;
147static void *htable;
148static void *htable_intercept;
149
150/* rocp_init_environment - initialize ROCm environment variables */
151int
153{
154 return init_rocp_env();
155}
156
157/* rocp_init - load runtime and profiling symbols, init runtime and profiling */
158int
160{
161 int papi_errno = PAPI_OK;
162 SUBDBG("ENTER\n");
163
164 papi_errno = load_rocp_sym();
165 if (papi_errno != PAPI_OK) {
166 goto fn_fail;
167 }
168
170
173 }
174
175 papi_errno = init_event_table();
176 if (papi_errno != PAPI_OK) {
177 (*hsa_shut_down_p)();
178 goto fn_fail;
179 }
180
182
183 fn_exit:
184 SUBDBG("EXIT: %s\n", PAPI_strerror(papi_errno));
185 return papi_errno;
186 fn_fail:
188 goto fn_exit;
189}
190
191/* rocp_evt_enum - enumerate native events */
192int
193rocp_evt_enum(uint64_t *event_code, int modifier)
194{
195 int papi_errno = PAPI_OK;
196 event_info_t info;
197 SUBDBG("ENTER: event_code: %lu, modifier: %d\n", *event_code, modifier);
198
199
200 switch(modifier) {
201 case PAPI_ENUM_FIRST:
202 if (ntv_table_p->count == 0) {
203 papi_errno = PAPI_ENOEVNT;
204 break;
205 }
206 info.device = 0;
207 info.instance = 0;
208 info.flags = 0;
209 info.nameid = 0;
210 papi_errno = evt_id_create(&info, event_code);
211 break;
212 case PAPI_ENUM_EVENTS:
213 papi_errno = evt_id_to_info(*event_code, &info);
214 if (papi_errno != PAPI_OK) {
215 break;
216 }
217 if (ntv_table_p->count > info.nameid + 1) {
218 info.device = 0;
219 info.instance = 0;
220 info.flags = 0;
221 info.nameid++;
222 papi_errno = evt_id_create(&info, event_code);
223 break;
224 }
225 papi_errno = PAPI_END;
226 break;
228 papi_errno = evt_id_to_info(*event_code, &info);
229 if (papi_errno != PAPI_OK) {
230 break;
231 }
232 if (info.flags == 0) {
233 info.device = 0;
234 info.instance = 0;
235 info.flags = DEVICE_FLAG;
236 papi_errno = evt_id_create(&info, event_code);
237 break;
238 }
239 if (info.flags & DEVICE_FLAG) {
240 if (ntv_table_p->events[info.nameid].instances > 1) {
241 info.device = 0;
242 info.instance = 0;
243 info.flags = INSTAN_FLAG;
244 papi_errno = evt_id_create(&info, event_code);
245 break;
246 }
247 }
248 papi_errno = PAPI_END;
249 break;
250 default:
251 papi_errno = PAPI_EINVAL;
252 }
253
254 SUBDBG("EXIT: %s\n", PAPI_strerror(papi_errno));
255 return papi_errno;
256}
257
258/* rocp_evt_code_to_descr - return descriptor string for event_code */
259int
260rocp_evt_code_to_descr(uint64_t event_code, char *descr, int len)
261{
262 int papi_errno;
263
264 event_info_t info;
265 papi_errno = evt_id_to_info(event_code, &info);
266 if (papi_errno != PAPI_OK) {
267 return papi_errno;
268 }
269
270 snprintf(descr, (size_t) len, "%s", ntv_table_p->events[info.nameid].descr);
271 return papi_errno;
272}
273
274/* rocp_evt_name_to_code - convert native event name to code */
275int
276rocp_evt_name_to_code(const char *name, uint64_t *event_code)
277{
278 int papi_errno = PAPI_OK;
279 int htable_errno;
280 SUBDBG("ENTER: name: %s, event_code: %p\n", name, event_code);
281
282 int device;
283 papi_errno = evt_name_to_device(name, &device);
284 if (papi_errno != PAPI_OK) {
285 goto fn_exit;
286 }
287
288 int instance;
289 papi_errno = evt_name_to_instance(name, &instance);
290 if (papi_errno != PAPI_OK) {
291 goto fn_exit;
292 }
293
294 char base[PAPI_MAX_STR_LEN] = { 0 };
295 papi_errno = evt_name_to_basename(name, base, PAPI_MAX_STR_LEN);
296 if (papi_errno != PAPI_OK) {
297 goto fn_exit;
298 }
299
300 ntv_event_t *event;
301 htable_errno = htable_find(htable, base, (void **) &event);
302 if (htable_errno != HTABLE_SUCCESS) {
303 papi_errno = (htable_errno == HTABLE_ENOVAL) ? PAPI_ENOEVNT : PAPI_ECMP;
304 goto fn_exit;
305 }
306
307 int flags = (event->instances > 1) ? (DEVICE_FLAG | INSTAN_FLAG) : DEVICE_FLAG;
308 int nameid = (int) (event - ntv_table_p->events);
309 event_info_t info = { device, instance, flags, nameid };
310 papi_errno = evt_id_create(&info, event_code);
311 if (papi_errno != PAPI_OK) {
312 goto fn_exit;
313 }
314
315 papi_errno = evt_id_to_info(*event_code, &info);
316
317 fn_exit:
318 SUBDBG("EXIT: %s\n", PAPI_strerror(papi_errno));
319 return papi_errno;
320}
321
/* rocp_evt_code_to_name - write the name of the native event encoded in
 * event_code into name (at most len bytes); thin public wrapper that
 * forwards to evt_code_to_name and returns its status unchanged. */
int
rocp_evt_code_to_name(uint64_t event_code, char *name, int len)
{
    int status = evt_code_to_name(event_code, name, len);
    return status;
}
328
329/* rocp_evt_code_to_info - get event info */
330int
331rocp_evt_code_to_info(uint64_t event_code, PAPI_event_info_t *info)
332{
333 int papi_errno;
334
335 event_info_t inf;
336 papi_errno = evt_id_to_info(event_code, &inf);
337 if (papi_errno != PAPI_OK) {
338 return papi_errno;
339 }
340
341 switch (inf.flags) {
342 case 0:
343 sprintf(info->symbol, "%s", ntv_table_p->events[inf.nameid].name);
344 sprintf(info->long_descr, "%s", ntv_table_p->events[inf.nameid].descr);
345 break;
346 case (DEVICE_FLAG | INSTAN_FLAG):
347 {
348 int i;
349 char devices[PAPI_MAX_STR_LEN] = { 0 };
350 for (i = 0; i < device_table_p->count; ++i) {
352 sprintf(devices + strlen(devices), "%i,", i);
353 }
354 }
355 *(devices + strlen(devices) - 1) = 0;
356 sprintf(info->symbol, "%s:device=%i:instance=%i", ntv_table_p->events[inf.nameid].name, inf.device, inf.instance);
357 sprintf(info->long_descr, "%s, masks:Mandatory device qualifier [%s]:Mandatory instance qualifier in range [0-%i]",
359 break;
360 }
361 case DEVICE_FLAG:
362 {
363 int i;
364 char devices[PAPI_MAX_STR_LEN] = { 0 };
365 for (i = 0; i < device_table_p->count; ++i) {
367 sprintf(devices + strlen(devices), "%i,", i);
368 }
369 }
370 *(devices + strlen(devices) - 1) = 0;
371 sprintf(info->symbol, "%s:device=%i", ntv_table_p->events[inf.nameid].name, inf.device);
372 sprintf(info->long_descr, "%s, masks:Mandatory device qualifier [%s]",
374 break;
375 }
376 case INSTAN_FLAG:
377 sprintf(info->symbol, "%s:instance=%i", ntv_table_p->events[inf.nameid].name, inf.instance);
378 sprintf(info->long_descr, "%s, masks:Mandatory instance qualifier in range [0-%i]",
380 break;
381 default:
382 papi_errno = PAPI_EINVAL;
383 }
384
385 return papi_errno;
386}
387
388/* rocp_ctx_open - open a profiling context for the requested events */
389int
390rocp_ctx_open(uint64_t *events_id, int num_events, rocp_ctx_t *rocp_ctx)
391{
393 return sampling_ctx_open(events_id, num_events, rocp_ctx);
394 }
395
396 return intercept_ctx_open(events_id, num_events, rocp_ctx);
397}
398
399/* rocp_ctx_close - close profiling context */
400int
401rocp_ctx_close(rocp_ctx_t rocp_ctx)
402{
404 return sampling_ctx_close(rocp_ctx);
405 }
406
407 return intercept_ctx_close(rocp_ctx);
408}
409
410/* rocp_ctx_start - start monitoring events associated to profiling context */
411int
412rocp_ctx_start(rocp_ctx_t rocp_ctx)
413{
415 return sampling_ctx_start(rocp_ctx);
416 }
417
418 return intercept_ctx_start(rocp_ctx);
419}
420
421/* rocp_ctx_stop - stop monitoring events associated to profiling context */
422int
423rocp_ctx_stop(rocp_ctx_t rocp_ctx)
424{
426 return sampling_ctx_stop(rocp_ctx);
427 }
428
429 return intercept_ctx_stop(rocp_ctx);
430}
431
432/* rocp_ctx_read - read counters for events associated to profiling context */
433int
434rocp_ctx_read(rocp_ctx_t rocp_ctx, long long **counts)
435{
437 return sampling_ctx_read(rocp_ctx, counts);
438 }
439
440 return intercept_ctx_read(rocp_ctx, counts);
441}
442
443/* rocp_ctx_reset - reset counters for events associated to profiling context */
444int
445rocp_ctx_reset(rocp_ctx_t rocp_ctx)
446{
448 return sampling_ctx_reset(rocp_ctx);
449 }
450
451 return intercept_ctx_reset(rocp_ctx);
452}
453
454/* rocp_shutdown - shutdown runtime and profiling, unload runtime and profiling symbols */
455int
457{
459 return sampling_shutdown();
460 }
461
462 return intercept_shutdown();
463}
464
469int
471{
472 int papi_errno = PAPI_OK;
473
474 char *pathname = getenv("HSA_TOOLS_LIB");
475 if (pathname == NULL) {
476 snprintf(error_string, PAPI_MAX_STR_LEN, "Can't load librocprofiler64.so, neither PAPI_ROCM_ROOT nor HSA_TOOLS_LIB are set.");
477 goto fn_fail;
478 }
479
480 rocp_dlp = dlopen(pathname, RTLD_NOW | RTLD_GLOBAL);
481 if (rocp_dlp == NULL) {
482 sprintf(error_string, "%s", dlerror());
483 goto fn_fail;
484 }
485
486 rocp_get_info_p = dlsym(rocp_dlp, "rocprofiler_get_info");
487 rocp_iterate_info_p = dlsym(rocp_dlp, "rocprofiler_iterate_info");
488 rocp_error_string_p = dlsym(rocp_dlp, "rocprofiler_error_string");
489 rocp_open_p = dlsym(rocp_dlp, "rocprofiler_open");
490 rocp_close_p = dlsym(rocp_dlp, "rocprofiler_close");
491 rocp_group_count_p = dlsym(rocp_dlp, "rocprofiler_group_count");
492 rocp_start_p = dlsym(rocp_dlp, "rocprofiler_start");
493 rocp_read_p = dlsym(rocp_dlp, "rocprofiler_read");
494 rocp_stop_p = dlsym(rocp_dlp, "rocprofiler_stop");
495 rocp_get_group_p = dlsym(rocp_dlp, "rocprofiler_get_group");
496 rocp_get_data_p = dlsym(rocp_dlp, "rocprofiler_get_data");
497 rocp_group_get_data_p = dlsym(rocp_dlp, "rocprofiler_group_get_data");
498 rocp_get_metrics_p = dlsym(rocp_dlp, "rocprofiler_get_metrics");
499 rocp_reset_p = dlsym(rocp_dlp, "rocprofiler_reset");
500 rocp_pool_open_p = dlsym(rocp_dlp, "rocprofiler_pool_open");
501 rocp_pool_close_p = dlsym(rocp_dlp, "rocprofiler_pool_close");
502 rocp_pool_fetch_p = dlsym(rocp_dlp, "rocprofiler_pool_fetch");
503 rocp_pool_flush_p = dlsym(rocp_dlp, "rocprofiler_pool_flush");
504 rocp_set_queue_cbs_p = dlsym(rocp_dlp, "rocprofiler_set_queue_callbacks");
505 rocp_start_queue_cbs_p = dlsym(rocp_dlp, "rocprofiler_start_queue_callbacks");
506 rocp_stop_queue_cbs_p = dlsym(rocp_dlp, "rocprofiler_stop_queue_callbacks");
507 rocp_remove_queue_cbs_p= dlsym(rocp_dlp, "rocprofiler_remove_queue_callbacks");
508
509 int rocp_not_initialized = (!rocp_get_info_p ||
512 !rocp_open_p ||
513 !rocp_close_p ||
515 !rocp_start_p ||
516 !rocp_read_p ||
517 !rocp_stop_p ||
522 !rocp_reset_p ||
531
532 papi_errno = (rocp_not_initialized) ? PAPI_EMISC : PAPI_OK;
533 if (papi_errno != PAPI_OK) {
534 snprintf(error_string, PAPI_MAX_STR_LEN, "Error while loading rocprofiler symbols.");
535 }
536
537 fn_exit:
538 return papi_errno;
539 fn_fail:
540 papi_errno = PAPI_ENOSUPP;
541 goto fn_exit;
542}
543
544int
546{
547 if (rocp_dlp == NULL) {
548 return PAPI_OK;
549 }
550
551 rocp_get_info_p = NULL;
552 rocp_iterate_info_p = NULL;
553 rocp_error_string_p = NULL;
554 rocp_open_p = NULL;
555 rocp_close_p = NULL;
556 rocp_group_count_p = NULL;
557 rocp_start_p = NULL;
558 rocp_read_p = NULL;
559 rocp_stop_p = NULL;
560 rocp_get_group_p = NULL;
561 rocp_get_data_p = NULL;
563 rocp_get_metrics_p = NULL;
564 rocp_reset_p = NULL;
565 rocp_pool_open_p = NULL;
566 rocp_pool_close_p = NULL;
567 rocp_pool_fetch_p = NULL;
568 rocp_pool_flush_p = NULL;
573
574 dlclose(rocp_dlp);
575
576 return PAPI_OK;
577}
578
579int
581{
582 static int rocp_env_initialized;
583
584 if (rocp_env_initialized) {
585 return PAPI_OK;
586 }
587
588 const char *rocp_mode = getenv("ROCP_HSA_INTERCEPT");
589 rocm_prof_mode = (rocp_mode != NULL) ?
590 atoi(rocp_mode) : ROCM_PROFILE_SAMPLING_MODE;
591
592 char pathname[PATH_MAX];
593 char *rocm_root = getenv("PAPI_ROCM_ROOT");
594 if (rocm_root == NULL) {
595 snprintf(error_string, PAPI_MAX_STR_LEN, "Can't set HSA_TOOLS_LIB. PAPI_ROCM_ROOT not set.");
596 return PAPI_EMISC;
597 }
598
599 int err;
600 int override_hsa_tools_lib = 1;
601 struct stat stat_info;
602 char *hsa_tools_lib = getenv("HSA_TOOLS_LIB");
603 if (hsa_tools_lib) {
604 err = stat(hsa_tools_lib, &stat_info);
605 if (err == 0 && S_ISREG(stat_info.st_mode)) {
606 override_hsa_tools_lib = 0;
607 }
608 }
609
610 if (override_hsa_tools_lib) {
611 /* Account for change of librocprofiler64.so file location in rocm-5.2.0
612 * directory structure */
613
614 /* prefer .so.1 as .so might not be available in 5.7 anymore, in 5.6 it
615 * was a linker script. */
616 const char *candidates[] = {
617 "lib/librocprofiler64.so.1",
618 "lib/librocprofiler64.so",
619 "rocprofiler/lib/libprofiler64.so.1",
620 "rocprofiler/lib/libprofiler64.so",
621 NULL
622 };
623 const char **candidate = candidates;
624 while (*candidate) {
625 sprintf(pathname, "%s/%s", rocm_root, *candidate);
626
627 err = stat(pathname, &stat_info);
628 if (err == 0) {
629 break;
630 }
631 ++candidate;
632 }
633 if (!*candidate) {
634 snprintf(error_string, PAPI_MAX_STR_LEN, "Rocprofiler librocprofiler64.so file not found.");
635 return PAPI_EMISC;
636 }
637
638 setenv("HSA_TOOLS_LIB", pathname, 1);
639 }
640
641 int override_rocp_metrics = 1;
642 char *rocp_metrics = getenv("ROCP_METRICS");
643 if (rocp_metrics) {
644 err = stat(rocp_metrics, &stat_info);
645 if (err == 0 && S_ISREG(stat_info.st_mode)) {
646 override_rocp_metrics = 0;
647 }
648 }
649
650 if (override_rocp_metrics) {
651 /* Account for change of metrics file location in rocm-5.2.0
652 * directory structure */
653 sprintf(pathname, "%s/lib/rocprofiler/metrics.xml", rocm_root);
654
655 err = stat(pathname, &stat_info);
656 if (err < 0) {
657 sprintf(pathname, "%s/rocprofiler/lib/metrics.xml", rocm_root);
658
659 err = stat(pathname, &stat_info);
660 if (err < 0) {
661 snprintf(error_string, PAPI_MAX_STR_LEN, "Rocprofiler metrics.xml file not found.");
662 return PAPI_EMISC;
663 }
664 }
665
666 setenv("ROCP_METRICS", pathname, 1);
667 }
668
669 rocp_env_initialized = 1;
670 return PAPI_OK;
671}
672
673static hsa_status_t count_ntv_events_cb(const rocprofiler_info_data_t, void *);
674static hsa_status_t get_ntv_events_cb(const rocprofiler_info_data_t, void *);
675
676struct ntv_arg {
677 int count;
679};
680
681int
683{
684 int papi_errno = PAPI_OK;
685 int i;
686
687 for (i = 0; i < device_table_p->count; ++i) {
688 hsa_status_t rocp_errno = rocp_iterate_info_p(&device_table_p->devices[i],
689 ROCPROFILER_INFO_KIND_METRIC,
692 if (rocp_errno != HSA_STATUS_SUCCESS) {
693 const char *error_string_p;
694 hsa_status_string_p(rocp_errno, &error_string_p);
695 snprintf(error_string, PAPI_MAX_STR_LEN, "%s", error_string_p);
696 papi_errno = PAPI_EMISC;
697 goto fn_fail;
698 }
699 }
700
702 if (ntv_table.events == NULL) {
703 papi_errno = PAPI_ENOMEM;
704 goto fn_fail;
705 }
706
707 struct ntv_arg arg;
708 arg.count = 0;
709
710 for (i = 0; i < device_table_p->count; ++i) {
711 arg.dev_id = i;
712 hsa_status_t rocp_errno = rocp_iterate_info_p(&device_table_p->devices[i],
713 ROCPROFILER_INFO_KIND_METRIC,
715 &arg);
716 if (rocp_errno != HSA_STATUS_SUCCESS) {
717 const char *error_string_p;
718 hsa_status_string_p(rocp_errno, &error_string_p);
719 snprintf(error_string, PAPI_MAX_STR_LEN, "%s", error_string_p);
720 papi_errno = PAPI_EMISC;
721 goto fn_fail;
722 }
723 }
724
726 if (ntv_table.events == NULL) {
727 papi_errno = PAPI_ENOMEM;
728 }
729
730 ntv_table.count = arg.count;
731
732 fn_exit:
733 return papi_errno;
734 fn_fail:
735 goto fn_exit;
736}
737
738int
739evt_code_to_name(uint64_t event_code, char *name, int len)
740{
741 int papi_errno;
742
743 event_info_t info;
744 papi_errno = evt_id_to_info(event_code, &info);
745 if (papi_errno != PAPI_OK) {
746 return papi_errno;
747 }
748
749 switch (info.flags) {
750 case (DEVICE_FLAG | INSTAN_FLAG):
751 snprintf(name, len, "%s:device=%i:instance=%i", ntv_table_p->events[info.nameid].name, info.device, info.instance);
752 break;
753 case (DEVICE_FLAG):
754 snprintf(name, len, "%s:device=%i", ntv_table_p->events[info.nameid].name, info.device);
755 break;
756 default:
757 snprintf(name, len, "%s", ntv_table_p->events[info.nameid].name);
758 }
759
760 return papi_errno;
761}
762
763int
764evt_id_create(event_info_t *info, uint64_t *event_id)
765{
766 *event_id = (uint64_t)(info->device << DEVICE_SHIFT);
767 *event_id |= (uint64_t)(info->instance << INSTAN_SHIFT);
768 *event_id |= (uint64_t)(info->flags << QLMASK_SHIFT);
769 *event_id |= (uint64_t)(info->nameid << NAMEID_SHIFT);
770 return PAPI_OK;
771}
772
773int
774evt_id_to_info(uint64_t event_id, event_info_t *info)
775{
776 info->device = (int)((event_id & DEVICE_MASK) >> DEVICE_SHIFT);
777 info->instance = (int)((event_id & INSTAN_MASK) >> INSTAN_SHIFT);
778 info->flags = (int)((event_id & QLMASK_MASK) >> QLMASK_SHIFT);
779 info->nameid = (int)((event_id & NAMEID_MASK) >> NAMEID_SHIFT);
780
781 if (info->device >= device_table_p->count) {
782 return PAPI_ENOEVNT;
783 }
784
785 if (0 == (info->flags & DEVICE_FLAG) && info->device > 0) {
786 return PAPI_ENOEVNT;
787 }
788
789 if (rocc_dev_check(ntv_table_p->events[info->nameid].device_map, info->device) == 0) {
790 return PAPI_ENOEVNT;
791 }
792
793 if (info->nameid >= ntv_table_p->count) {
794 return PAPI_ENOEVNT;
795 }
796
797 if (ntv_table_p->events[info->nameid].instances > 1 && 0 == (info->flags & INSTAN_FLAG) && info->instance > 0) {
798 return PAPI_ENOEVNT;
799 }
800
801 if (info->instance >= ntv_table_p->events[info->nameid].instances) {
802 return PAPI_ENOEVNT;
803 }
804
805 return PAPI_OK;
806}
807
808int
809evt_name_to_device(const char *name, int *device)
810{
811 char *p = strstr(name, ":device=");
812 if (!p) {
813 return PAPI_ENOEVNT;
814 }
815 *device = (int) strtol(p + strlen(":device="), NULL, 10);
816 return PAPI_OK;
817}
818
819int
820evt_name_to_instance(const char *name, int *instance)
821{
822 *instance = 0;
823
824 char basename[PAPI_MAX_STR_LEN] = { 0 };
825 int papi_errno = evt_name_to_basename(name, basename, PAPI_MAX_STR_LEN);
826 if (papi_errno != PAPI_OK) {
827 return papi_errno;
828 }
829
830 ntv_event_t *event;
831 if (htable_find(htable, basename, (void **) &event) != HTABLE_SUCCESS) {
832 return PAPI_ENOEVNT;
833 }
834
835 char *p = strstr(name, ":instance=");
836 if (event->instances > 1) {
837 if (!p) {
838 return PAPI_ENOEVNT;
839 }
840 *instance = (int) strtol(p + strlen(":instance="), NULL, 10);
841 } else {
842 if (p) {
843 return PAPI_ENOEVNT;
844 }
845 }
846
847 return PAPI_OK;
848}
849
850int
851evt_name_to_basename(const char *name, char *base, int len)
852{
853 char *p = strstr(name, ":");
854 if (p) {
855 if (len < (int)(p - name)) {
856 return PAPI_EBUF;
857 }
858 strncpy(base, name, (size_t)(p - name));
859 } else {
860 if (len < (int) strlen(name)) {
861 return PAPI_EBUF;
862 }
863 strncpy(base, name, (size_t) len);
864 }
865 return PAPI_OK;
866}
867
872hsa_status_t
873count_ntv_events_cb(const rocprofiler_info_data_t info __attribute__((unused)), void *count)
874{
875 (*(int *) count) += 1;
876 return HSA_STATUS_SUCCESS;
877}
878
879hsa_status_t
880get_ntv_events_cb(const rocprofiler_info_data_t info, void *ntv_arg)
881{
882 struct ntv_arg *arg = (struct ntv_arg *) ntv_arg;
883 int capacity = ntv_table.count;
884 int *count = &arg->count;
886
887 if (*count > capacity) {
888 snprintf(error_string, PAPI_MAX_STR_LEN, "Number of events exceeds detected count.");
889 return HSA_STATUS_ERROR;
890 }
891
892 ntv_event_t *event;
893 if (htable_find(htable, info.metric.name, (void **) &event) != HTABLE_SUCCESS) {
894 event = &events[(*count)++];
895 event->name = papi_strdup(info.metric.name);
896 event->descr = papi_strdup(info.metric.description);
897 event->instances = info.metric.instances;
898 htable_insert(htable, info.metric.name, event);
899 }
900
901 rocc_dev_set(&event->device_map, arg->dev_id);
902
903 return HSA_STATUS_SUCCESS;
904}
905
910static int init_features(uint64_t *, int, rocprofiler_feature_t *);
911static int finalize_features(rocprofiler_feature_t *, int);
912static int sampling_ctx_init(uint64_t *, int, rocp_ctx_t *);
913static int sampling_ctx_finalize(rocp_ctx_t *);
914static int ctx_open(rocp_ctx_t);
915static int ctx_close(rocp_ctx_t);
916static int ctx_init(uint64_t *, int, rocp_ctx_t *);
917static int ctx_finalize(rocp_ctx_t *);
918static int ctx_get_dev_feature_count(rocp_ctx_t, int);
919
920int
921sampling_ctx_open(uint64_t *events_id, int num_events, rocp_ctx_t *rocp_ctx)
922{
923 int papi_errno = PAPI_OK;
924
925 if (num_events <= 0) {
926 return PAPI_ENOEVNT;
927 }
928
930
931 papi_errno = ctx_init(events_id, num_events, rocp_ctx);
932 if (papi_errno != PAPI_OK) {
933 goto fn_fail;
934 }
935
936 papi_errno = ctx_open(*rocp_ctx);
937 if (papi_errno != PAPI_OK) {
938 goto fn_fail;
939 }
940
941 (*rocp_ctx)->u.sampling.state |= ROCM_EVENTS_OPENED;
942
943 fn_exit:
945 return papi_errno;
946 fn_fail:
947 ctx_finalize(rocp_ctx);
948 goto fn_exit;
949}
950
951int
952sampling_ctx_close(rocp_ctx_t rocp_ctx)
953{
954 int papi_errno = PAPI_OK;
955
957
958 papi_errno = ctx_close(rocp_ctx);
959 if (papi_errno != PAPI_OK) {
960 goto fn_fail;
961 }
962
963 ctx_finalize(&rocp_ctx);
964
965 fn_exit:
967 return papi_errno;
968 fn_fail:
969 goto fn_exit;
970}
971
972int
973sampling_ctx_start(rocp_ctx_t rocp_ctx)
974{
975 if (!(rocp_ctx->u.sampling.state & ROCM_EVENTS_OPENED)) {
976 SUBDBG("[ROCP sampling mode] Cannot start eventset, not opened.");
977 return PAPI_EINVAL;
978 }
979
980 if (rocp_ctx->u.sampling.state & ROCM_EVENTS_RUNNING) {
981 SUBDBG("[ROCP sampling mode] Cannot start eventset, already running.");
982 return PAPI_EINVAL;
983 }
984
985 int devs_count;
986 rocc_dev_get_count(rocp_ctx->u.sampling.device_map, &devs_count);
987
988 int i;
989 for (i = 0; i < devs_count; ++i) {
990 hsa_status_t rocp_errno = rocp_start_p(rocp_ctx->u.sampling.contexts[i], 0);
991 if (rocp_errno != HSA_STATUS_SUCCESS) {
992 return PAPI_EMISC;
993 }
994 }
995
996 rocp_ctx->u.sampling.state |= ROCM_EVENTS_RUNNING;
997 return PAPI_OK;
998}
999
1000int
1001sampling_ctx_stop(rocp_ctx_t rocp_ctx)
1002{
1003 if (!(rocp_ctx->u.sampling.state & ROCM_EVENTS_OPENED)) {
1004 SUBDBG("[ROCP sampling mode] Cannot stop eventset, not opened.");
1005 return PAPI_EINVAL;
1006 }
1007
1008 if (!(rocp_ctx->u.sampling.state & ROCM_EVENTS_RUNNING)) {
1009 SUBDBG("[ROCP sampling mode] Cannot stop eventset, not running.");
1010 return PAPI_EINVAL;
1011 }
1012
1013 int devs_count;
1014 rocc_dev_get_count(rocp_ctx->u.sampling.device_map, &devs_count);
1015
1016 int i;
1017 for (i = 0; i < devs_count; ++i) {
1018 hsa_status_t rocp_errno = rocp_stop_p(rocp_ctx->u.sampling.contexts[i], 0);
1019 if (rocp_errno != HSA_STATUS_SUCCESS) {
1020 return PAPI_EMISC;
1021 }
1022 }
1023
1024 rocp_ctx->u.sampling.state &= ~ROCM_EVENTS_RUNNING;
1025 return PAPI_OK;
1026}
1027
1028int
1029sampling_ctx_read(rocp_ctx_t rocp_ctx, long long **counts)
1030{
1031 int i, j, k = 0;
1032 int dev_feature_offset = 0;
1033 int dev_id, dev_count;
1034 rocprofiler_feature_t *features = rocp_ctx->u.sampling.features;
1035
1036 rocc_dev_get_count(rocp_ctx->u.sampling.device_map, &dev_count);
1037
1038 for (i = 0; i < dev_count; ++i) {
1039 hsa_status_t rocp_errno = rocp_read_p(rocp_ctx->u.sampling.contexts[i], 0);
1040 if (rocp_errno != HSA_STATUS_SUCCESS) {
1041 return PAPI_EMISC;
1042 }
1043
1044 rocp_errno = rocp_get_data_p(rocp_ctx->u.sampling.contexts[i], 0);
1045 if (rocp_errno != HSA_STATUS_SUCCESS) {
1046 return PAPI_EMISC;
1047 }
1048
1049 rocp_errno = rocp_get_metrics_p(rocp_ctx->u.sampling.contexts[i]);
1050 if (rocp_errno != HSA_STATUS_SUCCESS) {
1051 return PAPI_EMISC;
1052 }
1053
1054 int papi_errno = rocc_dev_get_id(rocp_ctx->u.sampling.device_map, i, &dev_id);
1055 if (papi_errno != PAPI_OK) {
1056 return papi_errno;
1057 }
1058
1059 int dev_feature_count = ctx_get_dev_feature_count(rocp_ctx, dev_id);
1060 rocprofiler_feature_t *dev_features = features + dev_feature_offset;
1061 long long *counters = rocp_ctx->u.sampling.counters;
1062
1063 for (j = 0; j < dev_feature_count; ++j) {
1064 switch(dev_features[j].data.kind) {
1065 case ROCPROFILER_DATA_KIND_INT32:
1066 counters[k++] = (long long) dev_features[j].data.result_int32;
1067 break;
1068 case ROCPROFILER_DATA_KIND_INT64:
1069 counters[k++] = dev_features[j].data.result_int64;
1070 break;
1071 case ROCPROFILER_DATA_KIND_FLOAT:
1072 counters[k++] = (long long) dev_features[j].data.result_float;
1073 break;
1074 case ROCPROFILER_DATA_KIND_DOUBLE:
1075 counters[k++] = (long long) dev_features[j].data.result_double;
1076 break;
1077 default:
1078 return PAPI_EINVAL;
1079 }
1080 }
1081 dev_feature_offset += dev_feature_count;
1082 }
1083 *counts = rocp_ctx->u.sampling.counters;
1084
1085 return PAPI_OK;
1086}
1087
1088int
1089sampling_ctx_reset(rocp_ctx_t rocp_ctx)
1090{
1091 int i, devs_count;
1092 rocc_dev_get_count(rocp_ctx->u.sampling.device_map, &devs_count);
1093
1094 for (i = 0; i < devs_count; ++i) {
1095 hsa_status_t rocp_errno = rocp_reset_p(rocp_ctx->u.sampling.contexts[i], 0);
1096 if (rocp_errno != HSA_STATUS_SUCCESS) {
1097 return PAPI_EMISC;
1098 }
1099 }
1100 for (i = 0; i < rocp_ctx->u.sampling.feature_count; ++i) {
1101 rocp_ctx->u.sampling.counters[i] = 0;
1102 }
1103 return PAPI_OK;
1104}
1105
1110static int shutdown_event_table(void);
1111
1112int
1114{
1117
1119
1120 return PAPI_OK;
1121}
1122
1123int
1125{
1126 int i;
1127
1128 for (i = 0; i < ntv_table_p->count; ++i) {
1131 }
1132
1133 ntv_table_p->count = 0;
1134
1136
1137 return PAPI_OK;
1138}
1139
1144static int
1145event_id_to_dev_id_cb(uint64_t event_id, int *device)
1146{
1147 event_info_t info;
1148 int papi_errno = evt_id_to_info(event_id, &info);
1149 *device = info.device;
1150 return papi_errno;
1151}
1152
1153int
1154sampling_ctx_init(uint64_t *events_id, int num_events, rocp_ctx_t *rocp_ctx)
1155{
1156 int papi_errno = PAPI_OK;
1157 int num_devs;
1158 rocprofiler_feature_t *features = NULL;
1159 rocprofiler_t **contexts = NULL;
1160 rocprofiler_properties_t *ctx_prop = NULL;
1161 long long *counters = NULL;
1162 *rocp_ctx = NULL;
1163
1164 rocc_bitmap_t bitmap;
1166 if (papi_errno != PAPI_OK) {
1167 return papi_errno;
1168 }
1169
1170 papi_errno = rocc_dev_get_count(bitmap, &num_devs);
1171 if (papi_errno != PAPI_OK) {
1172 goto fn_fail;
1173 }
1174
1175 contexts = papi_calloc(num_devs, sizeof(*contexts));
1176 if (contexts == NULL) {
1177 papi_errno = PAPI_ENOMEM;
1178 goto fn_fail;
1179 }
1180
1181 ctx_prop = papi_calloc(num_devs, sizeof(*ctx_prop));
1182 if (ctx_prop == NULL) {
1183 papi_errno = PAPI_ENOMEM;
1184 goto fn_fail;
1185 }
1186
1188 if (features == NULL) {
1189 papi_errno = PAPI_ENOMEM;
1190 goto fn_fail;
1191 }
1192
1193 counters = papi_malloc(num_events * sizeof(*counters));
1194 if (counters == NULL) {
1195 papi_errno = PAPI_ENOMEM;
1196 goto fn_fail;
1197 }
1198
1200 if (papi_errno != PAPI_OK) {
1201 goto fn_fail;
1202 }
1203
1204 *rocp_ctx = papi_calloc(1, sizeof(**rocp_ctx));
1205 if (*rocp_ctx == NULL) {
1206 papi_errno = PAPI_ENOMEM;
1207 goto fn_fail;
1208 }
1209
1210 (*rocp_ctx)->u.sampling.events_id = events_id;
1211 (*rocp_ctx)->u.sampling.features = features;
1212 (*rocp_ctx)->u.sampling.feature_count = num_events;
1213 (*rocp_ctx)->u.sampling.contexts = contexts;
1214 (*rocp_ctx)->u.sampling.counters = counters;
1215 (*rocp_ctx)->u.sampling.device_map = bitmap;
1216 (*rocp_ctx)->u.sampling.ctx_prop = ctx_prop;
1217
1218 fn_exit:
1219 return papi_errno;
1220 fn_fail:
1221 if (contexts) {
1222 papi_free(contexts);
1223 }
1224 if (features) {
1226 }
1227 if (counters) {
1228 papi_free(counters);
1229 }
1230 if (*rocp_ctx) {
1231 papi_free(*rocp_ctx);
1232 }
1233 *rocp_ctx = NULL;
1234 goto fn_exit;
1235}
1236
1237int
1238sampling_ctx_finalize(rocp_ctx_t *rocp_ctx)
1239{
1240 if (*rocp_ctx == NULL) {
1241 return PAPI_OK;
1242 }
1243
1244 if ((*rocp_ctx)->u.sampling.features) {
1245 finalize_features((*rocp_ctx)->u.sampling.features, (*rocp_ctx)->u.sampling.feature_count);
1246 papi_free((*rocp_ctx)->u.sampling.features);
1247 }
1248
1249 if ((*rocp_ctx)->u.sampling.contexts) {
1250 papi_free((*rocp_ctx)->u.sampling.contexts);
1251 }
1252
1253 if ((*rocp_ctx)->u.sampling.ctx_prop) {
1254 papi_free((*rocp_ctx)->u.sampling.ctx_prop);
1255 }
1256
1257 if ((*rocp_ctx)->u.sampling.counters) {
1258 papi_free((*rocp_ctx)->u.sampling.counters);
1259 }
1260
1261 papi_free(*rocp_ctx);
1262 *rocp_ctx = NULL;
1263
1264 return PAPI_OK;
1265}
1266
1267int
1268ctx_open(rocp_ctx_t rocp_ctx)
1269{
1270 int papi_errno = PAPI_OK;
1271 int i, j;
1272 rocprofiler_feature_t *features = rocp_ctx->u.sampling.features;
1273 int dev_feature_offset = 0;
1274 int dev_count;
1275 rocprofiler_t **contexts = rocp_ctx->u.sampling.contexts;
1276 rocprofiler_properties_t *ctx_prop = rocp_ctx->u.sampling.ctx_prop;
1277
1278 papi_errno = rocc_dev_get_count(rocp_ctx->u.sampling.device_map, &dev_count);
1279 if (papi_errno != PAPI_OK) {
1280 return papi_errno;
1281 }
1282
1283 for (i = 0; i < dev_count; ++i) {
1284 int dev_id;
1285 papi_errno = rocc_dev_get_id(rocp_ctx->u.sampling.device_map, i, &dev_id);
1286 if (papi_errno != PAPI_OK) {
1287 goto fn_fail;
1288 }
1289
1290 int dev_feature_count = ctx_get_dev_feature_count(rocp_ctx, dev_id);
1291 rocprofiler_feature_t *dev_features = features + dev_feature_offset;
1292
1293 const uint32_t mode =
1294 ROCPROFILER_MODE_STANDALONE | ROCPROFILER_MODE_CREATEQUEUE | ROCPROFILER_MODE_SINGLEGROUP;
1295
1296 ctx_prop[i].queue_depth = 128;
1297 hsa_status_t rocp_errno = rocp_open_p(device_table_p->devices[dev_id], dev_features,
1298 dev_feature_count, &contexts[i], mode,
1299 &ctx_prop[i]);
1300 if (rocp_errno != HSA_STATUS_SUCCESS) {
1301 papi_errno = PAPI_EMISC;
1302 goto fn_fail;
1303 }
1304
1305 dev_feature_offset += dev_feature_count;
1306 }
1307
1308 papi_errno = rocc_dev_acquire(rocp_ctx->u.sampling.device_map);
1309 if (papi_errno != PAPI_OK) {
1310 goto fn_fail;
1311 }
1312
1313 fn_exit:
1314 return papi_errno;
1315 fn_fail:
1316 for (j = 0; j < i; ++j) {
1317 rocp_close_p(contexts[j]);
1318 hsa_queue_destroy_p(ctx_prop[j].queue);
1319 }
1320 goto fn_exit;
1321}
1322
1323int
1324ctx_close(rocp_ctx_t rocp_ctx)
1325{
1326 int papi_errno;
1327 int i, devs_count;
1328 rocc_dev_get_count(rocp_ctx->u.sampling.device_map, &devs_count);
1329
1330 for (i = 0; i < devs_count; ++i) {
1331 if (rocp_close_p(rocp_ctx->u.sampling.contexts[i]) != HSA_STATUS_SUCCESS) {
1332 papi_errno = PAPI_EMISC;
1333 }
1334
1335 if (hsa_queue_destroy_p(rocp_ctx->u.sampling.ctx_prop[i].queue) != HSA_STATUS_SUCCESS) {
1336 papi_errno = PAPI_EMISC;
1337 }
1338 }
1339
1340 papi_errno = rocc_dev_release(rocp_ctx->u.sampling.device_map);
1341
1342 return papi_errno;
1343}
1344
1345int
1346init_features(uint64_t *events_id, int num_events, rocprofiler_feature_t *features)
1347{
1348 int papi_errno = PAPI_OK;
1349
1350 int i;
1351 for (i = 0; i < num_events; ++i) {
1352 char name[PAPI_MAX_STR_LEN] = { 0 };
1353 event_info_t info;
1354 papi_errno = evt_id_to_info(events_id[i], &info);
1355 if (papi_errno != PAPI_OK) {
1356 break;
1357 }
1358 if (ntv_table_p->events[info.nameid].instances > 1) {
1359 sprintf(name, "%s[%i]", ntv_table_p->events[info.nameid].name, info.instance);
1360 } else {
1361 strcpy(name, ntv_table_p->events[info.nameid].name);
1362 }
1363 features[i].kind = (rocprofiler_feature_kind_t) ROCPROFILER_INFO_KIND_METRIC;
1364 features[i].name = papi_strdup(name);
1365 }
1366
1367 return papi_errno;
1368}
1369
1370int
1371finalize_features(rocprofiler_feature_t *features, int feature_count)
1372{
1373 int i;
1374 for (i = 0; i < feature_count; ++i) {
1375 papi_free((char *) features[i].name);
1376 }
1377 return PAPI_OK;
1378}
1379
1380static int sampling_ctx_get_dev_feature_count(rocp_ctx_t, int);
1381static int intercept_ctx_get_dev_feature_count(rocp_ctx_t, int);
1382
1383int
1384ctx_get_dev_feature_count(rocp_ctx_t rocp_ctx, int i)
1385{
1387 return sampling_ctx_get_dev_feature_count(rocp_ctx, i);
1388 }
1389
1390 return intercept_ctx_get_dev_feature_count(rocp_ctx, i);
1391}
1392
1393int
1394sampling_ctx_get_dev_feature_count(rocp_ctx_t rocp_ctx, int i)
1395{
1396 int start, stop, j = 0;
1397 int num_events = rocp_ctx->u.sampling.feature_count;
1398 uint64_t *events_id = rocp_ctx->u.sampling.events_id;
1399
1400 while (j < num_events && (events_id[j] & DEVICE_MASK) >> DEVICE_SHIFT != (uint64_t) i) {
1401 ++j;
1402 }
1403
1404 start = j;
1405
1406 while (j < num_events && (events_id[j] & DEVICE_MASK) >> DEVICE_SHIFT == (uint64_t) i) {
1407 ++j;
1408 }
1409
1410 stop = j;
1411
1412 return stop - start;
1413}
1414
1415int
1417{
1418 int start, stop, j = 0;
1419 int num_events = rocp_ctx->u.intercept.feature_count;
1420 uint64_t *events_id = rocp_ctx->u.intercept.events_id;
1421
1422 while (j < num_events && (events_id[j] & DEVICE_MASK) >> DEVICE_SHIFT != (uint64_t) i) {
1423 ++j;
1424 }
1425
1426 start = j;
1427
1428 while (j < num_events && (events_id[j] & DEVICE_MASK) >> DEVICE_SHIFT == (uint64_t) i) {
1429 ++j;
1430 }
1431
1432 stop = j;
1433
1434 return stop - start;
1435}
1436
1441typedef struct cb_context_node {
1442 unsigned long tid;
1443 long long *counters;
1444 struct cb_context_node *next;
1446
1447static struct {
1448 uint64_t *events_id;
1449 rocprofiler_feature_t *features;
1454
1455static int verify_events(uint64_t *, int);
1456static int init_callbacks(rocprofiler_feature_t *, int);
1457static int register_dispatch_counter(unsigned long, int *);
1458static int increment_and_fetch_dispatch_counter(unsigned long);
1459static int decrement_and_fetch_dispatch_counter(unsigned long);
1460static int unregister_dispatch_counter(unsigned long);
1461static int fetch_dispatch_counter(unsigned long);
1464static int get_context_node(int, cb_context_node_t **);
1465static int get_context_counters(int, cb_context_node_t *, rocp_ctx_t);
1466static void put_context_counters(rocprofiler_feature_t *, int, cb_context_node_t *);
1467static void put_context_node(int, cb_context_node_t *);
1468static int intercept_ctx_init(uint64_t *, int, rocp_ctx_t *);
1469static int intercept_ctx_finalize(rocp_ctx_t *);
1470
1471int
1472intercept_ctx_open(uint64_t *events_id, int num_events, rocp_ctx_t *rocp_ctx)
1473{
1474 int papi_errno = PAPI_OK;
1475
1476 if (num_events <= 0) {
1477 return PAPI_ENOEVNT;
1478 }
1479
1481
1482 papi_errno = verify_events(events_id, num_events);
1483 if (papi_errno != PAPI_OK) {
1484 SUBDBG("[ROCP intercept mode] Can only monitor one set of events "
1485 "per application run.");
1486 goto fn_fail;
1487 }
1488
1489 papi_errno = ctx_init(events_id, num_events, rocp_ctx);
1490 if (papi_errno != PAPI_OK) {
1491 goto fn_fail;
1492 }
1493
1494 unsigned long tid;
1495 rocc_thread_get_id(&tid);
1496 papi_errno = register_dispatch_counter(tid, &(*rocp_ctx)->u.intercept.dispatch_count);
1497 if (papi_errno != PAPI_OK) {
1498 goto fn_fail;
1499 }
1500
1501 (*rocp_ctx)->u.intercept.state |= ROCM_EVENTS_OPENED;
1502
1503 fn_exit:
1505 return papi_errno;
1506 fn_fail:
1507 ctx_finalize(rocp_ctx);
1508 goto fn_exit;
1509}
1510
1511int
1512intercept_ctx_close(rocp_ctx_t rocp_ctx)
1513{
1514 int papi_errno = PAPI_OK;
1515
1517
1518 if (intercept_global_state.active_thread_count == 0) {
1519 goto fn_exit;
1520 }
1521
1522 unsigned long tid;
1523 rocc_thread_get_id(&tid);
1524 papi_errno = unregister_dispatch_counter(tid);
1525 if (papi_errno != PAPI_OK) {
1526 goto fn_exit;
1527 }
1528
1529 ctx_finalize(&rocp_ctx);
1530
1531 fn_exit:
1533 return papi_errno;
1534}
1535
1536int
1537intercept_ctx_start(rocp_ctx_t rocp_ctx)
1538{
1539 int papi_errno = PAPI_OK;
1540
1542
1543 if (!(rocp_ctx->u.sampling.state & ROCM_EVENTS_OPENED)) {
1544 SUBDBG("[ROCP intercept mode] Cannot start eventset, not opened.");
1545 papi_errno = PAPI_EINVAL;
1546 goto fn_fail;
1547 }
1548
1549 if (rocp_ctx->u.intercept.state & ROCM_EVENTS_RUNNING) {
1550 SUBDBG("[ROCP intercept mode] Cannot start eventset, already running.");
1551 papi_errno = PAPI_EINVAL;
1552 goto fn_fail;
1553 }
1554
1555 if (intercept_global_state.kernel_count++ == 0) {
1556 if (rocp_start_queue_cbs_p() != HSA_STATUS_SUCCESS) {
1557 papi_errno = PAPI_EMISC;
1558 goto fn_fail;
1559 }
1560 }
1561
1562 rocp_ctx->u.intercept.state |= ROCM_EVENTS_RUNNING;
1563
1564 fn_exit:
1566 return papi_errno;
1567 fn_fail:
1568 goto fn_exit;
1569}
1570
1571int
1572intercept_ctx_stop(rocp_ctx_t rocp_ctx)
1573{
1574 int papi_errno = PAPI_OK;
1575
1577
1578 if (!(rocp_ctx->u.sampling.state & ROCM_EVENTS_OPENED)) {
1579 SUBDBG("[ROCP intercept mode] Cannot stop eventset, not opened.");
1580 papi_errno = PAPI_EINVAL;
1581 goto fn_fail;
1582 }
1583
1584 if (!(rocp_ctx->u.intercept.state & ROCM_EVENTS_RUNNING)) {
1585 SUBDBG("[ROCP intercept mode] Cannot stop eventset, not running.");
1586 papi_errno = PAPI_EINVAL;
1587 goto fn_fail;
1588 }
1589
1590 if (--intercept_global_state.kernel_count == 0) {
1591 if (rocp_stop_queue_cbs_p() != HSA_STATUS_SUCCESS) {
1592 papi_errno = PAPI_EMISC;
1593 goto fn_fail;
1594 }
1595 }
1596
1597 rocp_ctx->u.intercept.state &= ~ROCM_EVENTS_RUNNING;
1598
1599 fn_exit:
1601 return papi_errno;
1602 fn_fail:
1603 goto fn_exit;
1604}
1605
1606int
1607intercept_ctx_read(rocp_ctx_t rocp_ctx, long long **counts)
1608{
1609 int papi_errno = PAPI_OK;
1610
1612
1613 unsigned long tid;
1614 rocc_thread_get_id(&tid);
1615 int dispatch_count = fetch_dispatch_counter(tid);
1616 if (dispatch_count == 0) {
1617 *counts = rocp_ctx->u.intercept.counters;
1618 goto fn_exit;
1619 }
1620
1621 cb_context_node_t *n = NULL;
1622
1623 int devs_count;
1624 papi_errno = rocc_dev_get_count(rocp_ctx->u.intercept.device_map, &devs_count);
1625 if (papi_errno != PAPI_OK) {
1626 goto fn_exit;
1627 }
1628
1629 int i;
1630 for (i = 0; i < devs_count; ++i) {
1631 int dev_id;
1632 papi_errno = rocc_dev_get_id(rocp_ctx->u.intercept.device_map, i, &dev_id);
1633 if (papi_errno != PAPI_OK) {
1634 goto fn_exit;
1635 }
1636
1637 while (dispatch_count > 0) {
1638 get_context_node(dev_id, &n);
1639 if (n == NULL) {
1640 break;
1641 }
1642
1643 get_context_counters(dev_id, n, rocp_ctx);
1644 dispatch_count = decrement_and_fetch_dispatch_counter(tid);
1646 }
1647 }
1648
1649 if (dispatch_count > 0) {
1650 SUBDBG("[ROCP intercept mode] User monitoring GPU i but running on j.");
1651 }
1652
1653 *counts = rocp_ctx->u.intercept.counters;
1654
1655 fn_exit:
1657 return papi_errno;
1658}
1659
1660int
1661intercept_ctx_reset(rocp_ctx_t rocp_ctx)
1662{
1663 int i;
1664
1665 for (i = 0; i < rocp_ctx->u.intercept.feature_count; ++i) {
1666 rocp_ctx->u.intercept.counters[i] = 0;
1667 }
1668
1669 return PAPI_OK;
1670}
1671
1681typedef struct {
1682 rocprofiler_pool_t *pools[PAPI_ROCM_MAX_DEV_COUNT];
1684
1686
1687int
1689{
1690 /* calling rocprofiler_pool_close() here would cause
1691 * a double free runtime error. */
1692
1696
1697 (*hsa_shut_down_p)();
1698
1700
1701 if (intercept_global_state.features) {
1704 }
1705
1706 if (intercept_global_state.events_id) {
1708 }
1709
1710 return PAPI_OK;
1711}
1712
1717int
1719{
1720 int papi_errno = PAPI_OK;
1721 int i;
1722 char name[PAPI_MAX_STR_LEN] = { 0 };
1723
1724 if (intercept_global_state.events_id == NULL) {
1725 return papi_errno;
1726 }
1727
1728 for (i = 0; i < num_events; ++i) {
1729 event_info_t info;
1730 papi_errno = evt_id_to_info(events_id[i], &info);
1731 if (papi_errno != PAPI_OK) {
1732 break;
1733 }
1734 if (ntv_table_p->events[info.nameid].instances > 1) {
1735 sprintf(name, "%s[%i]", ntv_table_p->events[info.nameid].name, info.instance);
1736 } else {
1737 sprintf(name, "%s", ntv_table_p->events[info.nameid].name);
1738 }
1739 void *p;
1741 papi_errno = PAPI_ECNFLCT;
1742 break;
1743 }
1744 }
1745
1746 return papi_errno;
1747}
1748
1749static int count_unique_events(uint64_t *events_id, int num_events, int *num_unique);
1750static int copy_unique_events(uint64_t *target, uint64_t *source, int source_len);
1751static int save_callback_features(rocprofiler_feature_t *features, int feature_count);
1752static int cleanup_callback_features(rocprofiler_feature_t *features, int feature_count);
1753
1754int
1755intercept_ctx_init(uint64_t *events_id, int num_events, rocp_ctx_t *rocp_ctx)
1756{
1757 int papi_errno = PAPI_OK;
1758 long long *counters = NULL;
1759 *rocp_ctx = NULL;
1760
1761 rocc_bitmap_t bitmap;
1763 if (papi_errno != PAPI_OK) {
1764 return papi_errno;
1765 }
1766
1767 if (intercept_global_state.events_id == NULL) {
1768 int num_unique_events = 0;
1769 count_unique_events(events_id, num_events, &num_unique_events);
1770 intercept_global_state.events_id = papi_calloc(num_unique_events, sizeof(uint64_t));
1771 if (intercept_global_state.events_id == NULL) {
1772 papi_errno = PAPI_ENOMEM;
1773 goto fn_fail_undo;
1774 }
1776
1777 intercept_global_state.features = papi_calloc(num_unique_events, sizeof(*intercept_global_state.features));
1778 if (intercept_global_state.features == NULL) {
1779 papi_errno = PAPI_ENOMEM;
1780 goto fn_fail_undo;
1781 }
1782
1783 papi_errno = init_features(intercept_global_state.events_id, num_unique_events, intercept_global_state.features);
1784 if (papi_errno != PAPI_OK) {
1785 goto fn_fail_undo;
1786 }
1787
1788 intercept_global_state.feature_count = num_unique_events;
1789
1790 papi_errno = save_callback_features(intercept_global_state.features, intercept_global_state.feature_count);
1791 if (papi_errno != PAPI_OK) {
1792 goto fn_fail_undo;
1793 }
1794
1795 papi_errno = init_callbacks(intercept_global_state.features, intercept_global_state.feature_count);
1796 if (papi_errno != PAPI_OK) {
1797 goto fn_fail;
1798 }
1799 }
1800
1801 counters = papi_calloc(num_events, sizeof(*counters));
1802 if (counters == NULL) {
1803 papi_errno = PAPI_ENOMEM;
1804 goto fn_fail;
1805 }
1806
1807 *rocp_ctx = papi_calloc(1, sizeof(**rocp_ctx));
1808 if (*rocp_ctx == NULL) {
1809 return PAPI_ENOMEM;
1810 }
1811
1812 (*rocp_ctx)->u.intercept.events_id = events_id;
1813 (*rocp_ctx)->u.intercept.counters = counters;
1814 (*rocp_ctx)->u.intercept.dispatch_count = 0;
1815 (*rocp_ctx)->u.intercept.device_map = bitmap;
1816 (*rocp_ctx)->u.intercept.feature_count = num_events;
1817
1818 fn_exit:
1819 return papi_errno;
1820 fn_fail:
1821 if (counters) {
1822 papi_free(counters);
1823 }
1824 if (*rocp_ctx) {
1825 papi_free(*rocp_ctx);
1826 }
1827 *rocp_ctx = NULL;
1828 goto fn_exit;
1829 fn_fail_undo:
1831 if (intercept_global_state.events_id) {
1833 }
1834 if (intercept_global_state.features) {
1836 }
1837 goto fn_exit;
1838}
1839
1840int
1841count_unique_events(uint64_t *events_id, int num_events, int *num_unique)
1842{
1843 int papi_errno = PAPI_OK;
1844 char name[PAPI_MAX_STR_LEN] = { 0 };
1845 int i;
1846 int count = 0;
1847 void *count_table, *p;
1848
1849 htable_init(&count_table);
1850
1851 for (i = 0; i < num_events; ++i) {
1852 event_info_t info;
1853 papi_errno = evt_id_to_info(events_id[i], &info);
1854 if (papi_errno != PAPI_OK) {
1855 return papi_errno;
1856 }
1857 if (ntv_table_p->events[info.nameid].instances > 1) {
1858 sprintf(name, "%s[%i]", ntv_table_p->events[info.nameid].name, info.instance);
1859 } else {
1860 sprintf(name, "%s", ntv_table_p->events[info.nameid].name);
1861 }
1862 if (htable_find(count_table, name, &p) != HTABLE_SUCCESS) {
1863 htable_insert(count_table, name, NULL);
1864 ++count;
1865 }
1866 }
1867
1868 *num_unique = count;
1869
1870 htable_shutdown(count_table);
1871 return papi_errno;
1872}
1873
1874int
1875copy_unique_events(uint64_t *target, uint64_t *source, int source_len)
1876{
1877 int papi_errno = PAPI_OK;
1878 char name[PAPI_MAX_STR_LEN] = { 0 };
1879 int i, j;
1880 void *count_table, *p;
1881
1882 htable_init(&count_table);
1883
1884 for (i = 0, j = 0; i < source_len; ++i) {
1885 event_info_t info;
1886 papi_errno = evt_id_to_info(source[i], &info);
1887 if (papi_errno) {
1888 return papi_errno;
1889 }
1890 if (ntv_table_p->events[info.nameid].instances > 1) {
1891 sprintf(name, "%s[%i]", ntv_table_p->events[info.nameid].name, info.instance);
1892 } else {
1893 sprintf(name, "%s", ntv_table_p->events[info.nameid].name);
1894 }
1895 if (htable_find(count_table, name, &p) != HTABLE_SUCCESS) {
1896 htable_insert(count_table, name, NULL);
1897 target[j++] = source[i];
1898 }
1899 }
1900
1901 htable_shutdown(count_table);
1902 return papi_errno;
1903}
1904
1905int
1907{
1908 int i;
1909 for (i = 0; i < feature_count; ++i) {
1911 }
1912 return PAPI_OK;
1913}
1914
1915int
1917{
1918 int i;
1919 for (i = 0; i < feature_count; ++i) {
1921 }
1922 return PAPI_OK;
1923}
1924
1925int
1926intercept_ctx_finalize(rocp_ctx_t *rocp_ctx)
1927{
1928 if (*rocp_ctx == NULL) {
1929 return PAPI_OK;
1930 }
1931
1932 if ((*rocp_ctx)->u.intercept.counters) {
1933 papi_free((*rocp_ctx)->u.intercept.counters);
1934 }
1935
1936 papi_free(*rocp_ctx);
1937 *rocp_ctx = NULL;
1938
1939 return PAPI_OK;
1940}
1941
1946int
1948 rocp_ctx_t *rocp_ctx)
1949{
1951 return sampling_ctx_init(events_id, num_events, rocp_ctx);
1952 }
1953
1954 return intercept_ctx_init(events_id, num_events, rocp_ctx);
1955}
1956
1957int
1958ctx_finalize(rocp_ctx_t *rocp_ctx)
1959{
1961 return sampling_ctx_finalize(rocp_ctx);
1962 }
1963
1964 return intercept_ctx_finalize(rocp_ctx);
1965}
1966
1972typedef struct {
1973 rocprofiler_feature_t *features;
1976
1981typedef struct {
1983 unsigned long tid;
1984 hsa_agent_t agent;
1985 rocprofiler_group_t group;
1986 rocprofiler_callback_data_t data;
1988
1989static bool context_handler_cb(const rocprofiler_pool_entry_t *, void *);
1990static hsa_status_t dispatch_cb(const rocprofiler_callback_data_t *, void *, rocprofiler_group_t *);
1991
1992int
1993init_callbacks(rocprofiler_feature_t *features, int feature_count)
1994{
1995 int papi_errno = PAPI_OK;
1996
1997 cb_context_arg_t *context_arg = papi_calloc(1, sizeof(cb_context_arg_t));
1998 if (context_arg == NULL) {
1999 papi_errno = PAPI_ENOMEM;
2000 goto fn_fail;
2001 }
2002
2003 context_arg->features = features;
2004 context_arg->feature_count = feature_count;
2005
2006 rocprofiler_pool_properties_t properties;
2007 properties.num_entries = 128;
2008 properties.payload_bytes = sizeof(cb_context_payload_t);
2009 properties.handler = context_handler_cb;
2010 properties.handler_arg = context_arg;
2011
2012 /* FIXME: the intercept code initializes callbacks for every device
2013 * regardless what the user asked for. Moreover, every device
2014 * is initialized with the same callback events (features).
2015 * The intercept code should eventually be changed to allow
2016 * user to initialize different callbacks on different devices
2017 * and also to reinitialize already initialized callbacks on
2018 * any given device. Rocm 5.3.0 still does not support this
2019 * callback initialization mechanism.
2020 */
2021 int i;
2022 for (i = 0; i < device_table_p->count; ++i) {
2023 hsa_agent_t agent = device_table_p->devices[i];
2024
2025 rocprofiler_pool_t *pool = NULL;
2026 if (rocp_pool_open_p(agent, features, feature_count, &pool, 0, &properties) != HSA_STATUS_SUCCESS) {
2027 papi_errno = PAPI_EMISC;
2028 goto fn_fail;
2029 }
2030
2031 cb_dispatch_arg.pools[i] = pool;
2032 }
2033
2034 rocprofiler_queue_callbacks_t dispatch_ptrs = { 0 };
2035 dispatch_ptrs.dispatch = dispatch_cb;
2036
2037 if (rocp_set_queue_cbs_p(dispatch_ptrs, &cb_dispatch_arg) != HSA_STATUS_SUCCESS) {
2038 papi_errno = PAPI_EMISC;
2039 goto fn_fail;
2040 }
2041
2042 fn_exit:
2043 return papi_errno;
2044 fn_fail:
2045 if (context_arg) {
2046 papi_free(context_arg);
2047 }
2048 goto fn_exit;
2049}
2050
2051int
2052register_dispatch_counter(unsigned long tid, int *counter)
2053{
2054 int papi_errno = PAPI_OK;
2055 int htable_errno = HTABLE_SUCCESS;
2056 char key[PAPI_MIN_STR_LEN] = { 0 };
2057
2058 /* FIXME: probably better using a different hash table for this */
2059 sprintf(key, "%lu", tid);
2060 int *counter_p;
2061 htable_errno = htable_find(htable, key, (void **) &counter_p);
2062 if (htable_errno == HTABLE_SUCCESS) {
2063 papi_errno = PAPI_EMISC;
2064 goto fn_exit;
2065 }
2066
2067 htable_insert(htable, (const char *) key, counter);
2068 ++intercept_global_state.active_thread_count;
2069
2070 fn_exit:
2071 return papi_errno;
2072}
2073
2074int
2076{
2077 int papi_errno = PAPI_OK;
2078 int htable_errno = HTABLE_SUCCESS;
2079 char key[PAPI_MIN_STR_LEN] = { 0 };
2080
2081 sprintf(key, "%lu", tid);
2082 int *counter_p;
2083 htable_errno = htable_find(htable, (const char *) key, (void **) &counter_p);
2084 if (htable_errno != HTABLE_SUCCESS) {
2085 papi_errno = PAPI_ECMP;
2086 goto fn_exit;
2087 }
2088
2089 htable_delete(htable, (const char *) key);
2090 --intercept_global_state.active_thread_count;
2091
2092 fn_exit:
2093 return papi_errno;
2094}
2095
2100static void process_context_entry(cb_context_payload_t *, rocprofiler_feature_t *, int);
2101
2112
2113hsa_status_t
2114dispatch_cb(const rocprofiler_callback_data_t *callback_data, void *arg, rocprofiler_group_t *group)
2115{
2116 hsa_agent_t agent = callback_data->agent;
2117 hsa_status_t status = HSA_STATUS_SUCCESS;
2118
2119 int dev_id;
2120 rocc_dev_get_agent_id(agent, &dev_id);
2121
2122 cb_dispatch_arg_t *dispatch_arg = (cb_dispatch_arg_t *) arg;
2123 rocprofiler_pool_t *pool = dispatch_arg->pools[dev_id];
2124 rocprofiler_pool_entry_t pool_entry;
2125 hsa_status_t rocp_errno = rocp_pool_fetch_p(pool, &pool_entry);
2126 if (rocp_errno != HSA_STATUS_SUCCESS) {
2127 status = rocp_errno;
2128 goto fn_exit;
2129 }
2130
2131 rocprofiler_t *context = pool_entry.context;
2132 cb_context_payload_t *payload = (cb_context_payload_t *) pool_entry.payload;
2133
2134 rocp_errno = rocp_get_group_p(context, 0, group);
2135 if (rocp_errno != HSA_STATUS_SUCCESS) {
2136 status = rocp_errno;
2137 goto fn_exit;
2138 }
2139
2140 unsigned long tid;
2141 rocc_thread_get_id(&tid);
2142 payload->tid = tid;
2143 payload->agent = agent;
2144 payload->group = *group;
2145 payload->data = *callback_data;
2146
2148 payload->valid = true;
2150
2151 fn_exit:
2152 return status;
2153}
2154
2155bool
2156context_handler_cb(const rocprofiler_pool_entry_t *entry, void *arg)
2157{
2158 cb_context_payload_t *payload = (cb_context_payload_t *) entry->payload;
2159 cb_context_arg_t *context_arg = (cb_context_arg_t *) arg;
2160
2161 process_context_entry(payload, context_arg->features, context_arg->feature_count);
2162
2163 return false;
2164}
2165
2166void
2168{
2169 fn_check_again:
2171 if (payload->valid == false) {
2173 goto fn_check_again;
2174 }
2175
2176 if (feature_count < 1) {
2177 goto fn_exit;
2178 }
2179
2180 if (rocp_group_get_data_p(&payload->group) != HSA_STATUS_SUCCESS) {
2181 goto fn_exit;
2182 }
2183
2184 if (rocp_get_metrics_p(payload->group.context)) {
2185 goto fn_exit;
2186 }
2187
2188 if (increment_and_fetch_dispatch_counter(payload->tid) < 0) {
2189 /* thread not registered, ignore counters */
2190 goto fn_exit;
2191 }
2192
2194 if (n == NULL) {
2196 goto fn_exit;
2197 }
2198
2199 int dev_id;
2200 rocc_dev_get_agent_id(payload->agent, &dev_id);
2201
2202 n->tid = payload->tid;
2204 put_context_node(dev_id, n);
2205
2206 fn_exit:
2208}
2209
2212{
2213 cb_context_node_t *n = papi_malloc(sizeof(*n));
2214 if (n == NULL) {
2215 return NULL;
2216 }
2217
2218 n->counters = papi_malloc(num_events * sizeof(long long));
2219 if (n->counters == NULL) {
2220 papi_free(n);
2221 return NULL;
2222 }
2223
2224 return n;
2225}
2226
2227void
2229{
2230 int i;
2231 for (i = 0; i < feature_count; ++i) {
2232 const rocprofiler_feature_t *f = &features[i];
2233 switch(f->data.kind) {
2234 case ROCPROFILER_DATA_KIND_INT32:
2235 n->counters[i] = (long long) f->data.result_int32;
2236 break;
2237 case ROCPROFILER_DATA_KIND_INT64:
2238 n->counters[i] = f->data.result_int64;
2239 break;
2240 case ROCPROFILER_DATA_KIND_FLOAT:
2241 n->counters[i] = (long long) f->data.result_float;
2242 break;
2243 case ROCPROFILER_DATA_KIND_DOUBLE:
2244 n->counters[i] = (long long) f->data.result_double;
2245 break;
2246 default:
2247 SUBDBG("Unsupported data kind from rocprofiler");
2248 }
2249 }
2250}
2251
2252void
2254{
2255 n->next = NULL;
2256
2257 if (cb_ctx_list_heads[dev_id] != NULL) {
2258 n->next = cb_ctx_list_heads[dev_id];
2259 }
2260
2261 cb_ctx_list_heads[dev_id] = n;
2262}
2263
2264int
2266{
2267 int htable_errno = HTABLE_SUCCESS;
2268 char key[PAPI_MIN_STR_LEN] = { 0 };
2269
2270 sprintf(key, "%lu", tid);
2271 int *counter_p;
2272 htable_errno = htable_find(htable, (const char *) key, (void **) &counter_p);
2273 if (htable_errno != HTABLE_SUCCESS) {
2274 return 0;
2275 }
2276
2277 return ++(*counter_p);
2278}
2279
2280int
2281fetch_dispatch_counter(unsigned long tid)
2282{
2283 int htable_errno = HTABLE_SUCCESS;
2284 char key[PAPI_MIN_STR_LEN] = { 0 };
2285
2286 sprintf(key, "%lu", tid);
2287 int *counter_p;
2288 htable_errno = htable_find(htable, (const char *) key, (void **) &counter_p);
2289 if (htable_errno != HTABLE_SUCCESS) {
2290 return 0;
2291 }
2292
2293 return (*counter_p);
2294}
2295
2296int
2298{
2299 cb_context_node_t *curr = cb_ctx_list_heads[dev_id];
2300 cb_context_node_t *flag = NULL;
2301 cb_context_node_t *prev = curr;
2302 cb_context_node_t *flag_prev;
2303
2304 while (curr) {
2305 unsigned long tid;
2306 rocc_thread_get_id(&tid);
2307 if (curr->tid == tid) {
2308 flag_prev = prev;
2309 flag = curr;
2310 }
2311 prev = curr;
2312 curr = curr->next;
2313 }
2314
2315 if (flag != NULL) {
2316 flag_prev->next = flag->next;
2317 if (cb_ctx_list_heads[dev_id] == flag) {
2318 cb_ctx_list_heads[dev_id] = NULL;
2319 }
2320 }
2321
2322 *n = flag;
2323 return PAPI_OK;
2324}
2325
2326int
2328{
2329 int htable_errno = HTABLE_SUCCESS;
2330 char key[PAPI_MIN_STR_LEN] = { 0 };
2331
2332 sprintf(key, "%lu", tid);
2333 int *counter_p;
2334 htable_errno = htable_find(htable, (const char *) key, (void **) &counter_p);
2335 if (htable_errno != HTABLE_SUCCESS) {
2336 return 0;
2337 }
2338
2339 return --(*counter_p);
2340}
2341
2342int
2343get_context_counters(int dev_id, cb_context_node_t *n, rocp_ctx_t rocp_ctx)
2344{
2345 int papi_errno = PAPI_OK;
2346 uint64_t *events_id = rocp_ctx->u.intercept.events_id;
2347
2348 /* Here we get events_id ordered according to user's viewpoint and we want
2349 * to map these to events_id ordered according to callbacks' viewpoint. We
2350 * compare events from the user and the callbacks using a brute force
2351 * approach as the number of events is typically small. */
2352 int i, j;
2353 for (i = 0; i < intercept_global_state.feature_count; ++i) {
2354 uint64_t event_id = intercept_global_state.events_id[i] | (dev_id << DEVICE_SHIFT);
2355
2356 for (j = 0; j < rocp_ctx->u.intercept.feature_count; ++j) {
2357 if (event_id == events_id[j]) {
2358 rocp_ctx->u.intercept.counters[j] += n->counters[i];
2359 break;
2360 }
2361 }
2362 }
2363
2364 return papi_errno;
2365}
2366
2367void
2369{
2370 papi_free(n->counters);
2371 papi_free(n);
2372}
2373
2374void __attribute__((visibility("default")))
2375OnLoadToolProp(rocprofiler_settings_t *settings __attribute__((unused)))
2376{
2377 init_rocp_env();
2378}
2379
2380void __attribute__((visibility("default")))
2381OnUnloadTool(void)
2382{
2383 return;
2384}
int i
static long count
Returns a string describing the PAPI error code.
double f(double a)
Definition: cpi.c:23
static int htable_insert(void *handle, const char *key, void *in)
Definition: cuda/htable.h:92
static int htable_delete(void *handle, const char *key)
Definition: cuda/htable.h:130
#define HTABLE_ENOVAL
Definition: cuda/htable.h:18
static int htable_shutdown(void *handle)
Definition: cuda/htable.h:76
#define HTABLE_SUCCESS
Definition: cuda/htable.h:17
static int htable_find(void *handle, const char *key, void **out)
Definition: cuda/htable.h:161
static int htable_init(void **handle)
Definition: cuda/htable.h:55
#define PAPI_ENUM_EVENTS
Definition: f90papi.h:224
#define PAPI_OK
Definition: f90papi.h:73
#define PAPI_ENUM_FIRST
Definition: f90papi.h:85
#define PAPI_MIN_STR_LEN
Definition: f90papi.h:208
#define PAPI_ECNFLCT
Definition: f90papi.h:234
#define PAPI_ENOEVNT
Definition: f90papi.h:139
#define PAPI_END
Definition: f90papi.h:303
#define PAPI_EINVAL
Definition: f90papi.h:115
#define PAPI_ENOSUPP
Definition: f90papi.h:244
#define PAPI_MAX_STR_LEN
Definition: f90papi.h:77
#define PAPI_EMISC
Definition: f90papi.h:122
#define PAPI_NTV_ENUM_UMASKS
Definition: f90papi.h:66
#define PAPI_ECMP
Definition: f90papi.h:214
#define PAPI_ENOMEM
Definition: f90papi.h:16
#define PAPI_EBUF
Definition: f90papi.h:253
char events[MAX_EVENTS][BUFSIZ]
static pthread_key_t key
static struct timeval start
static int num_events
static int * features
Definition: linux-nvml.c:147
static nvmlDevice_t * devices
Definition: linux-nvml.c:146
unsigned long AO_t __attribute__((__aligned__(4)))
Definition: m68k.h:21
#define SUBDBG(format, args...)
Definition: papi_debug.h:64
#define papi_calloc(a, b)
Definition: papi_memory.h:37
#define papi_free(a)
Definition: papi_memory.h:35
#define papi_strdup(a)
Definition: papi_memory.h:39
#define papi_malloc(a)
Definition: papi_memory.h:34
#define papi_realloc(a, b)
Definition: papi_memory.h:36
int rocc_dev_check(rocc_bitmap_t bitmap, int i)
Definition: roc_common.c:187
int rocc_dev_get_map(rocc_dev_get_map_cb query_dev_id, uint64_t *events_id, int num_events, rocc_bitmap_t *bitmap)
Definition: roc_common.c:74
hsa_status_t(* hsa_status_string_p)(hsa_status_t, const char **)
Definition: roc_common.c:14
hsa_status_t(* hsa_queue_destroy_p)(hsa_queue_t *)
Definition: roc_common.c:13
char error_string[PAPI_MAX_STR_LEN]
Definition: roc_common.c:17
int rocc_dev_acquire(rocc_bitmap_t bitmap)
Definition: roc_common.c:93
int rocc_dev_get_count(rocc_bitmap_t bitmap, int *num_devices)
Definition: roc_common.c:121
device_table_t * device_table_p
Definition: roc_common.c:19
int rocc_dev_get_agent_id(hsa_agent_t agent, int *dev_id)
Definition: roc_common.c:169
int rocc_dev_get_id(rocc_bitmap_t bitmap, int dev_count, int *device_id)
Definition: roc_common.c:140
int rocc_dev_set(rocc_bitmap_t *bitmap, int i)
Definition: roc_common.c:180
int rocc_thread_get_id(unsigned long *tid)
Definition: roc_common.c:193
int rocc_dev_release(rocc_bitmap_t bitmap)
Definition: roc_common.c:106
int64_t rocc_bitmap_t
Definition: roc_common.h:19
#define PAPI_ROCM_MAX_DEV_COUNT
Definition: roc_common.h:16
static int evt_id_create(event_info_t *info, uint64_t *event_id)
Definition: roc_profiler.c:764
#define INSTAN_SHIFT
Definition: roc_profiler.c:32
static int sampling_ctx_open(uint64_t *, int, rocp_ctx_t *)
Definition: roc_profiler.c:921
static int count_unique_events(uint64_t *events_id, int num_events, int *num_unique)
int rocp_evt_code_to_descr(uint64_t event_code, char *descr, int len)
Definition: roc_profiler.c:260
static hsa_status_t(* rocp_open_p)(hsa_agent_t, rocprofiler_feature_t *, uint32_t, rocprofiler_t **, uint32_t, rocprofiler_properties_t *)
Definition: roc_profiler.c:93
static int ctx_get_dev_feature_count(rocp_ctx_t, int)
static cb_context_node_t * cb_ctx_list_heads[PAPI_ROCM_MAX_DEV_COUNT]
static cb_dispatch_arg_t cb_dispatch_arg
static int intercept_ctx_close(rocp_ctx_t)
static void process_context_entry(cb_context_payload_t *, rocprofiler_feature_t *, int)
static hsa_status_t(* rocp_pool_fetch_p)(rocprofiler_pool_t *, rocprofiler_pool_entry_t *)
Definition: roc_profiler.c:108
static hsa_status_t(* rocp_start_p)(rocprofiler_t *, uint32_t)
Definition: roc_profiler.c:96
int rocp_ctx_open(uint64_t *events_id, int num_events, rocp_ctx_t *rocp_ctx)
Definition: roc_profiler.c:390
static ntv_event_table_t ntv_table
Definition: roc_profiler.c:145
int kernel_count
static int intercept_ctx_init(uint64_t *, int, rocp_ctx_t *)
static hsa_status_t(* rocp_get_metrics_p)(const rocprofiler_t *)
Definition: roc_profiler.c:102
static int ctx_open(rocp_ctx_t)
static int ctx_close(rocp_ctx_t)
int rocp_ctx_start(rocp_ctx_t rocp_ctx)
Definition: roc_profiler.c:412
static hsa_status_t(* rocp_stop_p)(rocprofiler_t *, uint32_t)
Definition: roc_profiler.c:98
static int load_rocp_sym(void)
Definition: roc_profiler.c:470
static int verify_events(uint64_t *, int)
#define NAMEID_SHIFT
Definition: roc_profiler.c:34
static int save_callback_features(rocprofiler_feature_t *features, int feature_count)
static hsa_status_t(* rocp_reset_p)(rocprofiler_t *, uint32_t)
Definition: roc_profiler.c:103
int rocp_shutdown(void)
Definition: roc_profiler.c:456
static int unregister_dispatch_counter(unsigned long)
int rocp_init(void)
Definition: roc_profiler.c:159
static int intercept_ctx_get_dev_feature_count(rocp_ctx_t, int)
#define DEVICE_SHIFT
Definition: roc_profiler.c:31
rocprofiler_feature_t * features
static int evt_name_to_basename(const char *name, char *base, int len)
Definition: roc_profiler.c:851
static int ctx_finalize(rocp_ctx_t *)
static hsa_status_t(* rocp_group_count_p)(const rocprofiler_t *, uint32_t *)
Definition: roc_profiler.c:95
int rocp_ctx_close(rocp_ctx_t rocp_ctx)
Definition: roc_profiler.c:401
static void * htable
Definition: roc_profiler.c:147
static void * rocp_dlp
Definition: roc_profiler.c:144
static int evt_code_to_name(uint64_t event_code, char *name, int len)
Definition: roc_profiler.c:739
#define QLMASK_MASK
Definition: roc_profiler.c:37
static hsa_status_t(* rocp_start_queue_cbs_p)(void)
Definition: roc_profiler.c:111
static int intercept_ctx_start(rocp_ctx_t)
static hsa_status_t(* rocp_pool_open_p)(hsa_agent_t, rocprofiler_feature_t *, uint32_t, rocprofiler_pool_t **, uint32_t, rocprofiler_pool_properties_t *)
Definition: roc_profiler.c:106
static hsa_status_t(* rocp_stop_queue_cbs_p)(void)
Definition: roc_profiler.c:112
static int cleanup_callback_features(rocprofiler_feature_t *features, int feature_count)
#define QLMASK_SHIFT
Definition: roc_profiler.c:33
static int init_callbacks(rocprofiler_feature_t *, int)
int rocp_evt_enum(uint64_t *event_code, int modifier)
Definition: roc_profiler.c:193
static hsa_status_t(* rocp_group_get_data_p)(rocprofiler_group_t *)
Definition: roc_profiler.c:101
static int sampling_ctx_close(rocp_ctx_t)
Definition: roc_profiler.c:952
static int increment_and_fetch_dispatch_counter(unsigned long)
static hsa_status_t get_ntv_events_cb(const rocprofiler_info_data_t, void *)
Definition: roc_profiler.c:880
static int sampling_ctx_finalize(rocp_ctx_t *)
static int unload_rocp_sym(void)
Definition: roc_profiler.c:545
static bool context_handler_cb(const rocprofiler_pool_entry_t *, void *)
static int intercept_ctx_stop(rocp_ctx_t)
static int evt_name_to_instance(const char *name, int *instance)
Definition: roc_profiler.c:820
static cb_context_node_t * alloc_context_node(int)
static int intercept_ctx_open(uint64_t *, int, rocp_ctx_t *)
static struct @2 intercept_global_state
static hsa_status_t(* rocp_iterate_info_p)(const hsa_agent_t *, rocprofiler_info_kind_t, hsa_status_t(*)(const rocprofiler_info_data_t, void *), void *)
Definition: roc_profiler.c:89
static void free_context_node(cb_context_node_t *)
int feature_count
static ntv_event_table_t * ntv_table_p
Definition: roc_profiler.c:146
static hsa_status_t(* rocp_error_string_p)(const char **)
Definition: roc_profiler.c:90
static hsa_status_t(* rocp_get_group_p)(rocprofiler_t *, uint32_t, rocprofiler_group_t *)
Definition: roc_profiler.c:99
static hsa_status_t(* rocp_get_info_p)(const hsa_agent_t *, rocprofiler_info_kind_t, void *)
Definition: roc_profiler.c:88
static int get_context_counters(int, cb_context_node_t *, rocp_ctx_t)
static int get_context_node(int, cb_context_node_t **)
static int evt_id_to_info(uint64_t event_id, event_info_t *info)
Definition: roc_profiler.c:774
static hsa_status_t(* rocp_get_data_p)(rocprofiler_t *, uint32_t)
Definition: roc_profiler.c:100
static int intercept_shutdown(void)
static int fetch_dispatch_counter(unsigned long)
#define INSTAN_FLAG
Definition: roc_profiler.c:40
static int intercept_ctx_reset(rocp_ctx_t)
int rocp_evt_code_to_name(uint64_t event_code, char *name, int len)
Definition: roc_profiler.c:324
int rocp_evt_name_to_code(const char *name, uint64_t *event_code)
Definition: roc_profiler.c:276
static int finalize_features(rocprofiler_feature_t *, int)
static int intercept_ctx_read(rocp_ctx_t, long long **)
static hsa_status_t count_ntv_events_cb(const rocprofiler_info_data_t, void *)
static hsa_status_t(* rocp_remove_queue_cbs_p)(void)
Definition: roc_profiler.c:113
static int register_dispatch_counter(unsigned long, int *)
static hsa_status_t dispatch_cb(const rocprofiler_callback_data_t *, void *, rocprofiler_group_t *)
#define NAMEID_MASK
Definition: roc_profiler.c:38
int active_thread_count
static int sampling_ctx_read(rocp_ctx_t, long long **)
static int sampling_ctx_reset(rocp_ctx_t)
static hsa_status_t(* rocp_pool_flush_p)(rocprofiler_pool_t *)
Definition: roc_profiler.c:109
static int copy_unique_events(uint64_t *target, uint64_t *source, int source_len)
static hsa_status_t(* rocp_pool_close_p)(rocprofiler_pool_t *)
Definition: roc_profiler.c:107
static int init_rocp_env(void)
Definition: roc_profiler.c:580
static int intercept_ctx_finalize(rocp_ctx_t *)
#define DEVICE_MASK
Definition: roc_profiler.c:35
static int sampling_ctx_stop(rocp_ctx_t)
static hsa_status_t(* rocp_read_p)(rocprofiler_t *, uint32_t)
Definition: roc_profiler.c:97
int rocp_ctx_stop(rocp_ctx_t rocp_ctx)
Definition: roc_profiler.c:423
uint64_t * events_id
static int decrement_and_fetch_dispatch_counter(unsigned long)
int rocp_ctx_reset(rocp_ctx_t rocp_ctx)
Definition: roc_profiler.c:445
static hsa_status_t(* rocp_close_p)(rocprofiler_t *)
Definition: roc_profiler.c:94
static void * htable_intercept
Definition: roc_profiler.c:148
int rocp_init_environment(void)
Definition: roc_profiler.c:152
static int init_event_table(void)
Definition: roc_profiler.c:682
static hsa_status_t(* rocp_set_queue_cbs_p)(rocprofiler_queue_callbacks_t, void *)
Definition: roc_profiler.c:110
static int sampling_shutdown(void)
static void put_context_counters(rocprofiler_feature_t *, int, cb_context_node_t *)
#define DEVICE_FLAG
Definition: roc_profiler.c:39
static int ctx_init(uint64_t *, int, rocp_ctx_t *)
#define INSTAN_MASK
Definition: roc_profiler.c:36
static void put_context_node(int, cb_context_node_t *)
static int shutdown_event_table(void)
static int sampling_ctx_get_dev_feature_count(rocp_ctx_t, int)
unsigned int _rocm_lock
Definition: roc_profiler.c:85
static int evt_name_to_device(const char *name, int *device)
Definition: roc_profiler.c:809
static int sampling_ctx_init(uint64_t *, int, rocp_ctx_t *)
int rocp_evt_code_to_info(uint64_t event_code, PAPI_event_info_t *info)
Definition: roc_profiler.c:331
static int init_features(uint64_t *, int, rocprofiler_feature_t *)
int rocp_ctx_read(rocp_ctx_t rocp_ctx, long long **counts)
Definition: roc_profiler.c:434
static int sampling_ctx_start(rocp_ctx_t)
Definition: roc_profiler.c:973
unsigned int rocm_prof_mode
Definition: roc_profiler.c:84
static int event_id_to_dev_id_cb(uint64_t event_id, int *device)
#define ROCM_PROFILE_SAMPLING_MODE
#define ROCM_EVENTS_RUNNING
#define ROCM_EVENTS_OPENED
const char * name
Definition: rocs.c:225
int
Definition: sde_internal.h:89
long long int long long
Definition: sde_internal.h:85
char symbol[PAPI_HUGE_STR_LEN]
Definition: papi.h:960
char long_descr[PAPI_HUGE_STR_LEN]
Definition: papi.h:963
rocprofiler_feature_t * features
struct cb_context_node * next
unsigned long tid
long long * counters
rocprofiler_callback_data_t data
rocprofiler_group_t group
rocprofiler_pool_t * pools[PAPI_ROCM_MAX_DEV_COUNT]
hsa_agent_t devices[PAPI_ROCM_MAX_DEV_COUNT]
Definition: roc_common.h:23
int dev_id
Definition: roc_profiler.c:678
rocc_bitmap_t device_map
Definition: roc_profiler.c:46
char * name
Definition: roc_profiler.c:43
char * descr
Definition: roc_profiler.c:44
ntv_event_t * events
Definition: roc_profiler.c:50
struct rocd_ctx::@3::@4 intercept
rocprofiler_feature_t * features
Definition: roc_profiler.c:68
union rocd_ctx::@3 u
rocprofiler_properties_t * ctx_prop
Definition: roc_profiler.c:72
int feature_count
Definition: roc_profiler.c:62
struct rocd_ctx::@3::@5 sampling
int dispatch_count
Definition: roc_profiler.c:60
rocc_bitmap_t device_map
Definition: roc_profiler.c:61
rocprofiler_t ** contexts
Definition: roc_profiler.c:70
uint64_t * events_id
Definition: roc_profiler.c:58
long long * counters
Definition: roc_profiler.c:59
inline_static int _papi_hwi_lock(int lck)
Definition: threads.h:69
inline_static int _papi_hwi_unlock(int lck)
Definition: threads.h:83
char * descr