8#include <rocprofiler.h>
/*
 * Bit layout of a 64-bit event identifier.
 *
 * The *_WIDTH macros give the size of each field in bits, the *_SHIFT macros
 * give the position of each field's least-significant bit, and the *_MASK
 * macros select the field in place. Working through the arithmetic below
 * (with EVENTS_WIDTH == 64, i.e. 8-bit bytes):
 *
 *   bits 63..28  unused   (UNUSED_WIDTH = 64 - 7 - 7 - 2 - 12 = 36)
 *   bits 27..21  DEVICE   (DEVICE_SHIFT = 21, 7 bits)
 *   bits 20..14  INSTAN   (INSTAN_SHIFT = 14, 7 bits)
 *   bits 13..12  QLMASK   (QLMASK_SHIFT = 12, 2 bits)
 *   bits 11..0   NAMEID   (NAMEID_SHIFT =  0, 12 bits)
 */
25#define EVENTS_WIDTH (sizeof(uint64_t) * 8)
26#define DEVICE_WIDTH ( 7)
27#define INSTAN_WIDTH ( 7)
28#define QLMASK_WIDTH ( 2)
29#define NAMEID_WIDTH (12)
/* Whatever is left of the 64 bits after the four fields above. */
30#define UNUSED_WIDTH (EVENTS_WIDTH - DEVICE_WIDTH - INSTAN_WIDTH - QLMASK_WIDTH - NAMEID_WIDTH)
/* Fields are packed downward from just below the unused high bits; each
 * shift is derived from the previous one so the fields stay contiguous. */
31#define DEVICE_SHIFT (EVENTS_WIDTH - UNUSED_WIDTH - DEVICE_WIDTH)
32#define INSTAN_SHIFT (DEVICE_SHIFT - INSTAN_WIDTH)
33#define QLMASK_SHIFT (INSTAN_SHIFT - QLMASK_WIDTH)
34#define NAMEID_SHIFT (QLMASK_SHIFT - NAMEID_WIDTH)
/* Each mask is built by right-shifting an all-ones 64-bit constant until only
 * <WIDTH> low bits remain, then moving those bits up to the field position. */
35#define DEVICE_MASK ((0xFFFFFFFFFFFFFFFF >> (EVENTS_WIDTH - DEVICE_WIDTH)) << DEVICE_SHIFT)
36#define INSTAN_MASK ((0xFFFFFFFFFFFFFFFF >> (EVENTS_WIDTH - INSTAN_WIDTH)) << INSTAN_SHIFT)
37#define QLMASK_MASK ((0xFFFFFFFFFFFFFFFF >> (EVENTS_WIDTH - QLMASK_WIDTH)) << QLMASK_SHIFT)
38#define NAMEID_MASK ((0xFFFFFFFFFFFFFFFF >> (EVENTS_WIDTH - NAMEID_WIDTH)) << NAMEID_SHIFT)
/* Single-bit flag values that fit in the 2-bit QLMASK field.
 * NOTE(review): presumably these mark which qualifiers (":device=" and/or
 * ":instance=") an event carries -- confirm against the code that builds and
 * decodes event ids. */
39#define DEVICE_FLAG (0x2)
40#define INSTAN_FLAG (0x1)
49typedef struct ntv_event_table {
/*
 * File-local function pointers with rocprofiler-style signatures. Every one
 * returns an hsa_status_t.
 * NOTE(review): presumably these are filled in at runtime from the library
 * opened with dlopen() rather than linked directly -- confirm in the symbol
 * loading code.
 */

/* Query: takes an agent pointer, an info kind and an opaque output pointer. */
88static hsa_status_t (*
rocp_get_info_p)(
const hsa_agent_t *, rocprofiler_info_kind_t,
void *);
/* Iteration: takes an agent pointer, an info kind, a callback that receives a
 * rocprofiler_info_data_t plus a user pointer, and the user pointer itself. */
89static hsa_status_t (*
rocp_iterate_info_p)(
const hsa_agent_t *, rocprofiler_info_kind_t, hsa_status_t (*)(
const rocprofiler_info_data_t,
void *),
void *);
/* Context open: takes an agent, a feature array and its length, an output
 * rocprofiler_t handle, a uint32_t mode word and a properties pointer. */
93static hsa_status_t (*
rocp_open_p)(hsa_agent_t, rocprofiler_feature_t *, uint32_t, rocprofiler_t **, uint32_t, rocprofiler_properties_t *);
/* Group lookup: takes a context, a uint32_t index and an output group. */
99static hsa_status_t (*
rocp_get_group_p)(rocprofiler_t *, uint32_t, rocprofiler_group_t *);
/* Pool open: same argument shape as rocp_open_p, but produces a
 * rocprofiler_pool_t handle and takes pool properties. */
106static hsa_status_t (*
rocp_pool_open_p)(hsa_agent_t, rocprofiler_feature_t *, uint32_t, rocprofiler_pool_t **, uint32_t, rocprofiler_pool_properties_t *);
177 (*hsa_shut_down_p)();
197 SUBDBG(
"ENTER: event_code: %lu, modifier: %d\n", *event_code, modifier);
232 if (info.
flags == 0) {
280 SUBDBG(
"ENTER: name: %s, event_code: %p\n",
name, event_code);
309 event_info_t info = { device, instance, flags, nameid };
357 sprintf(info->
long_descr,
"%s, masks:Mandatory device qualifier [%s]:Mandatory instance qualifier in range [0-%i]",
372 sprintf(info->
long_descr,
"%s, masks:Mandatory device qualifier [%s]",
378 sprintf(info->
long_descr,
"%s, masks:Mandatory instance qualifier in range [0-%i]",
474 char *pathname = getenv(
"HSA_TOOLS_LIB");
475 if (pathname == NULL) {
480 rocp_dlp = dlopen(pathname, RTLD_NOW | RTLD_GLOBAL);
582 static int rocp_env_initialized;
584 if (rocp_env_initialized) {
588 const char *rocp_mode = getenv(
"ROCP_HSA_INTERCEPT");
592 char pathname[PATH_MAX];
593 char *rocm_root = getenv(
"PAPI_ROCM_ROOT");
594 if (rocm_root == NULL) {
600 int override_hsa_tools_lib = 1;
601 struct stat stat_info;
602 char *hsa_tools_lib = getenv(
"HSA_TOOLS_LIB");
604 err = stat(hsa_tools_lib, &stat_info);
605 if (err == 0 && S_ISREG(stat_info.st_mode)) {
606 override_hsa_tools_lib = 0;
610 if (override_hsa_tools_lib) {
616 const char *candidates[] = {
617 "lib/librocprofiler64.so.1",
618 "lib/librocprofiler64.so",
619 "rocprofiler/lib/libprofiler64.so.1",
620 "rocprofiler/lib/libprofiler64.so",
623 const char **candidate = candidates;
625 sprintf(pathname,
"%s/%s", rocm_root, *candidate);
627 err = stat(pathname, &stat_info);
638 setenv(
"HSA_TOOLS_LIB", pathname, 1);
641 int override_rocp_metrics = 1;
642 char *rocp_metrics = getenv(
"ROCP_METRICS");
644 err = stat(rocp_metrics, &stat_info);
645 if (err == 0 && S_ISREG(stat_info.st_mode)) {
646 override_rocp_metrics = 0;
650 if (override_rocp_metrics) {
653 sprintf(pathname,
"%s/lib/rocprofiler/metrics.xml", rocm_root);
655 err = stat(pathname, &stat_info);
657 sprintf(pathname,
"%s/rocprofiler/lib/metrics.xml", rocm_root);
659 err = stat(pathname, &stat_info);
666 setenv(
"ROCP_METRICS", pathname, 1);
669 rocp_env_initialized = 1;
689 ROCPROFILER_INFO_KIND_METRIC,
692 if (rocp_errno != HSA_STATUS_SUCCESS) {
693 const char *error_string_p;
713 ROCPROFILER_INFO_KIND_METRIC,
716 if (rocp_errno != HSA_STATUS_SUCCESS) {
717 const char *error_string_p;
749 switch (info.
flags) {
811 char *p = strstr(
name,
":device=");
815 *device = (
int) strtol(p + strlen(
":device="), NULL, 10);
835 char *p = strstr(
name,
":instance=");
840 *instance = (
int) strtol(p + strlen(
":instance="), NULL, 10);
853 char *p = strstr(
name,
":");
855 if (len < (
int)(p -
name)) {
858 strncpy(base,
name, (
size_t)(p -
name));
860 if (len < (
int) strlen(
name)) {
863 strncpy(base,
name, (
size_t) len);
875 (*(
int *)
count) += 1;
876 return HSA_STATUS_SUCCESS;
887 if (*
count > capacity) {
889 return HSA_STATUS_ERROR;
894 event = &
events[(*count)++];
896 event->descr =
papi_strdup(info.metric.description);
897 event->instances = info.metric.instances;
903 return HSA_STATUS_SUCCESS;
910static int init_features(uint64_t *,
int, rocprofiler_feature_t *);
916static int ctx_init(uint64_t *,
int, rocp_ctx_t *);
976 SUBDBG(
"[ROCP sampling mode] Cannot start eventset, not opened.");
981 SUBDBG(
"[ROCP sampling mode] Cannot start eventset, already running.");
989 for (
i = 0;
i < devs_count; ++
i) {
990 hsa_status_t rocp_errno =
rocp_start_p(rocp_ctx->u.sampling.contexts[
i], 0);
991 if (rocp_errno != HSA_STATUS_SUCCESS) {
1004 SUBDBG(
"[ROCP sampling mode] Cannot stop eventset, not opened.");
1009 SUBDBG(
"[ROCP sampling mode] Cannot stop eventset, not running.");
1017 for (
i = 0;
i < devs_count; ++
i) {
1018 hsa_status_t rocp_errno =
rocp_stop_p(rocp_ctx->u.sampling.contexts[
i], 0);
1019 if (rocp_errno != HSA_STATUS_SUCCESS) {
1024 rocp_ctx->u.sampling.state &= ~ROCM_EVENTS_RUNNING;
1032 int dev_feature_offset = 0;
1034 rocprofiler_feature_t *
features = rocp_ctx->u.sampling.features;
1038 for (
i = 0;
i < dev_count; ++
i) {
1039 hsa_status_t rocp_errno =
rocp_read_p(rocp_ctx->u.sampling.contexts[
i], 0);
1040 if (rocp_errno != HSA_STATUS_SUCCESS) {
1045 if (rocp_errno != HSA_STATUS_SUCCESS) {
1050 if (rocp_errno != HSA_STATUS_SUCCESS) {
1060 rocprofiler_feature_t *dev_features =
features + dev_feature_offset;
1061 long long *counters = rocp_ctx->u.sampling.counters;
1063 for (j = 0; j < dev_feature_count; ++j) {
1064 switch(dev_features[j].data.kind) {
1065 case ROCPROFILER_DATA_KIND_INT32:
1066 counters[k++] = (
long long) dev_features[j].data.result_int32;
1068 case ROCPROFILER_DATA_KIND_INT64:
1069 counters[k++] = dev_features[j].data.result_int64;
1071 case ROCPROFILER_DATA_KIND_FLOAT:
1072 counters[k++] = (
long long) dev_features[j].data.result_float;
1074 case ROCPROFILER_DATA_KIND_DOUBLE:
1075 counters[k++] = (
long long) dev_features[j].data.result_double;
1081 dev_feature_offset += dev_feature_count;
1083 *counts = rocp_ctx->u.sampling.counters;
1094 for (
i = 0;
i < devs_count; ++
i) {
1095 hsa_status_t rocp_errno =
rocp_reset_p(rocp_ctx->u.sampling.contexts[
i], 0);
1096 if (rocp_errno != HSA_STATUS_SUCCESS) {
1100 for (
i = 0;
i < rocp_ctx->u.sampling.feature_count; ++
i) {
1101 rocp_ctx->u.sampling.counters[
i] = 0;
1158 rocprofiler_feature_t *
features = NULL;
1159 rocprofiler_t **contexts = NULL;
1160 rocprofiler_properties_t *ctx_prop = NULL;
1161 long long *counters = NULL;
1175 contexts =
papi_calloc(num_devs,
sizeof(*contexts));
1176 if (contexts == NULL) {
1181 ctx_prop =
papi_calloc(num_devs,
sizeof(*ctx_prop));
1182 if (ctx_prop == NULL) {
1194 if (counters == NULL) {
1205 if (*rocp_ctx == NULL) {
1210 (*rocp_ctx)->u.sampling.events_id =
events_id;
1211 (*rocp_ctx)->u.sampling.features =
features;
1212 (*rocp_ctx)->u.sampling.feature_count =
num_events;
1213 (*rocp_ctx)->u.sampling.contexts = contexts;
1214 (*rocp_ctx)->u.sampling.counters = counters;
1215 (*rocp_ctx)->u.sampling.device_map = bitmap;
1216 (*rocp_ctx)->u.sampling.ctx_prop = ctx_prop;
1240 if (*rocp_ctx == NULL) {
1244 if ((*rocp_ctx)->u.sampling.features) {
1245 finalize_features((*rocp_ctx)->u.sampling.features, (*rocp_ctx)->u.sampling.feature_count);
1246 papi_free((*rocp_ctx)->u.sampling.features);
1249 if ((*rocp_ctx)->u.sampling.contexts) {
1250 papi_free((*rocp_ctx)->u.sampling.contexts);
1253 if ((*rocp_ctx)->u.sampling.ctx_prop) {
1254 papi_free((*rocp_ctx)->u.sampling.ctx_prop);
1257 if ((*rocp_ctx)->u.sampling.counters) {
1258 papi_free((*rocp_ctx)->u.sampling.counters);
1272 rocprofiler_feature_t *
features = rocp_ctx->u.sampling.features;
1273 int dev_feature_offset = 0;
1275 rocprofiler_t **contexts = rocp_ctx->u.sampling.contexts;
1276 rocprofiler_properties_t *ctx_prop = rocp_ctx->u.sampling.ctx_prop;
1283 for (
i = 0;
i < dev_count; ++
i) {
1291 rocprofiler_feature_t *dev_features =
features + dev_feature_offset;
1293 const uint32_t mode =
1294 ROCPROFILER_MODE_STANDALONE | ROCPROFILER_MODE_CREATEQUEUE | ROCPROFILER_MODE_SINGLEGROUP;
1296 ctx_prop[
i].queue_depth = 128;
1298 dev_feature_count, &contexts[
i], mode,
1300 if (rocp_errno != HSA_STATUS_SUCCESS) {
1305 dev_feature_offset += dev_feature_count;
1316 for (j = 0; j <
i; ++j) {
1330 for (
i = 0;
i < devs_count; ++
i) {
1331 if (
rocp_close_p(rocp_ctx->u.sampling.contexts[
i]) != HSA_STATUS_SUCCESS) {
1363 features[
i].kind = (rocprofiler_feature_kind_t) ROCPROFILER_INFO_KIND_METRIC;
1396 int start, stop, j = 0;
1397 int num_events = rocp_ctx->u.sampling.feature_count;
1398 uint64_t *
events_id = rocp_ctx->u.sampling.events_id;
1412 return stop -
start;
1418 int start, stop, j = 0;
1419 int num_events = rocp_ctx->u.intercept.feature_count;
1420 uint64_t *
events_id = rocp_ctx->u.intercept.events_id;
1434 return stop -
start;
1441typedef struct cb_context_node {
1484 SUBDBG(
"[ROCP intercept mode] Can only monitor one set of events "
1485 "per application run.");
1544 SUBDBG(
"[ROCP intercept mode] Cannot start eventset, not opened.");
1550 SUBDBG(
"[ROCP intercept mode] Cannot start eventset, already running.");
1579 SUBDBG(
"[ROCP intercept mode] Cannot stop eventset, not opened.");
1585 SUBDBG(
"[ROCP intercept mode] Cannot stop eventset, not running.");
1597 rocp_ctx->u.intercept.state &= ~ROCM_EVENTS_RUNNING;
1616 if (dispatch_count == 0) {
1617 *counts = rocp_ctx->u.intercept.counters;
1630 for (
i = 0;
i < devs_count; ++
i) {
1632 papi_errno =
rocc_dev_get_id(rocp_ctx->u.intercept.device_map,
i, &dev_id);
1637 while (dispatch_count > 0) {
1649 if (dispatch_count > 0) {
1650 SUBDBG(
"[ROCP intercept mode] User monitoring GPU i but running on j.");
1653 *counts = rocp_ctx->u.intercept.counters;
1665 for (
i = 0;
i < rocp_ctx->u.intercept.feature_count; ++
i) {
1666 rocp_ctx->u.intercept.counters[
i] = 0;
1697 (*hsa_shut_down_p)();
1758 long long *counters = NULL;
1768 int num_unique_events = 0;
1802 if (counters == NULL) {
1808 if (*rocp_ctx == NULL) {
1812 (*rocp_ctx)->u.intercept.events_id =
events_id;
1813 (*rocp_ctx)->u.intercept.counters = counters;
1814 (*rocp_ctx)->u.intercept.dispatch_count = 0;
1815 (*rocp_ctx)->u.intercept.device_map = bitmap;
1816 (*rocp_ctx)->u.intercept.feature_count =
num_events;
1847 void *count_table, *p;
1868 *num_unique =
count;
1880 void *count_table, *p;
1884 for (
i = 0, j = 0;
i < source_len; ++
i) {
1897 target[j++] = source[
i];
1928 if (*rocp_ctx == NULL) {
1932 if ((*rocp_ctx)->u.intercept.counters) {
1933 papi_free((*rocp_ctx)->u.intercept.counters);
1948 rocp_ctx_t *rocp_ctx)
1990static hsa_status_t
dispatch_cb(
const rocprofiler_callback_data_t *,
void *, rocprofiler_group_t *);
1998 if (context_arg == NULL) {
2006 rocprofiler_pool_properties_t properties;
2007 properties.num_entries = 128;
2010 properties.handler_arg = context_arg;
2025 rocprofiler_pool_t *pool = NULL;
2034 rocprofiler_queue_callbacks_t dispatch_ptrs = { 0 };
2059 sprintf(
key,
"%lu", tid);
2081 sprintf(
key,
"%lu", tid);
2114dispatch_cb(
const rocprofiler_callback_data_t *callback_data,
void *arg, rocprofiler_group_t *group)
2116 hsa_agent_t agent = callback_data->agent;
2117 hsa_status_t status = HSA_STATUS_SUCCESS;
2123 rocprofiler_pool_t *pool = dispatch_arg->
pools[dev_id];
2124 rocprofiler_pool_entry_t pool_entry;
2126 if (rocp_errno != HSA_STATUS_SUCCESS) {
2127 status = rocp_errno;
2131 rocprofiler_t *context = pool_entry.context;
2135 if (rocp_errno != HSA_STATUS_SUCCESS) {
2136 status = rocp_errno;
2143 payload->
agent = agent;
2144 payload->
group = *group;
2145 payload->
data = *callback_data;
2148 payload->
valid =
true;
2171 if (payload->
valid ==
false) {
2173 goto fn_check_again;
2232 const rocprofiler_feature_t *
f = &
features[
i];
2233 switch(
f->data.kind) {
2234 case ROCPROFILER_DATA_KIND_INT32:
2237 case ROCPROFILER_DATA_KIND_INT64:
2240 case ROCPROFILER_DATA_KIND_FLOAT:
2241 n->
counters[
i] = (
long long)
f->data.result_float;
2243 case ROCPROFILER_DATA_KIND_DOUBLE:
2247 SUBDBG(
"Unsupported data kind from rocprofiler");
2270 sprintf(
key,
"%lu", tid);
2277 return ++(*counter_p);
2286 sprintf(
key,
"%lu", tid);
2293 return (*counter_p);
2307 if (curr->
tid == tid) {
2332 sprintf(
key,
"%lu", tid);
2339 return --(*counter_p);
2346 uint64_t *
events_id = rocp_ctx->u.intercept.events_id;
2356 for (j = 0; j < rocp_ctx->u.intercept.feature_count; ++j) {
2358 rocp_ctx->u.intercept.counters[j] += n->
counters[
i];
2375OnLoadToolProp(rocprofiler_settings_t *settings
__attribute__((unused)))
Returns a string describing the PAPI error code.
static int htable_insert(void *handle, const char *key, void *in)
static int htable_delete(void *handle, const char *key)
static int htable_shutdown(void *handle)
static int htable_find(void *handle, const char *key, void **out)
static int htable_init(void **handle)
#define PAPI_NTV_ENUM_UMASKS
char events[MAX_EVENTS][BUFSIZ]
static struct timeval start
static nvmlDevice_t * devices
unsigned long AO_t __attribute__((__aligned__(4)))
#define SUBDBG(format, args...)
#define papi_calloc(a, b)
#define papi_realloc(a, b)
int rocc_dev_check(rocc_bitmap_t bitmap, int i)
int rocc_dev_get_map(rocc_dev_get_map_cb query_dev_id, uint64_t *events_id, int num_events, rocc_bitmap_t *bitmap)
hsa_status_t(* hsa_status_string_p)(hsa_status_t, const char **)
hsa_status_t(* hsa_queue_destroy_p)(hsa_queue_t *)
char error_string[PAPI_MAX_STR_LEN]
int rocc_dev_acquire(rocc_bitmap_t bitmap)
int rocc_dev_get_count(rocc_bitmap_t bitmap, int *num_devices)
device_table_t * device_table_p
int rocc_dev_get_agent_id(hsa_agent_t agent, int *dev_id)
int rocc_dev_get_id(rocc_bitmap_t bitmap, int dev_count, int *device_id)
int rocc_dev_set(rocc_bitmap_t *bitmap, int i)
int rocc_thread_get_id(unsigned long *tid)
int rocc_dev_release(rocc_bitmap_t bitmap)
#define PAPI_ROCM_MAX_DEV_COUNT
static int evt_id_create(event_info_t *info, uint64_t *event_id)
static int sampling_ctx_open(uint64_t *, int, rocp_ctx_t *)
static int count_unique_events(uint64_t *events_id, int num_events, int *num_unique)
int rocp_evt_code_to_descr(uint64_t event_code, char *descr, int len)
static hsa_status_t(* rocp_open_p)(hsa_agent_t, rocprofiler_feature_t *, uint32_t, rocprofiler_t **, uint32_t, rocprofiler_properties_t *)
static int ctx_get_dev_feature_count(rocp_ctx_t, int)
static cb_context_node_t * cb_ctx_list_heads[PAPI_ROCM_MAX_DEV_COUNT]
static cb_dispatch_arg_t cb_dispatch_arg
static int intercept_ctx_close(rocp_ctx_t)
static void process_context_entry(cb_context_payload_t *, rocprofiler_feature_t *, int)
static hsa_status_t(* rocp_pool_fetch_p)(rocprofiler_pool_t *, rocprofiler_pool_entry_t *)
static hsa_status_t(* rocp_start_p)(rocprofiler_t *, uint32_t)
int rocp_ctx_open(uint64_t *events_id, int num_events, rocp_ctx_t *rocp_ctx)
static ntv_event_table_t ntv_table
static int intercept_ctx_init(uint64_t *, int, rocp_ctx_t *)
static hsa_status_t(* rocp_get_metrics_p)(const rocprofiler_t *)
static int ctx_open(rocp_ctx_t)
static int ctx_close(rocp_ctx_t)
int rocp_ctx_start(rocp_ctx_t rocp_ctx)
static hsa_status_t(* rocp_stop_p)(rocprofiler_t *, uint32_t)
static int load_rocp_sym(void)
static int verify_events(uint64_t *, int)
static int save_callback_features(rocprofiler_feature_t *features, int feature_count)
static hsa_status_t(* rocp_reset_p)(rocprofiler_t *, uint32_t)
static int unregister_dispatch_counter(unsigned long)
static int intercept_ctx_get_dev_feature_count(rocp_ctx_t, int)
rocprofiler_feature_t * features
static int evt_name_to_basename(const char *name, char *base, int len)
static int ctx_finalize(rocp_ctx_t *)
static hsa_status_t(* rocp_group_count_p)(const rocprofiler_t *, uint32_t *)
int rocp_ctx_close(rocp_ctx_t rocp_ctx)
static int evt_code_to_name(uint64_t event_code, char *name, int len)
static hsa_status_t(* rocp_start_queue_cbs_p)(void)
static int intercept_ctx_start(rocp_ctx_t)
static hsa_status_t(* rocp_pool_open_p)(hsa_agent_t, rocprofiler_feature_t *, uint32_t, rocprofiler_pool_t **, uint32_t, rocprofiler_pool_properties_t *)
static hsa_status_t(* rocp_stop_queue_cbs_p)(void)
static int cleanup_callback_features(rocprofiler_feature_t *features, int feature_count)
static int init_callbacks(rocprofiler_feature_t *, int)
int rocp_evt_enum(uint64_t *event_code, int modifier)
static hsa_status_t(* rocp_group_get_data_p)(rocprofiler_group_t *)
static int sampling_ctx_close(rocp_ctx_t)
static int increment_and_fetch_dispatch_counter(unsigned long)
static hsa_status_t get_ntv_events_cb(const rocprofiler_info_data_t, void *)
static int sampling_ctx_finalize(rocp_ctx_t *)
static int unload_rocp_sym(void)
static bool context_handler_cb(const rocprofiler_pool_entry_t *, void *)
static int intercept_ctx_stop(rocp_ctx_t)
static int evt_name_to_instance(const char *name, int *instance)
static cb_context_node_t * alloc_context_node(int)
static int intercept_ctx_open(uint64_t *, int, rocp_ctx_t *)
static struct @2 intercept_global_state
static hsa_status_t(* rocp_iterate_info_p)(const hsa_agent_t *, rocprofiler_info_kind_t, hsa_status_t(*)(const rocprofiler_info_data_t, void *), void *)
static void free_context_node(cb_context_node_t *)
static ntv_event_table_t * ntv_table_p
static hsa_status_t(* rocp_error_string_p)(const char **)
static hsa_status_t(* rocp_get_group_p)(rocprofiler_t *, uint32_t, rocprofiler_group_t *)
static hsa_status_t(* rocp_get_info_p)(const hsa_agent_t *, rocprofiler_info_kind_t, void *)
static int get_context_counters(int, cb_context_node_t *, rocp_ctx_t)
static int get_context_node(int, cb_context_node_t **)
static int evt_id_to_info(uint64_t event_id, event_info_t *info)
static hsa_status_t(* rocp_get_data_p)(rocprofiler_t *, uint32_t)
static int intercept_shutdown(void)
static int fetch_dispatch_counter(unsigned long)
static int intercept_ctx_reset(rocp_ctx_t)
int rocp_evt_code_to_name(uint64_t event_code, char *name, int len)
int rocp_evt_name_to_code(const char *name, uint64_t *event_code)
static int finalize_features(rocprofiler_feature_t *, int)
static int intercept_ctx_read(rocp_ctx_t, long long **)
static hsa_status_t count_ntv_events_cb(const rocprofiler_info_data_t, void *)
static hsa_status_t(* rocp_remove_queue_cbs_p)(void)
static int register_dispatch_counter(unsigned long, int *)
static hsa_status_t dispatch_cb(const rocprofiler_callback_data_t *, void *, rocprofiler_group_t *)
static int sampling_ctx_read(rocp_ctx_t, long long **)
static int sampling_ctx_reset(rocp_ctx_t)
static hsa_status_t(* rocp_pool_flush_p)(rocprofiler_pool_t *)
static int copy_unique_events(uint64_t *target, uint64_t *source, int source_len)
static hsa_status_t(* rocp_pool_close_p)(rocprofiler_pool_t *)
static int init_rocp_env(void)
static int intercept_ctx_finalize(rocp_ctx_t *)
static int sampling_ctx_stop(rocp_ctx_t)
static hsa_status_t(* rocp_read_p)(rocprofiler_t *, uint32_t)
int rocp_ctx_stop(rocp_ctx_t rocp_ctx)
static int decrement_and_fetch_dispatch_counter(unsigned long)
int rocp_ctx_reset(rocp_ctx_t rocp_ctx)
static hsa_status_t(* rocp_close_p)(rocprofiler_t *)
static void * htable_intercept
int rocp_init_environment(void)
static int init_event_table(void)
static hsa_status_t(* rocp_set_queue_cbs_p)(rocprofiler_queue_callbacks_t, void *)
static int sampling_shutdown(void)
static void put_context_counters(rocprofiler_feature_t *, int, cb_context_node_t *)
static int ctx_init(uint64_t *, int, rocp_ctx_t *)
static void put_context_node(int, cb_context_node_t *)
static int shutdown_event_table(void)
static int sampling_ctx_get_dev_feature_count(rocp_ctx_t, int)
static int evt_name_to_device(const char *name, int *device)
static int sampling_ctx_init(uint64_t *, int, rocp_ctx_t *)
int rocp_evt_code_to_info(uint64_t event_code, PAPI_event_info_t *info)
static int init_features(uint64_t *, int, rocprofiler_feature_t *)
int rocp_ctx_read(rocp_ctx_t rocp_ctx, long long **counts)
static int sampling_ctx_start(rocp_ctx_t)
unsigned int rocm_prof_mode
static int event_id_to_dev_id_cb(uint64_t event_id, int *device)
#define ROCM_PROFILE_SAMPLING_MODE
#define ROCM_EVENTS_RUNNING
#define ROCM_EVENTS_OPENED
char symbol[PAPI_HUGE_STR_LEN]
char long_descr[PAPI_HUGE_STR_LEN]
rocprofiler_feature_t * features
struct cb_context_node * next
rocprofiler_callback_data_t data
rocprofiler_group_t group
rocprofiler_pool_t * pools[PAPI_ROCM_MAX_DEV_COUNT]
hsa_agent_t devices[PAPI_ROCM_MAX_DEV_COUNT]
struct rocd_ctx::@3::@4 intercept
rocprofiler_feature_t * features
rocprofiler_properties_t * ctx_prop
struct rocd_ctx::@3::@5 sampling
rocprofiler_t ** contexts
inline_static int _papi_hwi_lock(int lck)
inline_static int _papi_hwi_unlock(int lck)