PAPI 7.1.0.0
Loading...
Searching...
No Matches
roc_profiler.c File Reference
Include dependency graph for roc_profiler.c:

Go to the source code of this file.

Data Structures

struct  ntv_event_t
 
struct  ntv_event_table_t
 
struct  rocp_ctx_t
 
struct  event_info_t
 
struct  ntv_arg
 
struct  cb_context_node_t
 
struct  cb_dispatch_arg_t
 
struct  cb_context_arg_t
 
struct  cb_context_payload_t
 

Macros

#define EVENTS_WIDTH   (sizeof(uint64_t) * 8)
 
#define DEVICE_WIDTH   ( 7)
 
#define INSTAN_WIDTH   ( 7)
 
#define QLMASK_WIDTH   ( 2)
 
#define NAMEID_WIDTH   (12)
 
#define UNUSED_WIDTH   (EVENTS_WIDTH - DEVICE_WIDTH - INSTAN_WIDTH - QLMASK_WIDTH - NAMEID_WIDTH)
 
#define DEVICE_SHIFT   (EVENTS_WIDTH - UNUSED_WIDTH - DEVICE_WIDTH)
 
#define INSTAN_SHIFT   (DEVICE_SHIFT - INSTAN_WIDTH)
 
#define QLMASK_SHIFT   (INSTAN_SHIFT - QLMASK_WIDTH)
 
#define NAMEID_SHIFT   (QLMASK_SHIFT - NAMEID_WIDTH)
 
#define DEVICE_MASK   ((0xFFFFFFFFFFFFFFFF >> (EVENTS_WIDTH - DEVICE_WIDTH)) << DEVICE_SHIFT)
 
#define INSTAN_MASK   ((0xFFFFFFFFFFFFFFFF >> (EVENTS_WIDTH - INSTAN_WIDTH)) << INSTAN_SHIFT)
 
#define QLMASK_MASK   ((0xFFFFFFFFFFFFFFFF >> (EVENTS_WIDTH - QLMASK_WIDTH)) << QLMASK_SHIFT)
 
#define NAMEID_MASK   ((0xFFFFFFFFFFFFFFFF >> (EVENTS_WIDTH - NAMEID_WIDTH)) << NAMEID_SHIFT)
 
#define DEVICE_FLAG   (0x2)
 
#define INSTAN_FLAG   (0x1)
 

Functions

static int load_rocp_sym (void)
 
static int init_rocp_env (void)
 
static int init_event_table (void)
 
static int unload_rocp_sym (void)
 
static int sampling_ctx_open (uint64_t *, int, rocp_ctx_t *)
 
static int intercept_ctx_open (uint64_t *, int, rocp_ctx_t *)
 
static int sampling_ctx_close (rocp_ctx_t)
 
static int intercept_ctx_close (rocp_ctx_t)
 
static int sampling_ctx_start (rocp_ctx_t)
 
static int intercept_ctx_start (rocp_ctx_t)
 
static int sampling_ctx_stop (rocp_ctx_t)
 
static int intercept_ctx_stop (rocp_ctx_t)
 
static int sampling_ctx_read (rocp_ctx_t, long long **)
 
static int intercept_ctx_read (rocp_ctx_t, long long **)
 
static int sampling_ctx_reset (rocp_ctx_t)
 
static int intercept_ctx_reset (rocp_ctx_t)
 
static int sampling_shutdown (void)
 
static int intercept_shutdown (void)
 
static int evt_code_to_name (uint64_t event_code, char *name, int len)
 
static int evt_id_create (event_info_t *info, uint64_t *event_id)
 
static int evt_id_to_info (uint64_t event_id, event_info_t *info)
 
static int evt_name_to_device (const char *name, int *device)
 
static int evt_name_to_instance (const char *name, int *instance)
 
static int evt_name_to_basename (const char *name, char *base, int len)
 
int rocp_init_environment (void)
 
int rocp_init (void)
 
int rocp_evt_enum (uint64_t *event_code, int modifier)
 
int rocp_evt_code_to_descr (uint64_t event_code, char *descr, int len)
 
int rocp_evt_name_to_code (const char *name, uint64_t *event_code)
 
int rocp_evt_code_to_name (uint64_t event_code, char *name, int len)
 
int rocp_evt_code_to_info (uint64_t event_code, PAPI_event_info_t *info)
 
int rocp_ctx_open (uint64_t *events_id, int num_events, rocp_ctx_t *rocp_ctx)
 
int rocp_ctx_close (rocp_ctx_t rocp_ctx)
 
int rocp_ctx_start (rocp_ctx_t rocp_ctx)
 
int rocp_ctx_stop (rocp_ctx_t rocp_ctx)
 
int rocp_ctx_read (rocp_ctx_t rocp_ctx, long long **counts)
 
int rocp_ctx_reset (rocp_ctx_t rocp_ctx)
 
int rocp_shutdown (void)
 
static hsa_status_t count_ntv_events_cb (const rocprofiler_info_data_t, void *)
 
static hsa_status_t get_ntv_events_cb (const rocprofiler_info_data_t, void *)
 
hsa_status_t count_ntv_events_cb (const rocprofiler_info_data_t info __attribute__((unused)), void *count)
 
static int init_features (uint64_t *, int, rocprofiler_feature_t *)
 
static int finalize_features (rocprofiler_feature_t *, int)
 
static int sampling_ctx_init (uint64_t *, int, rocp_ctx_t *)
 
static int sampling_ctx_finalize (rocp_ctx_t *)
 
static int ctx_open (rocp_ctx_t)
 
static int ctx_close (rocp_ctx_t)
 
static int ctx_init (uint64_t *, int, rocp_ctx_t *)
 
static int ctx_finalize (rocp_ctx_t *)
 
static int ctx_get_dev_feature_count (rocp_ctx_t, int)
 
static int shutdown_event_table (void)
 
static int event_id_to_dev_id_cb (uint64_t event_id, int *device)
 
static int sampling_ctx_get_dev_feature_count (rocp_ctx_t, int)
 
static int intercept_ctx_get_dev_feature_count (rocp_ctx_t, int)
 
static int verify_events (uint64_t *, int)
 
static int init_callbacks (rocprofiler_feature_t *, int)
 
static int register_dispatch_counter (unsigned long, int *)
 
static int increment_and_fetch_dispatch_counter (unsigned long)
 
static int decrement_and_fetch_dispatch_counter (unsigned long)
 
static int unregister_dispatch_counter (unsigned long)
 
static int fetch_dispatch_counter (unsigned long)
 
static cb_context_node_t * alloc_context_node (int)
 
static void free_context_node (cb_context_node_t *)
 
static int get_context_node (int, cb_context_node_t **)
 
static int get_context_counters (int, cb_context_node_t *, rocp_ctx_t)
 
static void put_context_counters (rocprofiler_feature_t *, int, cb_context_node_t *)
 
static void put_context_node (int, cb_context_node_t *)
 
static int intercept_ctx_init (uint64_t *, int, rocp_ctx_t *)
 
static int intercept_ctx_finalize (rocp_ctx_t *)
 
static int count_unique_events (uint64_t *events_id, int num_events, int *num_unique)
 
static int copy_unique_events (uint64_t *target, uint64_t *source, int source_len)
 
static int save_callback_features (rocprofiler_feature_t *features, int feature_count)
 
static int cleanup_callback_features (rocprofiler_feature_t *features, int feature_count)
 
static bool context_handler_cb (const rocprofiler_pool_entry_t *, void *)
 
static hsa_status_t dispatch_cb (const rocprofiler_callback_data_t *, void *, rocprofiler_group_t *)
 
static void process_context_entry (cb_context_payload_t *, rocprofiler_feature_t *, int)
 
void __attribute__ ((visibility("default")))
 

Variables

unsigned int rocm_prof_mode
 
unsigned int _rocm_lock
 
static hsa_status_t(* rocp_get_info_p )(const hsa_agent_t *, rocprofiler_info_kind_t, void *)
 
static hsa_status_t(* rocp_iterate_info_p )(const hsa_agent_t *, rocprofiler_info_kind_t, hsa_status_t(*)(const rocprofiler_info_data_t, void *), void *)
 
static hsa_status_t(* rocp_error_string_p )(const char **)
 
static hsa_status_t(* rocp_open_p )(hsa_agent_t, rocprofiler_feature_t *, uint32_t, rocprofiler_t **, uint32_t, rocprofiler_properties_t *)
 
static hsa_status_t(* rocp_close_p )(rocprofiler_t *)
 
static hsa_status_t(* rocp_group_count_p )(const rocprofiler_t *, uint32_t *)
 
static hsa_status_t(* rocp_start_p )(rocprofiler_t *, uint32_t)
 
static hsa_status_t(* rocp_read_p )(rocprofiler_t *, uint32_t)
 
static hsa_status_t(* rocp_stop_p )(rocprofiler_t *, uint32_t)
 
static hsa_status_t(* rocp_get_group_p )(rocprofiler_t *, uint32_t, rocprofiler_group_t *)
 
static hsa_status_t(* rocp_get_data_p )(rocprofiler_t *, uint32_t)
 
static hsa_status_t(* rocp_group_get_data_p )(rocprofiler_group_t *)
 
static hsa_status_t(* rocp_get_metrics_p )(const rocprofiler_t *)
 
static hsa_status_t(* rocp_reset_p )(rocprofiler_t *, uint32_t)
 
static hsa_status_t(* rocp_pool_open_p )(hsa_agent_t, rocprofiler_feature_t *, uint32_t, rocprofiler_pool_t **, uint32_t, rocprofiler_pool_properties_t *)
 
static hsa_status_t(* rocp_pool_close_p )(rocprofiler_pool_t *)
 
static hsa_status_t(* rocp_pool_fetch_p )(rocprofiler_pool_t *, rocprofiler_pool_entry_t *)
 
static hsa_status_t(* rocp_pool_flush_p )(rocprofiler_pool_t *)
 
static hsa_status_t(* rocp_set_queue_cbs_p )(rocprofiler_queue_callbacks_t, void *)
 
static hsa_status_t(* rocp_start_queue_cbs_p )(void)
 
static hsa_status_t(* rocp_stop_queue_cbs_p )(void)
 
static hsa_status_t(* rocp_remove_queue_cbs_p )(void)
 
static void * rocp_dlp = NULL
 
static ntv_event_table_t ntv_table
 
static ntv_event_table_t * ntv_table_p
 
static void * htable
 
static void * htable_intercept
 
struct {
   uint64_t *   events_id
 
   rocprofiler_feature_t *   features
 
   int   feature_count
 
   int   active_thread_count
 
   int   kernel_count
 
} intercept_global_state
 
static cb_dispatch_arg_t cb_dispatch_arg
 
static cb_context_node_t * cb_ctx_list_heads [PAPI_ROCM_MAX_DEV_COUNT]
 

Detailed Description

Author
Giuseppe Congiu gcong.nosp@m.iu@i.nosp@m.cl.ut.nosp@m.k.ed.nosp@m.u

Definition in file roc_profiler.c.

Macro Definition Documentation

◆ DEVICE_FLAG

#define DEVICE_FLAG   (0x2)

Definition at line 39 of file roc_profiler.c.

◆ DEVICE_MASK

#define DEVICE_MASK   ((0xFFFFFFFFFFFFFFFF >> (EVENTS_WIDTH - DEVICE_WIDTH)) << DEVICE_SHIFT)

Definition at line 35 of file roc_profiler.c.

◆ DEVICE_SHIFT

#define DEVICE_SHIFT   (EVENTS_WIDTH - UNUSED_WIDTH - DEVICE_WIDTH)

Definition at line 31 of file roc_profiler.c.

◆ DEVICE_WIDTH

#define DEVICE_WIDTH   ( 7)

Definition at line 26 of file roc_profiler.c.

◆ EVENTS_WIDTH

#define EVENTS_WIDTH   (sizeof(uint64_t) * 8)

Event identifier encoding format:

+---------------------------------+-------+-------+--+------------+
|             unused              |  dev  | inst  |  |   nameid   |
+---------------------------------+-------+-------+--+------------+

unused   : 36 bits
device   : 7 bits ([0 - 127] devices)
instance : 7 bits ([0 - 127] instances)
qlmask   : 2 bits (qualifier mask)
nameid   : 12 bits ([0 - 4095] event names)

Definition at line 25 of file roc_profiler.c.

◆ INSTAN_FLAG

#define INSTAN_FLAG   (0x1)

Definition at line 40 of file roc_profiler.c.

◆ INSTAN_MASK

#define INSTAN_MASK   ((0xFFFFFFFFFFFFFFFF >> (EVENTS_WIDTH - INSTAN_WIDTH)) << INSTAN_SHIFT)

Definition at line 36 of file roc_profiler.c.

◆ INSTAN_SHIFT

#define INSTAN_SHIFT   (DEVICE_SHIFT - INSTAN_WIDTH)

Definition at line 32 of file roc_profiler.c.

◆ INSTAN_WIDTH

#define INSTAN_WIDTH   ( 7)

Definition at line 27 of file roc_profiler.c.

◆ NAMEID_MASK

#define NAMEID_MASK   ((0xFFFFFFFFFFFFFFFF >> (EVENTS_WIDTH - NAMEID_WIDTH)) << NAMEID_SHIFT)

Definition at line 38 of file roc_profiler.c.

◆ NAMEID_SHIFT

#define NAMEID_SHIFT   (QLMASK_SHIFT - NAMEID_WIDTH)

Definition at line 34 of file roc_profiler.c.

◆ NAMEID_WIDTH

#define NAMEID_WIDTH   (12)

Definition at line 29 of file roc_profiler.c.

◆ QLMASK_MASK

#define QLMASK_MASK   ((0xFFFFFFFFFFFFFFFF >> (EVENTS_WIDTH - QLMASK_WIDTH)) << QLMASK_SHIFT)

Definition at line 37 of file roc_profiler.c.

◆ QLMASK_SHIFT

#define QLMASK_SHIFT   (INSTAN_SHIFT - QLMASK_WIDTH)

Definition at line 33 of file roc_profiler.c.

◆ QLMASK_WIDTH

#define QLMASK_WIDTH   ( 2)

Definition at line 28 of file roc_profiler.c.

◆ UNUSED_WIDTH

#define UNUSED_WIDTH   (EVENTS_WIDTH - DEVICE_WIDTH - INSTAN_WIDTH - QLMASK_WIDTH - NAMEID_WIDTH)

Definition at line 30 of file roc_profiler.c.

Function Documentation

◆ __attribute__()

void __attribute__ ( (visibility("default"))  )

Definition at line 2374 of file roc_profiler.c.

2376{
2377 init_rocp_env();
2378}
static int init_rocp_env(void)
Definition: roc_profiler.c:580
Here is the call graph for this function:

◆ alloc_context_node()

cb_context_node_t * alloc_context_node ( int  num_events)
static

Definition at line 2211 of file roc_profiler.c.

2212{
2213 cb_context_node_t *n = papi_malloc(sizeof(*n));
2214 if (n == NULL) {
2215 return NULL;
2216 }
2217
2218 n->counters = papi_malloc(num_events * sizeof(long long));
2219 if (n->counters == NULL) {
2220 papi_free(n);
2221 return NULL;
2222 }
2223
2224 return n;
2225}
static int num_events
#define papi_free(a)
Definition: papi_memory.h:35
#define papi_malloc(a)
Definition: papi_memory.h:34
long long * counters
Here is the caller graph for this function:

◆ cleanup_callback_features()

int cleanup_callback_features ( rocprofiler_feature_t *  features,
int  feature_count 
)
static

Definition at line 1916 of file roc_profiler.c.

1917{
1918 int i;
1919 for (i = 0; i < feature_count; ++i) {
1920 htable_delete(htable_intercept, features[i].name);
1921 }
1922 return PAPI_OK;
1923}
int i
static int htable_delete(void *handle, const char *key)
Definition: cuda/htable.h:130
#define PAPI_OK
Definition: f90papi.h:73
rocprofiler_feature_t * features
int feature_count
static void * htable_intercept
Definition: roc_profiler.c:148
const char * name
Definition: rocs.c:225
Here is the call graph for this function:
Here is the caller graph for this function:

◆ context_handler_cb()

bool context_handler_cb ( const rocprofiler_pool_entry_t *  entry,
void *  arg 
)
static

Definition at line 2156 of file roc_profiler.c.

2157{
2158 cb_context_payload_t *payload = (cb_context_payload_t *) entry->payload;
2159 cb_context_arg_t *context_arg = (cb_context_arg_t *) arg;
2160
2161 process_context_entry(payload, context_arg->features, context_arg->feature_count);
2162
2163 return false;
2164}
static void process_context_entry(cb_context_payload_t *, rocprofiler_feature_t *, int)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ copy_unique_events()

int copy_unique_events ( uint64_t *  target,
uint64_t *  source,
int  source_len 
)
static

Definition at line 1875 of file roc_profiler.c.

1876{
1877 int papi_errno = PAPI_OK;
1878 char name[PAPI_MAX_STR_LEN] = { 0 };
1879 int i, j;
1880 void *count_table, *p;
1881
1882 htable_init(&count_table);
1883
1884 for (i = 0, j = 0; i < source_len; ++i) {
1885 event_info_t info;
1886 papi_errno = evt_id_to_info(source[i], &info);
1887 if (papi_errno) {
1888 return papi_errno;
1889 }
1890 if (ntv_table_p->events[info.nameid].instances > 1) {
1891 sprintf(name, "%s[%i]", ntv_table_p->events[info.nameid].name, info.instance);
1892 } else {
1893 sprintf(name, "%s", ntv_table_p->events[info.nameid].name);
1894 }
1895 if (htable_find(count_table, name, &p) != HTABLE_SUCCESS) {
1896 htable_insert(count_table, name, NULL);
1897 target[j++] = source[i];
1898 }
1899 }
1900
1901 htable_shutdown(count_table);
1902 return papi_errno;
1903}
static int htable_insert(void *handle, const char *key, void *in)
Definition: cuda/htable.h:92
static int htable_shutdown(void *handle)
Definition: cuda/htable.h:76
#define HTABLE_SUCCESS
Definition: cuda/htable.h:17
static int htable_find(void *handle, const char *key, void **out)
Definition: cuda/htable.h:161
static int htable_init(void **handle)
Definition: cuda/htable.h:55
#define PAPI_MAX_STR_LEN
Definition: f90papi.h:77
static ntv_event_table_t * ntv_table_p
Definition: roc_profiler.c:146
static int evt_id_to_info(uint64_t event_id, event_info_t *info)
Definition: roc_profiler.c:774
char * name
Definition: roc_profiler.c:43
ntv_event_t * events
Definition: roc_profiler.c:50
Here is the call graph for this function:
Here is the caller graph for this function:

◆ count_ntv_events_cb() [1/2]

hsa_status_t count_ntv_events_cb ( const rocprofiler_info_data_t info   __attribute__((unused)),
void *  count 
)

init_event_table utility functions

Definition at line 873 of file roc_profiler.c.

874{
875 (*(int *) count) += 1;
876 return HSA_STATUS_SUCCESS;
877}
static long count

◆ count_ntv_events_cb() [2/2]

static hsa_status_t count_ntv_events_cb ( const  rocprofiler_info_data_t,
void *   
)
static
Here is the caller graph for this function:

◆ count_unique_events()

int count_unique_events ( uint64_t *  events_id,
int  num_events,
int *  num_unique 
)
static

Definition at line 1841 of file roc_profiler.c.

1842{
1843 int papi_errno = PAPI_OK;
1844 char name[PAPI_MAX_STR_LEN] = { 0 };
1845 int i;
1846 int count = 0;
1847 void *count_table, *p;
1848
1849 htable_init(&count_table);
1850
1851 for (i = 0; i < num_events; ++i) {
1852 event_info_t info;
1853 papi_errno = evt_id_to_info(events_id[i], &info);
1854 if (papi_errno != PAPI_OK) {
1855 return papi_errno;
1856 }
1857 if (ntv_table_p->events[info.nameid].instances > 1) {
1858 sprintf(name, "%s[%i]", ntv_table_p->events[info.nameid].name, info.instance);
1859 } else {
1860 sprintf(name, "%s", ntv_table_p->events[info.nameid].name);
1861 }
1862 if (htable_find(count_table, name, &p) != HTABLE_SUCCESS) {
1863 htable_insert(count_table, name, NULL);
1864 ++count;
1865 }
1866 }
1867
1868 *num_unique = count;
1869
1870 htable_shutdown(count_table);
1871 return papi_errno;
1872}
uint64_t * events_id
Here is the call graph for this function:
Here is the caller graph for this function:

◆ ctx_close()

int ctx_close ( rocp_ctx_t  rocp_ctx)
static

Definition at line 1324 of file roc_profiler.c.

1325{
1326 int papi_errno;
1327 int i, devs_count;
1328 rocc_dev_get_count(rocp_ctx->u.sampling.device_map, &devs_count);
1329
1330 for (i = 0; i < devs_count; ++i) {
1331 if (rocp_close_p(rocp_ctx->u.sampling.contexts[i]) != HSA_STATUS_SUCCESS) {
1332 papi_errno = PAPI_EMISC;
1333 }
1334
1335 if (hsa_queue_destroy_p(rocp_ctx->u.sampling.ctx_prop[i].queue) != HSA_STATUS_SUCCESS) {
1336 papi_errno = PAPI_EMISC;
1337 }
1338 }
1339
1340 papi_errno = rocc_dev_release(rocp_ctx->u.sampling.device_map);
1341
1342 return papi_errno;
1343}
#define PAPI_EMISC
Definition: f90papi.h:122
hsa_status_t(* hsa_queue_destroy_p)(hsa_queue_t *)
Definition: roc_common.c:13
int rocc_dev_get_count(rocc_bitmap_t bitmap, int *num_devices)
Definition: roc_common.c:121
int rocc_dev_release(rocc_bitmap_t bitmap)
Definition: roc_common.c:106
static hsa_status_t(* rocp_close_p)(rocprofiler_t *)
Definition: roc_profiler.c:94
Here is the call graph for this function:
Here is the caller graph for this function:

◆ ctx_finalize()

int ctx_finalize ( rocp_ctx_t *  rocp_ctx)
static

Definition at line 1958 of file roc_profiler.c.

1959{
1960 if (rocm_prof_mode == ROCM_PROFILE_SAMPLING_MODE) {
1961 return sampling_ctx_finalize(rocp_ctx);
1962 }
1963
1964 return intercept_ctx_finalize(rocp_ctx);
1965}
static int sampling_ctx_finalize(rocp_ctx_t *)
static int intercept_ctx_finalize(rocp_ctx_t *)
unsigned int rocm_prof_mode
Definition: roc_profiler.c:84
#define ROCM_PROFILE_SAMPLING_MODE
Here is the call graph for this function:
Here is the caller graph for this function:

◆ ctx_get_dev_feature_count()

int ctx_get_dev_feature_count ( rocp_ctx_t  rocp_ctx,
int  i 
)
static

Definition at line 1384 of file roc_profiler.c.

1385{
1386 if (rocm_prof_mode == ROCM_PROFILE_SAMPLING_MODE) {
1387 return sampling_ctx_get_dev_feature_count(rocp_ctx, i);
1388 }
1389
1390 return intercept_ctx_get_dev_feature_count(rocp_ctx, i);
1391}
static int intercept_ctx_get_dev_feature_count(rocp_ctx_t, int)
static int sampling_ctx_get_dev_feature_count(rocp_ctx_t, int)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ ctx_init()

int ctx_init ( uint64_t *  events_id,
int  num_events,
rocp_ctx_t *  rocp_ctx 
)
static

Context init and finalize

Definition at line 1947 of file roc_profiler.c.

1949{
1950 if (rocm_prof_mode == ROCM_PROFILE_SAMPLING_MODE) {
1951 return sampling_ctx_init(events_id, num_events, rocp_ctx);
1952 }
1953
1954 return intercept_ctx_init(events_id, num_events, rocp_ctx);
1955}
static int intercept_ctx_init(uint64_t *, int, rocp_ctx_t *)
static int sampling_ctx_init(uint64_t *, int, rocp_ctx_t *)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ ctx_open()

int ctx_open ( rocp_ctx_t  rocp_ctx)
static

Definition at line 1268 of file roc_profiler.c.

1269{
1270 int papi_errno = PAPI_OK;
1271 int i, j;
1272 rocprofiler_feature_t *features = rocp_ctx->u.sampling.features;
1273 int dev_feature_offset = 0;
1274 int dev_count;
1275 rocprofiler_t **contexts = rocp_ctx->u.sampling.contexts;
1276 rocprofiler_properties_t *ctx_prop = rocp_ctx->u.sampling.ctx_prop;
1277
1278 papi_errno = rocc_dev_get_count(rocp_ctx->u.sampling.device_map, &dev_count);
1279 if (papi_errno != PAPI_OK) {
1280 return papi_errno;
1281 }
1282
1283 for (i = 0; i < dev_count; ++i) {
1284 int dev_id;
1285 papi_errno = rocc_dev_get_id(rocp_ctx->u.sampling.device_map, i, &dev_id);
1286 if (papi_errno != PAPI_OK) {
1287 goto fn_fail;
1288 }
1289
1290 int dev_feature_count = ctx_get_dev_feature_count(rocp_ctx, dev_id);
1291 rocprofiler_feature_t *dev_features = features + dev_feature_offset;
1292
1293 const uint32_t mode =
1294 ROCPROFILER_MODE_STANDALONE | ROCPROFILER_MODE_CREATEQUEUE | ROCPROFILER_MODE_SINGLEGROUP;
1295
1296 ctx_prop[i].queue_depth = 128;
1297 hsa_status_t rocp_errno = rocp_open_p(device_table_p->devices[dev_id], dev_features,
1298 dev_feature_count, &contexts[i], mode,
1299 &ctx_prop[i]);
1300 if (rocp_errno != HSA_STATUS_SUCCESS) {
1301 papi_errno = PAPI_EMISC;
1302 goto fn_fail;
1303 }
1304
1305 dev_feature_offset += dev_feature_count;
1306 }
1307
1308 papi_errno = rocc_dev_acquire(rocp_ctx->u.sampling.device_map);
1309 if (papi_errno != PAPI_OK) {
1310 goto fn_fail;
1311 }
1312
1313 fn_exit:
1314 return papi_errno;
1315 fn_fail:
1316 for (j = 0; j < i; ++j) {
1317 rocp_close_p(contexts[j]);
1318 hsa_queue_destroy_p(ctx_prop[j].queue);
1319 }
1320 goto fn_exit;
1321}
int rocc_dev_acquire(rocc_bitmap_t bitmap)
Definition: roc_common.c:93
device_table_t * device_table_p
Definition: roc_common.c:19
int rocc_dev_get_id(rocc_bitmap_t bitmap, int dev_count, int *device_id)
Definition: roc_common.c:140
static hsa_status_t(* rocp_open_p)(hsa_agent_t, rocprofiler_feature_t *, uint32_t, rocprofiler_t **, uint32_t, rocprofiler_properties_t *)
Definition: roc_profiler.c:93
static int ctx_get_dev_feature_count(rocp_ctx_t, int)
hsa_agent_t devices[PAPI_ROCM_MAX_DEV_COUNT]
Definition: roc_common.h:23
Here is the call graph for this function:
Here is the caller graph for this function:

◆ decrement_and_fetch_dispatch_counter()

int decrement_and_fetch_dispatch_counter ( unsigned long  tid)
static

Definition at line 2327 of file roc_profiler.c.

2328{
2329 int htable_errno = HTABLE_SUCCESS;
2330 char key[PAPI_MIN_STR_LEN] = { 0 };
2331
2332 sprintf(key, "%lu", tid);
2333 int *counter_p;
2334 htable_errno = htable_find(htable, (const char *) key, (void **) &counter_p);
2335 if (htable_errno != HTABLE_SUCCESS) {
2336 return 0;
2337 }
2338
2339 return --(*counter_p);
2340}
#define PAPI_MIN_STR_LEN
Definition: f90papi.h:208
static pthread_key_t key
static void * htable
Definition: roc_profiler.c:147
Here is the call graph for this function:
Here is the caller graph for this function:

◆ dispatch_cb()

hsa_status_t dispatch_cb ( const rocprofiler_callback_data_t *  callback_data,
void *  arg,
rocprofiler_group_t *  group 
)
static

Definition at line 2114 of file roc_profiler.c.

2115{
2116 hsa_agent_t agent = callback_data->agent;
2117 hsa_status_t status = HSA_STATUS_SUCCESS;
2118
2119 int dev_id;
2120 rocc_dev_get_agent_id(agent, &dev_id);
2121
2122 cb_dispatch_arg_t *dispatch_arg = (cb_dispatch_arg_t *) arg;
2123 rocprofiler_pool_t *pool = dispatch_arg->pools[dev_id];
2124 rocprofiler_pool_entry_t pool_entry;
2125 hsa_status_t rocp_errno = rocp_pool_fetch_p(pool, &pool_entry);
2126 if (rocp_errno != HSA_STATUS_SUCCESS) {
2127 status = rocp_errno;
2128 goto fn_exit;
2129 }
2130
2131 rocprofiler_t *context = pool_entry.context;
2132 cb_context_payload_t *payload = (cb_context_payload_t *) pool_entry.payload;
2133
2134 rocp_errno = rocp_get_group_p(context, 0, group);
2135 if (rocp_errno != HSA_STATUS_SUCCESS) {
2136 status = rocp_errno;
2137 goto fn_exit;
2138 }
2139
2140 unsigned long tid;
2141 rocc_thread_get_id(&tid);
2142 payload->tid = tid;
2143 payload->agent = agent;
2144 payload->group = *group;
2145 payload->data = *callback_data;
2146
2147 _papi_hwi_lock(_rocm_lock);
2148 payload->valid = true;
2149 _papi_hwi_unlock(_rocm_lock);
2150
2151 fn_exit:
2152 return status;
2153}
int rocc_dev_get_agent_id(hsa_agent_t agent, int *dev_id)
Definition: roc_common.c:169
int rocc_thread_get_id(unsigned long *tid)
Definition: roc_common.c:193
static hsa_status_t(* rocp_pool_fetch_p)(rocprofiler_pool_t *, rocprofiler_pool_entry_t *)
Definition: roc_profiler.c:108
static hsa_status_t(* rocp_get_group_p)(rocprofiler_t *, uint32_t, rocprofiler_group_t *)
Definition: roc_profiler.c:99
unsigned int _rocm_lock
Definition: roc_profiler.c:85
rocprofiler_callback_data_t data
rocprofiler_group_t group
rocprofiler_pool_t * pools[PAPI_ROCM_MAX_DEV_COUNT]
inline_static int _papi_hwi_lock(int lck)
Definition: threads.h:69
inline_static int _papi_hwi_unlock(int lck)
Definition: threads.h:83
Here is the call graph for this function:
Here is the caller graph for this function:

◆ event_id_to_dev_id_cb()

static int event_id_to_dev_id_cb ( uint64_t  event_id,
int *  device 
)
static

sampling_ctx_open utility functions

Definition at line 1145 of file roc_profiler.c.

1146{
1147 event_info_t info;
1148 int papi_errno = evt_id_to_info(event_id, &info);
1149 *device = info.device;
1150 return papi_errno;
1151}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ evt_code_to_name()

int evt_code_to_name ( uint64_t  event_code,
char *  name,
int  len 
)
static

Definition at line 739 of file roc_profiler.c.

740{
741 int papi_errno;
742
743 event_info_t info;
744 papi_errno = evt_id_to_info(event_code, &info);
745 if (papi_errno != PAPI_OK) {
746 return papi_errno;
747 }
748
749 switch (info.flags) {
750 case (DEVICE_FLAG | INSTAN_FLAG):
751 snprintf(name, len, "%s:device=%i:instance=%i", ntv_table_p->events[info.nameid].name, info.device, info.instance);
752 break;
753 case (DEVICE_FLAG):
754 snprintf(name, len, "%s:device=%i", ntv_table_p->events[info.nameid].name, info.device);
755 break;
756 default:
757 snprintf(name, len, "%s", ntv_table_p->events[info.nameid].name);
758 }
759
760 return papi_errno;
761}
#define INSTAN_FLAG
Definition: roc_profiler.c:40
#define DEVICE_FLAG
Definition: roc_profiler.c:39
Here is the call graph for this function:
Here is the caller graph for this function:

◆ evt_id_create()

int evt_id_create ( event_info_t *  info,
uint64_t *  event_id 
)
static

Definition at line 764 of file roc_profiler.c.

765{
766 *event_id = (uint64_t)(info->device << DEVICE_SHIFT);
767 *event_id |= (uint64_t)(info->instance << INSTAN_SHIFT);
768 *event_id |= (uint64_t)(info->flags << QLMASK_SHIFT);
769 *event_id |= (uint64_t)(info->nameid << NAMEID_SHIFT);
770 return PAPI_OK;
771}
#define INSTAN_SHIFT
Definition: roc_profiler.c:32
#define NAMEID_SHIFT
Definition: roc_profiler.c:34
#define DEVICE_SHIFT
Definition: roc_profiler.c:31
#define QLMASK_SHIFT
Definition: roc_profiler.c:33
Here is the caller graph for this function:

◆ evt_id_to_info()

int evt_id_to_info ( uint64_t  event_id,
event_info_t *  info 
)
static

Definition at line 774 of file roc_profiler.c.

775{
776 info->device = (int)((event_id & DEVICE_MASK) >> DEVICE_SHIFT);
777 info->instance = (int)((event_id & INSTAN_MASK) >> INSTAN_SHIFT);
778 info->flags = (int)((event_id & QLMASK_MASK) >> QLMASK_SHIFT);
779 info->nameid = (int)((event_id & NAMEID_MASK) >> NAMEID_SHIFT);
780
781 if (info->device >= device_table_p->count) {
782 return PAPI_ENOEVNT;
783 }
784
785 if (0 == (info->flags & DEVICE_FLAG) && info->device > 0) {
786 return PAPI_ENOEVNT;
787 }
788
789 if (rocc_dev_check(ntv_table_p->events[info->nameid].device_map, info->device) == 0) {
790 return PAPI_ENOEVNT;
791 }
792
793 if (info->nameid >= ntv_table_p->count) {
794 return PAPI_ENOEVNT;
795 }
796
797 if (ntv_table_p->events[info->nameid].instances > 1 && 0 == (info->flags & INSTAN_FLAG) && info->instance > 0) {
798 return PAPI_ENOEVNT;
799 }
800
801 if (info->instance >= ntv_table_p->events[info->nameid].instances) {
802 return PAPI_ENOEVNT;
803 }
804
805 return PAPI_OK;
806}
#define PAPI_ENOEVNT
Definition: f90papi.h:139
int rocc_dev_check(rocc_bitmap_t bitmap, int i)
Definition: roc_common.c:187
#define QLMASK_MASK
Definition: roc_profiler.c:37
#define NAMEID_MASK
Definition: roc_profiler.c:38
#define DEVICE_MASK
Definition: roc_profiler.c:35
#define INSTAN_MASK
Definition: roc_profiler.c:36
int
Definition: sde_internal.h:89
rocc_bitmap_t device_map
Definition: roc_profiler.c:46
Here is the call graph for this function:
Here is the caller graph for this function:

◆ evt_name_to_basename()

int evt_name_to_basename ( const char *  name,
char *  base,
int  len 
)
static

Definition at line 851 of file roc_profiler.c.

852{
853 char *p = strstr(name, ":");
854 if (p) {
855 if (len < (int)(p - name)) {
856 return PAPI_EBUF;
857 }
858 strncpy(base, name, (size_t)(p - name));
859 } else {
860 if (len < (int) strlen(name)) {
861 return PAPI_EBUF;
862 }
863 strncpy(base, name, (size_t) len);
864 }
865 return PAPI_OK;
866}
#define PAPI_EBUF
Definition: f90papi.h:253
Here is the caller graph for this function:

◆ evt_name_to_device()

int evt_name_to_device ( const char *  name,
int *  device 
)
static

Definition at line 809 of file roc_profiler.c.

810{
811 char *p = strstr(name, ":device=");
812 if (!p) {
813 return PAPI_ENOEVNT;
814 }
815 *device = (int) strtol(p + strlen(":device="), NULL, 10);
816 return PAPI_OK;
817}
Here is the caller graph for this function:

◆ evt_name_to_instance()

int evt_name_to_instance ( const char *  name,
int *  instance 
)
static

Definition at line 820 of file roc_profiler.c.

821{
822 *instance = 0;
823
824 char basename[PAPI_MAX_STR_LEN] = { 0 };
825 int papi_errno = evt_name_to_basename(name, basename, PAPI_MAX_STR_LEN);
826 if (papi_errno != PAPI_OK) {
827 return papi_errno;
828 }
829
830 ntv_event_t *event;
831 if (htable_find(htable, basename, (void **) &event) != HTABLE_SUCCESS) {
832 return PAPI_ENOEVNT;
833 }
834
835 char *p = strstr(name, ":instance=");
836 if (event->instances > 1) {
837 if (!p) {
838 return PAPI_ENOEVNT;
839 }
840 *instance = (int) strtol(p + strlen(":instance="), NULL, 10);
841 } else {
842 if (p) {
843 return PAPI_ENOEVNT;
844 }
845 }
846
847 return PAPI_OK;
848}
static int evt_name_to_basename(const char *name, char *base, int len)
Definition: roc_profiler.c:851
Here is the call graph for this function:
Here is the caller graph for this function:

◆ fetch_dispatch_counter()

int fetch_dispatch_counter ( unsigned long  tid)
static

Definition at line 2281 of file roc_profiler.c.

2282{
2283 int htable_errno = HTABLE_SUCCESS;
2284 char key[PAPI_MIN_STR_LEN] = { 0 };
2285
2286 sprintf(key, "%lu", tid);
2287 int *counter_p;
2288 htable_errno = htable_find(htable, (const char *) key, (void **) &counter_p);
2289 if (htable_errno != HTABLE_SUCCESS) {
2290 return 0;
2291 }
2292
2293 return (*counter_p);
2294}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ finalize_features()

int finalize_features ( rocprofiler_feature_t *  features,
int  feature_count 
)
static

Definition at line 1371 of file roc_profiler.c.

1372{
1373 int i;
1374 for (i = 0; i < feature_count; ++i) {
1375 papi_free((char *) features[i].name);
1376 }
1377 return PAPI_OK;
1378}
Here is the caller graph for this function:

◆ free_context_node()

void free_context_node ( cb_context_node_t *  n)
static

Definition at line 2368 of file roc_profiler.c.

2369{
2370 papi_free(n->counters);
2371 papi_free(n);
2372}
Here is the caller graph for this function:

◆ get_context_counters()

int get_context_counters ( int  dev_id,
cb_context_node_t *  n,
rocp_ctx_t  rocp_ctx 
)
static

Definition at line 2343 of file roc_profiler.c.

2344{
2345 int papi_errno = PAPI_OK;
2346 uint64_t *events_id = rocp_ctx->u.intercept.events_id;
2347
2348 /* Here we get events_id ordered according to user's viewpoint and we want
2349 * to map these to events_id ordered according to callbacks' viewpoint. We
2350 * compare events from the user and the callbacks using a brute force
2351 * approach as the number of events is typically small. */
2352 int i, j;
2353 for (i = 0; i < intercept_global_state.feature_count; ++i) {
2354 uint64_t event_id = intercept_global_state.events_id[i] | (dev_id << DEVICE_SHIFT);
2355
2356 for (j = 0; j < rocp_ctx->u.intercept.feature_count; ++j) {
2357 if (event_id == events_id[j]) {
2358 rocp_ctx->u.intercept.counters[j] += n->counters[i];
2359 break;
2360 }
2361 }
2362 }
2363
2364 return papi_errno;
2365}
static struct @2 intercept_global_state
Here is the caller graph for this function:

◆ get_context_node()

int get_context_node ( int  dev_id,
cb_context_node_t **  n 
)
static

Definition at line 2297 of file roc_profiler.c.

2298{
2299     /* Detach from the list of device dev_id the node that belongs to the
2300      * calling thread and sits closest to the tail, and hand it back through n.
2301      * put_context_node() pushes new nodes at the head, so this is the node
2302      * that was queued first. *n is set to NULL when the calling thread has
2303      * no node in the list. Always returns PAPI_OK. */
2304     cb_context_node_t *curr;
2305     cb_context_node_t *prev = NULL;
2306     cb_context_node_t *match = NULL;
2307     cb_context_node_t *match_prev = NULL;
2308
2309     /* The calling thread cannot change while the list is walked, so its id
2310      * is looked up once instead of once per node. */
2311     unsigned long tid;
2312     rocc_thread_get_id(&tid);
2313
2314     for (curr = cb_ctx_list_heads[dev_id]; curr != NULL; curr = curr->next) {
2315         if (curr->tid == tid) {
2316             match = curr;
2317             match_prev = prev;
2318         }
2319         prev = curr;
2320     }
2321
2322     if (match != NULL) {
2323         if (match_prev == NULL) {
2324             /* The match is the head: advance the head to its successor.
2325              * Resetting the head to NULL here would drop every node queued
2326              * behind it. */
2327             cb_ctx_list_heads[dev_id] = match->next;
2328         } else {
2329             match_prev->next = match->next;
2330         }
2331         /* The node no longer belongs to the list. */
2332         match->next = NULL;
2333     }
2334
2335     *n = match;
2336     return PAPI_OK;
2337}
static cb_context_node_t * cb_ctx_list_heads[PAPI_ROCM_MAX_DEV_COUNT]
struct cb_context_node * next
unsigned long tid
Here is the call graph for this function:
Here is the caller graph for this function:

◆ get_ntv_events_cb()

hsa_status_t get_ntv_events_cb ( const rocprofiler_info_data_t  info,
void *  ntv_arg 
)
static

Definition at line 880 of file roc_profiler.c.

881{
882 struct ntv_arg *arg = (struct ntv_arg *) ntv_arg;
883 int capacity = ntv_table.count;
884 int *count = &arg->count;
886
887 if (*count > capacity) {
888 snprintf(error_string, PAPI_MAX_STR_LEN, "Number of events exceeds detected count.");
889 return HSA_STATUS_ERROR;
890 }
891
892 ntv_event_t *event;
893 if (htable_find(htable, info.metric.name, (void **) &event) != HTABLE_SUCCESS) {
894 event = &events[(*count)++];
895 event->name = papi_strdup(info.metric.name);
896 event->descr = papi_strdup(info.metric.description);
897 event->instances = info.metric.instances;
898 htable_insert(htable, info.metric.name, event);
899 }
900
901 rocc_dev_set(&event->device_map, arg->dev_id);
902
903 return HSA_STATUS_SUCCESS;
904}
char events[MAX_EVENTS][BUFSIZ]
#define papi_strdup(a)
Definition: papi_memory.h:39
char error_string[PAPI_MAX_STR_LEN]
Definition: roc_common.c:17
int rocc_dev_set(rocc_bitmap_t *bitmap, int i)
Definition: roc_common.c:180
static ntv_event_table_t ntv_table
Definition: roc_profiler.c:145
int dev_id
Definition: roc_profiler.c:678
Here is the call graph for this function:
Here is the caller graph for this function:

◆ increment_and_fetch_dispatch_counter()

int increment_and_fetch_dispatch_counter ( unsigned long  tid)
static

Definition at line 2265 of file roc_profiler.c.

2266{
2267     /* Increment the dispatch counter registered for thread tid and return
2268      * the new value.
2269      *
2270      * Returns -1 when no counter is registered under tid. The caller in
2271      * process_context_entry() tests the result with "< 0" to detect an
2272      * unregistered thread, so the failure value has to be negative for that
2273      * test to be reachable. */
2274     char key[PAPI_MIN_STR_LEN] = { 0 };
2275     int *counter_p = NULL;
2276
2277     /* The table is keyed by the decimal thread id. */
2278     snprintf(key, sizeof(key), "%lu", tid);
2279
2280     if (htable_find(htable, (const char *) key, (void **) &counter_p) != HTABLE_SUCCESS) {
2281         return -1;
2282     }
2283
2284     return ++(*counter_p);
2285}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ init_callbacks()

int init_callbacks ( rocprofiler_feature_t *  features,
int  feature_count 
)
static

Definition at line 1993 of file roc_profiler.c.

1994{
1995     /* Open one rocprofiler pool per device listed in device_table_p, all
1996      * sharing the same feature list and context handler, then install the
1997      * queue dispatch callback.
1998      *
1999      * Returns PAPI_OK on success, PAPI_ENOMEM if the handler argument cannot
2000      * be allocated, PAPI_EMISC if a pool cannot be opened or the callbacks
2001      * cannot be set. The handler argument is released on every failure. */
2002     cb_context_arg_t *handler_arg = papi_calloc(1, sizeof(cb_context_arg_t));
2003     if (handler_arg == NULL) {
2004         return PAPI_ENOMEM;
2005     }
2006
2007     handler_arg->features = features;
2008     handler_arg->feature_count = feature_count;
2009
2010     rocprofiler_pool_properties_t pool_props;
2011     pool_props.num_entries = 128;
2012     pool_props.payload_bytes = sizeof(cb_context_payload_t);
2013     pool_props.handler = context_handler_cb;
2014     pool_props.handler_arg = handler_arg;
2015
2016     /* FIXME: the intercept code initializes callbacks for every device
2017      *        regardless what the user asked for. Moreover, every device
2018      *        is initialized with the same callback events (features).
2019      *        The intercept code should eventually be changed to allow
2020      *        user to initialize different callbacks on different devices
2021      *        and also to reinitialize already initialized callbacks on
2022      *        any given device. Rocm 5.3.0 still does not support this
2023      *        callback initialization mechanism.
2024      */
2025     int dev;
2026     for (dev = 0; dev < device_table_p->count; ++dev) {
2027         rocprofiler_pool_t *pool = NULL;
2028         hsa_status_t status = rocp_pool_open_p(device_table_p->devices[dev], features,
2029                                                feature_count, &pool, 0, &pool_props);
2030         if (status != HSA_STATUS_SUCCESS) {
2031             papi_free(handler_arg);
2032             return PAPI_EMISC;
2033         }
2034
2035         cb_dispatch_arg.pools[dev] = pool;
2036     }
2037
2038     rocprofiler_queue_callbacks_t queue_cbs = { 0 };
2039     queue_cbs.dispatch = dispatch_cb;
2040
2041     if (rocp_set_queue_cbs_p(queue_cbs, &cb_dispatch_arg) != HSA_STATUS_SUCCESS) {
2042         papi_free(handler_arg);
2043         return PAPI_EMISC;
2044     }
2045
2046     return PAPI_OK;
2047}
#define PAPI_ENOMEM
Definition: f90papi.h:16
#define papi_calloc(a, b)
Definition: papi_memory.h:37
static cb_dispatch_arg_t cb_dispatch_arg
static hsa_status_t(* rocp_pool_open_p)(hsa_agent_t, rocprofiler_feature_t *, uint32_t, rocprofiler_pool_t **, uint32_t, rocprofiler_pool_properties_t *)
Definition: roc_profiler.c:106
static bool context_handler_cb(const rocprofiler_pool_entry_t *, void *)
static hsa_status_t dispatch_cb(const rocprofiler_callback_data_t *, void *, rocprofiler_group_t *)
static hsa_status_t(* rocp_set_queue_cbs_p)(rocprofiler_queue_callbacks_t, void *)
Definition: roc_profiler.c:110
rocprofiler_feature_t * features
Here is the call graph for this function:
Here is the caller graph for this function:

◆ init_event_table()

int init_event_table ( void  )
static

Definition at line 682 of file roc_profiler.c.

683{
684 int papi_errno = PAPI_OK;
685 int i;
686
687 for (i = 0; i < device_table_p->count; ++i) {
688 hsa_status_t rocp_errno = rocp_iterate_info_p(&device_table_p->devices[i],
689 ROCPROFILER_INFO_KIND_METRIC,
692 if (rocp_errno != HSA_STATUS_SUCCESS) {
693 const char *error_string_p;
694 hsa_status_string_p(rocp_errno, &error_string_p);
695 snprintf(error_string, PAPI_MAX_STR_LEN, "%s", error_string_p);
696 papi_errno = PAPI_EMISC;
697 goto fn_fail;
698 }
699 }
700
702 if (ntv_table.events == NULL) {
703 papi_errno = PAPI_ENOMEM;
704 goto fn_fail;
705 }
706
707 struct ntv_arg arg;
708 arg.count = 0;
709
710 for (i = 0; i < device_table_p->count; ++i) {
711 arg.dev_id = i;
712 hsa_status_t rocp_errno = rocp_iterate_info_p(&device_table_p->devices[i],
713 ROCPROFILER_INFO_KIND_METRIC,
715 &arg);
716 if (rocp_errno != HSA_STATUS_SUCCESS) {
717 const char *error_string_p;
718 hsa_status_string_p(rocp_errno, &error_string_p);
719 snprintf(error_string, PAPI_MAX_STR_LEN, "%s", error_string_p);
720 papi_errno = PAPI_EMISC;
721 goto fn_fail;
722 }
723 }
724
726 if (ntv_table.events == NULL) {
727 papi_errno = PAPI_ENOMEM;
728 }
729
730 ntv_table.count = arg.count;
731
732 fn_exit:
733 return papi_errno;
734 fn_fail:
735 goto fn_exit;
736}
#define papi_realloc(a, b)
Definition: papi_memory.h:36
hsa_status_t(* hsa_status_string_p)(hsa_status_t, const char **)
Definition: roc_common.c:14
static hsa_status_t get_ntv_events_cb(const rocprofiler_info_data_t, void *)
Definition: roc_profiler.c:880
static hsa_status_t(* rocp_iterate_info_p)(const hsa_agent_t *, rocprofiler_info_kind_t, hsa_status_t(*)(const rocprofiler_info_data_t, void *), void *)
Definition: roc_profiler.c:89
static hsa_status_t count_ntv_events_cb(const rocprofiler_info_data_t, void *)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ init_features()

int init_features ( uint64_t *  events_id,
int  num_events,
rocprofiler_feature_t *  features 
)
static

rocp_ctx_{open,close,start,stop,read,reset} sampling mode utility functions

Definition at line 1346 of file roc_profiler.c.

1347{
1348     /* Fill features[0..num_events) from the event ids in events_id.
1349      *
1350      * Each feature gets the metric kind and a papi_strdup() copy of the
1351      * event name; events with more than one instance are named
1352      * "<name>[<instance>]".
1353      *
1354      * Returns PAPI_OK on success, the error from evt_id_to_info() if an id
1355      * cannot be decoded, PAPI_EMISC if a name does not fit the local
1356      * buffer, or PAPI_ENOMEM if the name cannot be duplicated. Processing
1357      * stops at the first failure; entries filled before it keep their
1358      * names. */
1359     int papi_errno = PAPI_OK;
1360
1361     int i;
1362     for (i = 0; i < num_events; ++i) {
1363         char name[PAPI_MAX_STR_LEN] = { 0 };
1364         event_info_t info;
1365         papi_errno = evt_id_to_info(events_id[i], &info);
1366         if (papi_errno != PAPI_OK) {
1367             break;
1368         }
1369
1370         /* Bounded formatting: the name comes from the event table and must
1371          * not be allowed to overrun the stack buffer. */
1372         int len;
1373         if (ntv_table_p->events[info.nameid].instances > 1) {
1374             len = snprintf(name, sizeof(name), "%s[%i]",
1375                            ntv_table_p->events[info.nameid].name, info.instance);
1376         } else {
1377             len = snprintf(name, sizeof(name), "%s",
1378                            ntv_table_p->events[info.nameid].name);
1379         }
1380         if (len < 0 || (size_t) len >= sizeof(name)) {
1381             papi_errno = PAPI_EMISC;
1382             break;
1383         }
1384
1385         features[i].kind = (rocprofiler_feature_kind_t) ROCPROFILER_INFO_KIND_METRIC;
1386         features[i].name = papi_strdup(name);
1387         if (features[i].name == NULL) {
1388             papi_errno = PAPI_ENOMEM;
1389             break;
1390         }
1391     }
1392
1393     return papi_errno;
1394}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ init_rocp_env()

int init_rocp_env ( void  )
static

Definition at line 580 of file roc_profiler.c.

581{
582     /* One-time setup of the environment variables used to locate
583      * rocprofiler.
584      *
585      * - ROCP_HSA_INTERCEPT selects rocm_prof_mode (sampling mode if unset).
586      * - PAPI_ROCM_ROOT must be set; it is the root for the path searches.
587      * - HSA_TOOLS_LIB and ROCP_METRICS are kept when they already name a
588      *   regular file; otherwise they are overwritten with the first
589      *   candidate path under PAPI_ROCM_ROOT that stat() accepts.
590      *
591      * Returns PAPI_OK on success (and on every later call), PAPI_EMISC with
592      * error_string set when PAPI_ROCM_ROOT is unset or a file is not found.
593      * A candidate path that does not fit in PATH_MAX is treated as not
594      * found instead of overflowing the buffer. */
595     static int rocp_env_initialized;
596
597     if (rocp_env_initialized) {
598         return PAPI_OK;
599     }
600
601     const char *rocp_mode = getenv("ROCP_HSA_INTERCEPT");
602     rocm_prof_mode = (rocp_mode != NULL) ?
603         atoi(rocp_mode) : ROCM_PROFILE_SAMPLING_MODE;
604
605     char pathname[PATH_MAX];
606     char *rocm_root = getenv("PAPI_ROCM_ROOT");
607     if (rocm_root == NULL) {
608         snprintf(error_string, PAPI_MAX_STR_LEN, "Can't set HSA_TOOLS_LIB. PAPI_ROCM_ROOT not set.");
609         return PAPI_EMISC;
610     }
611
612     int len;
613     struct stat stat_info;
614
615     int override_hsa_tools_lib = 1;
616     char *hsa_tools_lib = getenv("HSA_TOOLS_LIB");
617     if (hsa_tools_lib &&
618         stat(hsa_tools_lib, &stat_info) == 0 && S_ISREG(stat_info.st_mode)) {
619         override_hsa_tools_lib = 0;
620     }
621
622     if (override_hsa_tools_lib) {
623         /* Account for change of librocprofiler64.so file location in rocm-5.2.0
624          * directory structure */
625
626         /* prefer .so.1 as .so might not be available in 5.7 anymore, in 5.6 it
627          * was a linker script. */
628         const char *candidates[] = {
629             "lib/librocprofiler64.so.1",
630             "lib/librocprofiler64.so",
631             "rocprofiler/lib/libprofiler64.so.1",
632             "rocprofiler/lib/libprofiler64.so",
633             NULL
634         };
635         const char **candidate = candidates;
636         while (*candidate) {
637             len = snprintf(pathname, sizeof(pathname), "%s/%s", rocm_root, *candidate);
638             if (len >= 0 && (size_t) len < sizeof(pathname) &&
639                 stat(pathname, &stat_info) == 0) {
640                 break;
641             }
642             ++candidate;
643         }
644         if (!*candidate) {
645             snprintf(error_string, PAPI_MAX_STR_LEN, "Rocprofiler librocprofiler64.so file not found.");
646             return PAPI_EMISC;
647         }
648
649         setenv("HSA_TOOLS_LIB", pathname, 1);
650     }
651
652     int override_rocp_metrics = 1;
653     char *rocp_metrics = getenv("ROCP_METRICS");
654     if (rocp_metrics &&
655         stat(rocp_metrics, &stat_info) == 0 && S_ISREG(stat_info.st_mode)) {
656         override_rocp_metrics = 0;
657     }
658
659     if (override_rocp_metrics) {
660         /* Account for change of metrics file location in rocm-5.2.0
661          * directory structure */
662         const char *metrics_candidates[] = {
663             "lib/rocprofiler/metrics.xml",
664             "rocprofiler/lib/metrics.xml",
665             NULL
666         };
667         const char **metrics = metrics_candidates;
668         while (*metrics) {
669             len = snprintf(pathname, sizeof(pathname), "%s/%s", rocm_root, *metrics);
670             if (len >= 0 && (size_t) len < sizeof(pathname) &&
671                 stat(pathname, &stat_info) == 0) {
672                 break;
673             }
674             ++metrics;
675         }
676         if (!*metrics) {
677             snprintf(error_string, PAPI_MAX_STR_LEN, "Rocprofiler metrics.xml file not found.");
678             return PAPI_EMISC;
679         }
680
681         setenv("ROCP_METRICS", pathname, 1);
682     }
683
684     rocp_env_initialized = 1;
685     return PAPI_OK;
686}
Here is the caller graph for this function:

◆ intercept_ctx_close()

int intercept_ctx_close ( rocp_ctx_t  rocp_ctx)
static

Definition at line 1512 of file roc_profiler.c.

1513{
1514 int papi_errno = PAPI_OK;
1515
1517
1518 if (intercept_global_state.active_thread_count == 0) {
1519 goto fn_exit;
1520 }
1521
1522 unsigned long tid;
1523 rocc_thread_get_id(&tid);
1524 papi_errno = unregister_dispatch_counter(tid);
1525 if (papi_errno != PAPI_OK) {
1526 goto fn_exit;
1527 }
1528
1529 ctx_finalize(&rocp_ctx);
1530
1531 fn_exit:
1533 return papi_errno;
1534}
static int unregister_dispatch_counter(unsigned long)
static int ctx_finalize(rocp_ctx_t *)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ intercept_ctx_finalize()

int intercept_ctx_finalize ( rocp_ctx_t *  rocp_ctx)
static

Definition at line 1926 of file roc_profiler.c.

1927{
1928     /* Release the intercept context referenced by rocp_ctx, including its
1929      * counters array when one is attached, and reset the caller's handle to
1930      * NULL. A handle that is already NULL is left alone.
1931      * Always returns PAPI_OK. */
1932     rocp_ctx_t ctx = *rocp_ctx;
1933
1934     if (ctx != NULL) {
1935         if (ctx->u.intercept.counters) {
1936             papi_free(ctx->u.intercept.counters);
1937         }
1938
1939         papi_free(ctx);
1940         *rocp_ctx = NULL;
1941     }
1942
1943     return PAPI_OK;
1944}
Here is the caller graph for this function:

◆ intercept_ctx_get_dev_feature_count()

int intercept_ctx_get_dev_feature_count ( rocp_ctx_t  rocp_ctx,
int  i 
)
static

Definition at line 1416 of file roc_profiler.c.

1417{
1418     /* Return the length of the first contiguous run of event ids in the
1419      * context whose device field equals i (0 when there is none). Entries
1420      * for device i that appear after that run ends are not counted. */
1421     const int total = rocp_ctx->u.intercept.feature_count;
1422     const uint64_t *ids = rocp_ctx->u.intercept.events_id;
1423     int pos = 0;
1424     int run_length = 0;
1425
1426     /* Skip the leading entries that belong to other devices. */
1427     for (; pos < total; ++pos) {
1428         if (((ids[pos] & DEVICE_MASK) >> DEVICE_SHIFT) == (uint64_t) i) {
1429             break;
1430         }
1431     }
1432
1433     /* Count consecutive entries that belong to device i. */
1434     for (; pos < total; ++pos) {
1435         if (((ids[pos] & DEVICE_MASK) >> DEVICE_SHIFT) != (uint64_t) i) {
1436             break;
1437         }
1438         ++run_length;
1439     }
1440
1441     return run_length;
1442}
static struct timeval start
Here is the caller graph for this function:

◆ intercept_ctx_init()

int intercept_ctx_init ( uint64_t *  events_id,
int  num_events,
rocp_ctx_t *  rocp_ctx 
)
static

Definition at line 1755 of file roc_profiler.c.

1756{
1757 int papi_errno = PAPI_OK;
1758 long long *counters = NULL;
1759 *rocp_ctx = NULL;
1760
1761 rocc_bitmap_t bitmap;
1763 if (papi_errno != PAPI_OK) {
1764 return papi_errno;
1765 }
1766
1767 if (intercept_global_state.events_id == NULL) {
1768 int num_unique_events = 0;
1769 count_unique_events(events_id, num_events, &num_unique_events);
1770 intercept_global_state.events_id = papi_calloc(num_unique_events, sizeof(uint64_t));
1771 if (intercept_global_state.events_id == NULL) {
1772 papi_errno = PAPI_ENOMEM;
1773 goto fn_fail_undo;
1774 }
1776
1777 intercept_global_state.features = papi_calloc(num_unique_events, sizeof(*intercept_global_state.features));
1778 if (intercept_global_state.features == NULL) {
1779 papi_errno = PAPI_ENOMEM;
1780 goto fn_fail_undo;
1781 }
1782
1783 papi_errno = init_features(intercept_global_state.events_id, num_unique_events, intercept_global_state.features);
1784 if (papi_errno != PAPI_OK) {
1785 goto fn_fail_undo;
1786 }
1787
1788 intercept_global_state.feature_count = num_unique_events;
1789
1790 papi_errno = save_callback_features(intercept_global_state.features, intercept_global_state.feature_count);
1791 if (papi_errno != PAPI_OK) {
1792 goto fn_fail_undo;
1793 }
1794
1795 papi_errno = init_callbacks(intercept_global_state.features, intercept_global_state.feature_count);
1796 if (papi_errno != PAPI_OK) {
1797 goto fn_fail;
1798 }
1799 }
1800
1801 counters = papi_calloc(num_events, sizeof(*counters));
1802 if (counters == NULL) {
1803 papi_errno = PAPI_ENOMEM;
1804 goto fn_fail;
1805 }
1806
1807 *rocp_ctx = papi_calloc(1, sizeof(**rocp_ctx));
1808 if (*rocp_ctx == NULL) {
1809 return PAPI_ENOMEM;
1810 }
1811
1812 (*rocp_ctx)->u.intercept.events_id = events_id;
1813 (*rocp_ctx)->u.intercept.counters = counters;
1814 (*rocp_ctx)->u.intercept.dispatch_count = 0;
1815 (*rocp_ctx)->u.intercept.device_map = bitmap;
1816 (*rocp_ctx)->u.intercept.feature_count = num_events;
1817
1818 fn_exit:
1819 return papi_errno;
1820 fn_fail:
1821 if (counters) {
1822 papi_free(counters);
1823 }
1824 if (*rocp_ctx) {
1825 papi_free(*rocp_ctx);
1826 }
1827 *rocp_ctx = NULL;
1828 goto fn_exit;
1829 fn_fail_undo:
1831 if (intercept_global_state.events_id) {
1833 }
1834 if (intercept_global_state.features) {
1836 }
1837 goto fn_exit;
1838}
int rocc_dev_get_map(rocc_dev_get_map_cb query_dev_id, uint64_t *events_id, int num_events, rocc_bitmap_t *bitmap)
Definition: roc_common.c:74
int64_t rocc_bitmap_t
Definition: roc_common.h:19
static int count_unique_events(uint64_t *events_id, int num_events, int *num_unique)
static int save_callback_features(rocprofiler_feature_t *features, int feature_count)
static int cleanup_callback_features(rocprofiler_feature_t *features, int feature_count)
static int init_callbacks(rocprofiler_feature_t *, int)
static int copy_unique_events(uint64_t *target, uint64_t *source, int source_len)
static int init_features(uint64_t *, int, rocprofiler_feature_t *)
static int event_id_to_dev_id_cb(uint64_t event_id, int *device)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ intercept_ctx_open()

int intercept_ctx_open ( uint64_t *  events_id,
int  num_events,
rocp_ctx_t *  rocp_ctx 
)
static

Definition at line 1472 of file roc_profiler.c.

1473{
1474 int papi_errno = PAPI_OK;
1475
1476 if (num_events <= 0) {
1477 return PAPI_ENOEVNT;
1478 }
1479
1481
1482 papi_errno = verify_events(events_id, num_events);
1483 if (papi_errno != PAPI_OK) {
1484 SUBDBG("[ROCP intercept mode] Can only monitor one set of events "
1485 "per application run.");
1486 goto fn_fail;
1487 }
1488
1489 papi_errno = ctx_init(events_id, num_events, rocp_ctx);
1490 if (papi_errno != PAPI_OK) {
1491 goto fn_fail;
1492 }
1493
1494 unsigned long tid;
1495 rocc_thread_get_id(&tid);
1496 papi_errno = register_dispatch_counter(tid, &(*rocp_ctx)->u.intercept.dispatch_count);
1497 if (papi_errno != PAPI_OK) {
1498 goto fn_fail;
1499 }
1500
1501 (*rocp_ctx)->u.intercept.state |= ROCM_EVENTS_OPENED;
1502
1503 fn_exit:
1505 return papi_errno;
1506 fn_fail:
1507 ctx_finalize(rocp_ctx);
1508 goto fn_exit;
1509}
#define SUBDBG(format, args...)
Definition: papi_debug.h:64
static int verify_events(uint64_t *, int)
static int register_dispatch_counter(unsigned long, int *)
static int ctx_init(uint64_t *, int, rocp_ctx_t *)
#define ROCM_EVENTS_OPENED
Here is the call graph for this function:
Here is the caller graph for this function:

◆ intercept_ctx_read()

int intercept_ctx_read ( rocp_ctx_t  rocp_ctx,
long long **  counts 
)
static

Definition at line 1607 of file roc_profiler.c.

1608{
1609 int papi_errno = PAPI_OK;
1610
1612
1613 unsigned long tid;
1614 rocc_thread_get_id(&tid);
1615 int dispatch_count = fetch_dispatch_counter(tid);
1616 if (dispatch_count == 0) {
1617 *counts = rocp_ctx->u.intercept.counters;
1618 goto fn_exit;
1619 }
1620
1621 cb_context_node_t *n = NULL;
1622
1623 int devs_count;
1624 papi_errno = rocc_dev_get_count(rocp_ctx->u.intercept.device_map, &devs_count);
1625 if (papi_errno != PAPI_OK) {
1626 goto fn_exit;
1627 }
1628
1629 int i;
1630 for (i = 0; i < devs_count; ++i) {
1631 int dev_id;
1632 papi_errno = rocc_dev_get_id(rocp_ctx->u.intercept.device_map, i, &dev_id);
1633 if (papi_errno != PAPI_OK) {
1634 goto fn_exit;
1635 }
1636
1637 while (dispatch_count > 0) {
1638 get_context_node(dev_id, &n);
1639 if (n == NULL) {
1640 break;
1641 }
1642
1643 get_context_counters(dev_id, n, rocp_ctx);
1644 dispatch_count = decrement_and_fetch_dispatch_counter(tid);
1646 }
1647 }
1648
1649 if (dispatch_count > 0) {
1650 SUBDBG("[ROCP intercept mode] User monitoring GPU i but running on j.");
1651 }
1652
1653 *counts = rocp_ctx->u.intercept.counters;
1654
1655 fn_exit:
1657 return papi_errno;
1658}
static void free_context_node(cb_context_node_t *)
static int get_context_counters(int, cb_context_node_t *, rocp_ctx_t)
static int get_context_node(int, cb_context_node_t **)
static int fetch_dispatch_counter(unsigned long)
static int decrement_and_fetch_dispatch_counter(unsigned long)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ intercept_ctx_reset()

int intercept_ctx_reset ( rocp_ctx_t  rocp_ctx)
static

Definition at line 1661 of file roc_profiler.c.

1662{
1663     /* Zero every counter slot of the context, one per feature.
1664      * Always returns PAPI_OK. */
1665     int slot = 0;
1666
1667     while (slot < rocp_ctx->u.intercept.feature_count) {
1668         rocp_ctx->u.intercept.counters[slot] = 0;
1669         ++slot;
1670     }
1671
1672     return PAPI_OK;
1673}
Here is the caller graph for this function:

◆ intercept_ctx_start()

int intercept_ctx_start ( rocp_ctx_t  rocp_ctx)
static

Definition at line 1537 of file roc_profiler.c.

1538{
1539 int papi_errno = PAPI_OK;
1540
1542
1543 if (!(rocp_ctx->u.sampling.state & ROCM_EVENTS_OPENED)) {
1544 SUBDBG("[ROCP intercept mode] Cannot start eventset, not opened.");
1545 papi_errno = PAPI_EINVAL;
1546 goto fn_fail;
1547 }
1548
1549 if (rocp_ctx->u.intercept.state & ROCM_EVENTS_RUNNING) {
1550 SUBDBG("[ROCP intercept mode] Cannot start eventset, already running.");
1551 papi_errno = PAPI_EINVAL;
1552 goto fn_fail;
1553 }
1554
1555 if (intercept_global_state.kernel_count++ == 0) {
1556 if (rocp_start_queue_cbs_p() != HSA_STATUS_SUCCESS) {
1557 papi_errno = PAPI_EMISC;
1558 goto fn_fail;
1559 }
1560 }
1561
1562 rocp_ctx->u.intercept.state |= ROCM_EVENTS_RUNNING;
1563
1564 fn_exit:
1566 return papi_errno;
1567 fn_fail:
1568 goto fn_exit;
1569}
#define PAPI_EINVAL
Definition: f90papi.h:115
static hsa_status_t(* rocp_start_queue_cbs_p)(void)
Definition: roc_profiler.c:111
#define ROCM_EVENTS_RUNNING
Here is the call graph for this function:
Here is the caller graph for this function:

◆ intercept_ctx_stop()

int intercept_ctx_stop ( rocp_ctx_t  rocp_ctx)
static

Definition at line 1572 of file roc_profiler.c.

1573{
1574 int papi_errno = PAPI_OK;
1575
1577
1578 if (!(rocp_ctx->u.sampling.state & ROCM_EVENTS_OPENED)) {
1579 SUBDBG("[ROCP intercept mode] Cannot stop eventset, not opened.");
1580 papi_errno = PAPI_EINVAL;
1581 goto fn_fail;
1582 }
1583
1584 if (!(rocp_ctx->u.intercept.state & ROCM_EVENTS_RUNNING)) {
1585 SUBDBG("[ROCP intercept mode] Cannot stop eventset, not running.");
1586 papi_errno = PAPI_EINVAL;
1587 goto fn_fail;
1588 }
1589
1590 if (--intercept_global_state.kernel_count == 0) {
1591 if (rocp_stop_queue_cbs_p() != HSA_STATUS_SUCCESS) {
1592 papi_errno = PAPI_EMISC;
1593 goto fn_fail;
1594 }
1595 }
1596
1597 rocp_ctx->u.intercept.state &= ~ROCM_EVENTS_RUNNING;
1598
1599 fn_exit:
1601 return papi_errno;
1602 fn_fail:
1603 goto fn_exit;
1604}
static hsa_status_t(* rocp_stop_queue_cbs_p)(void)
Definition: roc_profiler.c:112
Here is the call graph for this function:
Here is the caller graph for this function:

◆ intercept_shutdown()

int intercept_shutdown ( void  )
static

Definition at line 1688 of file roc_profiler.c.

1689{
1690 /* calling rocprofiler_pool_close() here would cause
1691 * a double free runtime error. */
1692
1696
1697 (*hsa_shut_down_p)();
1698
1700
1701 if (intercept_global_state.features) {
1704 }
1705
1706 if (intercept_global_state.events_id) {
1708 }
1709
1710 return PAPI_OK;
1711}
static int unload_rocp_sym(void)
Definition: roc_profiler.c:545
static int finalize_features(rocprofiler_feature_t *, int)
static int shutdown_event_table(void)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ load_rocp_sym()

int load_rocp_sym ( void  )
static

rocp_{init,shutdown} and rocp_ctx_{open,close,start,stop,read,reset} functions

rocp_init utility functions

Definition at line 470 of file roc_profiler.c.

471{
472 int papi_errno = PAPI_OK;
473
474 char *pathname = getenv("HSA_TOOLS_LIB");
475 if (pathname == NULL) {
476 snprintf(error_string, PAPI_MAX_STR_LEN, "Can't load librocprofiler64.so, neither PAPI_ROCM_ROOT nor HSA_TOOLS_LIB are set.");
477 goto fn_fail;
478 }
479
480 rocp_dlp = dlopen(pathname, RTLD_NOW | RTLD_GLOBAL);
481 if (rocp_dlp == NULL) {
482 sprintf(error_string, "%s", dlerror());
483 goto fn_fail;
484 }
485
486 rocp_get_info_p = dlsym(rocp_dlp, "rocprofiler_get_info");
487 rocp_iterate_info_p = dlsym(rocp_dlp, "rocprofiler_iterate_info");
488 rocp_error_string_p = dlsym(rocp_dlp, "rocprofiler_error_string");
489 rocp_open_p = dlsym(rocp_dlp, "rocprofiler_open");
490 rocp_close_p = dlsym(rocp_dlp, "rocprofiler_close");
491 rocp_group_count_p = dlsym(rocp_dlp, "rocprofiler_group_count");
492 rocp_start_p = dlsym(rocp_dlp, "rocprofiler_start");
493 rocp_read_p = dlsym(rocp_dlp, "rocprofiler_read");
494 rocp_stop_p = dlsym(rocp_dlp, "rocprofiler_stop");
495 rocp_get_group_p = dlsym(rocp_dlp, "rocprofiler_get_group");
496 rocp_get_data_p = dlsym(rocp_dlp, "rocprofiler_get_data");
497 rocp_group_get_data_p = dlsym(rocp_dlp, "rocprofiler_group_get_data");
498 rocp_get_metrics_p = dlsym(rocp_dlp, "rocprofiler_get_metrics");
499 rocp_reset_p = dlsym(rocp_dlp, "rocprofiler_reset");
500 rocp_pool_open_p = dlsym(rocp_dlp, "rocprofiler_pool_open");
501 rocp_pool_close_p = dlsym(rocp_dlp, "rocprofiler_pool_close");
502 rocp_pool_fetch_p = dlsym(rocp_dlp, "rocprofiler_pool_fetch");
503 rocp_pool_flush_p = dlsym(rocp_dlp, "rocprofiler_pool_flush");
504 rocp_set_queue_cbs_p = dlsym(rocp_dlp, "rocprofiler_set_queue_callbacks");
505 rocp_start_queue_cbs_p = dlsym(rocp_dlp, "rocprofiler_start_queue_callbacks");
506 rocp_stop_queue_cbs_p = dlsym(rocp_dlp, "rocprofiler_stop_queue_callbacks");
507 rocp_remove_queue_cbs_p= dlsym(rocp_dlp, "rocprofiler_remove_queue_callbacks");
508
509 int rocp_not_initialized = (!rocp_get_info_p ||
512 !rocp_open_p ||
513 !rocp_close_p ||
515 !rocp_start_p ||
516 !rocp_read_p ||
517 !rocp_stop_p ||
522 !rocp_reset_p ||
531
532 papi_errno = (rocp_not_initialized) ? PAPI_EMISC : PAPI_OK;
533 if (papi_errno != PAPI_OK) {
534 snprintf(error_string, PAPI_MAX_STR_LEN, "Error while loading rocprofiler symbols.");
535 }
536
537 fn_exit:
538 return papi_errno;
539 fn_fail:
540 papi_errno = PAPI_ENOSUPP;
541 goto fn_exit;
542}
#define PAPI_ENOSUPP
Definition: f90papi.h:244
static hsa_status_t(* rocp_start_p)(rocprofiler_t *, uint32_t)
Definition: roc_profiler.c:96
static hsa_status_t(* rocp_get_metrics_p)(const rocprofiler_t *)
Definition: roc_profiler.c:102
static hsa_status_t(* rocp_stop_p)(rocprofiler_t *, uint32_t)
Definition: roc_profiler.c:98
static hsa_status_t(* rocp_reset_p)(rocprofiler_t *, uint32_t)
Definition: roc_profiler.c:103
static hsa_status_t(* rocp_group_count_p)(const rocprofiler_t *, uint32_t *)
Definition: roc_profiler.c:95
static void * rocp_dlp
Definition: roc_profiler.c:144
static hsa_status_t(* rocp_group_get_data_p)(rocprofiler_group_t *)
Definition: roc_profiler.c:101
static hsa_status_t(* rocp_error_string_p)(const char **)
Definition: roc_profiler.c:90
static hsa_status_t(* rocp_get_info_p)(const hsa_agent_t *, rocprofiler_info_kind_t, void *)
Definition: roc_profiler.c:88
static hsa_status_t(* rocp_get_data_p)(rocprofiler_t *, uint32_t)
Definition: roc_profiler.c:100
static hsa_status_t(* rocp_remove_queue_cbs_p)(void)
Definition: roc_profiler.c:113
static hsa_status_t(* rocp_pool_flush_p)(rocprofiler_pool_t *)
Definition: roc_profiler.c:109
static hsa_status_t(* rocp_pool_close_p)(rocprofiler_pool_t *)
Definition: roc_profiler.c:107
static hsa_status_t(* rocp_read_p)(rocprofiler_t *, uint32_t)
Definition: roc_profiler.c:97
Here is the caller graph for this function:

◆ process_context_entry()

void process_context_entry ( cb_context_payload_t *  payload,
rocprofiler_feature_t *  features,
int  feature_count 
)
static

intercept mode counter read infrastructure

Definition at line 2167 of file roc_profiler.c.

2168{
2169 fn_check_again:
2171 if (payload->valid == false) {
2173 goto fn_check_again;
2174 }
2175
2176 if (feature_count < 1) {
2177 goto fn_exit;
2178 }
2179
2180 if (rocp_group_get_data_p(&payload->group) != HSA_STATUS_SUCCESS) {
2181 goto fn_exit;
2182 }
2183
2184 if (rocp_get_metrics_p(payload->group.context)) {
2185 goto fn_exit;
2186 }
2187
2188 if (increment_and_fetch_dispatch_counter(payload->tid) < 0) {
2189 /* thread not registered, ignore counters */
2190 goto fn_exit;
2191 }
2192
2194 if (n == NULL) {
2196 goto fn_exit;
2197 }
2198
2199 int dev_id;
2200 rocc_dev_get_agent_id(payload->agent, &dev_id);
2201
2202 n->tid = payload->tid;
2204 put_context_node(dev_id, n);
2205
2206 fn_exit:
2208}
static int increment_and_fetch_dispatch_counter(unsigned long)
static cb_context_node_t * alloc_context_node(int)
static void put_context_counters(rocprofiler_feature_t *, int, cb_context_node_t *)
static void put_context_node(int, cb_context_node_t *)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ put_context_counters()

void put_context_counters ( rocprofiler_feature_t *  features,
int  feature_count,
cb_context_node_t *  n 
)
static

Definition at line 2228 of file roc_profiler.c.

2229{
2230     /* Copy the result of each of the feature_count features into the
2231      * matching slot of n->counters, converting it to long long where the
2232      * result is not already a 64-bit integer. A slot whose feature reports
2233      * an unrecognised data kind is left untouched. */
2234     int idx;
2235     for (idx = 0; idx < feature_count; ++idx) {
2236         const rocprofiler_feature_t *feat = &features[idx];
2237
2238         if (feat->data.kind == ROCPROFILER_DATA_KIND_INT32) {
2239             n->counters[idx] = (long long) feat->data.result_int32;
2240         } else if (feat->data.kind == ROCPROFILER_DATA_KIND_INT64) {
2241             n->counters[idx] = feat->data.result_int64;
2242         } else if (feat->data.kind == ROCPROFILER_DATA_KIND_FLOAT) {
2243             n->counters[idx] = (long long) feat->data.result_float;
2244         } else if (feat->data.kind == ROCPROFILER_DATA_KIND_DOUBLE) {
2245             n->counters[idx] = (long long) feat->data.result_double;
2246         } else {
2247             SUBDBG("Unsupported data kind from rocprofiler");
2248         }
2249     }
2250}
double f(double a)
Definition: cpi.c:23
long long int long long
Definition: sde_internal.h:85
Here is the call graph for this function:
Here is the caller graph for this function:

◆ put_context_node()

void put_context_node ( int  dev_id,
cb_context_node_t *  n 
)
static

Definition at line 2253 of file roc_profiler.c.

2254{
2255     /* Push n onto the front of the list kept for device dev_id; the
2256      * previous head (NULL for an empty list) becomes n's successor. */
2257     cb_context_node_t *old_head = cb_ctx_list_heads[dev_id];
2258
2259     n->next = old_head;
2260     cb_ctx_list_heads[dev_id] = n;
2261}
Here is the caller graph for this function:

◆ register_dispatch_counter()

int register_dispatch_counter ( unsigned long  tid,
int *  counter 
)
static

Definition at line 2052 of file roc_profiler.c.

2053{
2054     /* Register counter as the dispatch counter of thread tid.
2055      *
2056      * The counter is stored in htable under the decimal thread id and
2057      * intercept_global_state.active_thread_count is incremented.
2058      *
2059      * Returns PAPI_OK on success, PAPI_EMISC if the thread already has a
2060      * counter registered or the table insertion fails. The active thread
2061      * count is only incremented when the insertion succeeded. */
2062     char key[PAPI_MIN_STR_LEN] = { 0 };
2063     int *counter_p = NULL;
2064
2065     /* FIXME: probably better using a different hash table for this */
2066     snprintf(key, sizeof(key), "%lu", tid);
2067
2068     if (htable_find(htable, key, (void **) &counter_p) == HTABLE_SUCCESS) {
2069         return PAPI_EMISC;
2070     }
2071
2072     if (htable_insert(htable, (const char *) key, counter) != HTABLE_SUCCESS) {
2073         return PAPI_EMISC;
2074     }
2075
2076     ++intercept_global_state.active_thread_count;
2077
2078     return PAPI_OK;
2079}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ rocp_ctx_close()

int rocp_ctx_close ( rocp_ctx_t  rocp_ctx)

Definition at line 401 of file roc_profiler.c.

402{
404 return sampling_ctx_close(rocp_ctx);
405 }
406
407 return intercept_ctx_close(rocp_ctx);
408}
static int intercept_ctx_close(rocp_ctx_t)
static int sampling_ctx_close(rocp_ctx_t)
Definition: roc_profiler.c:952
Here is the call graph for this function:
Here is the caller graph for this function:

◆ rocp_ctx_open()

int rocp_ctx_open ( uint64_t *  events_id,
int  num_events,
rocp_ctx_t *  rocp_ctx 
)

Definition at line 390 of file roc_profiler.c.

391{
393 return sampling_ctx_open(events_id, num_events, rocp_ctx);
394 }
395
396 return intercept_ctx_open(events_id, num_events, rocp_ctx);
397}
static int sampling_ctx_open(uint64_t *, int, rocp_ctx_t *)
Definition: roc_profiler.c:921
static int intercept_ctx_open(uint64_t *, int, rocp_ctx_t *)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ rocp_ctx_read()

int rocp_ctx_read ( rocp_ctx_t  rocp_ctx,
long long **  counts 
)

Definition at line 434 of file roc_profiler.c.

435{
437 return sampling_ctx_read(rocp_ctx, counts);
438 }
439
440 return intercept_ctx_read(rocp_ctx, counts);
441}
static int intercept_ctx_read(rocp_ctx_t, long long **)
static int sampling_ctx_read(rocp_ctx_t, long long **)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ rocp_ctx_reset()

int rocp_ctx_reset ( rocp_ctx_t  rocp_ctx)

Definition at line 445 of file roc_profiler.c.

446{
448 return sampling_ctx_reset(rocp_ctx);
449 }
450
451 return intercept_ctx_reset(rocp_ctx);
452}
static int intercept_ctx_reset(rocp_ctx_t)
static int sampling_ctx_reset(rocp_ctx_t)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ rocp_ctx_start()

int rocp_ctx_start ( rocp_ctx_t  rocp_ctx)

Definition at line 412 of file roc_profiler.c.

413{
415 return sampling_ctx_start(rocp_ctx);
416 }
417
418 return intercept_ctx_start(rocp_ctx);
419}
static int intercept_ctx_start(rocp_ctx_t)
static int sampling_ctx_start(rocp_ctx_t)
Definition: roc_profiler.c:973
Here is the call graph for this function:
Here is the caller graph for this function:

◆ rocp_ctx_stop()

int rocp_ctx_stop ( rocp_ctx_t  rocp_ctx)

Definition at line 423 of file roc_profiler.c.

424{
426 return sampling_ctx_stop(rocp_ctx);
427 }
428
429 return intercept_ctx_stop(rocp_ctx);
430}
static int intercept_ctx_stop(rocp_ctx_t)
static int sampling_ctx_stop(rocp_ctx_t)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ rocp_evt_code_to_descr()

int rocp_evt_code_to_descr ( uint64_t  event_code,
char *  descr,
int  len 
)

Definition at line 260 of file roc_profiler.c.

261{
262 int papi_errno;
263
264 event_info_t info;
265 papi_errno = evt_id_to_info(event_code, &info);
266 if (papi_errno != PAPI_OK) {
267 return papi_errno;
268 }
269
270 snprintf(descr, (size_t) len, "%s", ntv_table_p->events[info.nameid].descr);
271 return papi_errno;
272}
char * descr
Definition: roc_profiler.c:44
char * descr
Here is the call graph for this function:
Here is the caller graph for this function:

◆ rocp_evt_code_to_info()

int rocp_evt_code_to_info ( uint64_t  event_code,
PAPI_event_info_t info 
)

Definition at line 331 of file roc_profiler.c.

332{
333 int papi_errno;
334
335 event_info_t inf;
336 papi_errno = evt_id_to_info(event_code, &inf);
337 if (papi_errno != PAPI_OK) {
338 return papi_errno;
339 }
340
341 switch (inf.flags) {
342 case 0:
343 sprintf(info->symbol, "%s", ntv_table_p->events[inf.nameid].name);
344 sprintf(info->long_descr, "%s", ntv_table_p->events[inf.nameid].descr);
345 break;
346 case (DEVICE_FLAG | INSTAN_FLAG):
347 {
348 int i;
349 char devices[PAPI_MAX_STR_LEN] = { 0 };
350 for (i = 0; i < device_table_p->count; ++i) {
352 sprintf(devices + strlen(devices), "%i,", i);
353 }
354 }
355 *(devices + strlen(devices) - 1) = 0;
356 sprintf(info->symbol, "%s:device=%i:instance=%i", ntv_table_p->events[inf.nameid].name, inf.device, inf.instance);
357 sprintf(info->long_descr, "%s, masks:Mandatory device qualifier [%s]:Mandatory instance qualifier in range [0-%i]",
359 break;
360 }
361 case DEVICE_FLAG:
362 {
363 int i;
364 char devices[PAPI_MAX_STR_LEN] = { 0 };
365 for (i = 0; i < device_table_p->count; ++i) {
367 sprintf(devices + strlen(devices), "%i,", i);
368 }
369 }
370 *(devices + strlen(devices) - 1) = 0;
371 sprintf(info->symbol, "%s:device=%i", ntv_table_p->events[inf.nameid].name, inf.device);
372 sprintf(info->long_descr, "%s, masks:Mandatory device qualifier [%s]",
374 break;
375 }
376 case INSTAN_FLAG:
377 sprintf(info->symbol, "%s:instance=%i", ntv_table_p->events[inf.nameid].name, inf.instance);
378 sprintf(info->long_descr, "%s, masks:Mandatory instance qualifier in range [0-%i]",
380 break;
381 default:
382 papi_errno = PAPI_EINVAL;
383 }
384
385 return papi_errno;
386}
static nvmlDevice_t * devices
Definition: linux-nvml.c:146
char symbol[PAPI_HUGE_STR_LEN]
Definition: papi.h:960
char long_descr[PAPI_HUGE_STR_LEN]
Definition: papi.h:963
Here is the call graph for this function:
Here is the caller graph for this function:

◆ rocp_evt_code_to_name()

int rocp_evt_code_to_name ( uint64_t  event_code,
char *  name,
int  len 
)

Definition at line 324 of file roc_profiler.c.

325{
326 return evt_code_to_name(event_code, name, len);
327}
static int evt_code_to_name(uint64_t event_code, char *name, int len)
Definition: roc_profiler.c:739
Here is the call graph for this function:
Here is the caller graph for this function:

◆ rocp_evt_enum()

int rocp_evt_enum ( uint64_t *  event_code,
int  modifier 
)

Definition at line 193 of file roc_profiler.c.

194{
195 int papi_errno = PAPI_OK;
196 event_info_t info;
197 SUBDBG("ENTER: event_code: %lu, modifier: %d\n", *event_code, modifier);
198
199
200 switch(modifier) {
201 case PAPI_ENUM_FIRST:
202 if (ntv_table_p->count == 0) {
203 papi_errno = PAPI_ENOEVNT;
204 break;
205 }
206 info.device = 0;
207 info.instance = 0;
208 info.flags = 0;
209 info.nameid = 0;
210 papi_errno = evt_id_create(&info, event_code);
211 break;
212 case PAPI_ENUM_EVENTS:
213 papi_errno = evt_id_to_info(*event_code, &info);
214 if (papi_errno != PAPI_OK) {
215 break;
216 }
217 if (ntv_table_p->count > info.nameid + 1) {
218 info.device = 0;
219 info.instance = 0;
220 info.flags = 0;
221 info.nameid++;
222 papi_errno = evt_id_create(&info, event_code);
223 break;
224 }
225 papi_errno = PAPI_END;
226 break;
228 papi_errno = evt_id_to_info(*event_code, &info);
229 if (papi_errno != PAPI_OK) {
230 break;
231 }
232 if (info.flags == 0) {
233 info.device = 0;
234 info.instance = 0;
235 info.flags = DEVICE_FLAG;
236 papi_errno = evt_id_create(&info, event_code);
237 break;
238 }
239 if (info.flags & DEVICE_FLAG) {
240 if (ntv_table_p->events[info.nameid].instances > 1) {
241 info.device = 0;
242 info.instance = 0;
243 info.flags = INSTAN_FLAG;
244 papi_errno = evt_id_create(&info, event_code);
245 break;
246 }
247 }
248 papi_errno = PAPI_END;
249 break;
250 default:
251 papi_errno = PAPI_EINVAL;
252 }
253
254 SUBDBG("EXIT: %s\n", PAPI_strerror(papi_errno));
255 return papi_errno;
256}
Returns a string describing the PAPI error code.
#define PAPI_ENUM_EVENTS
Definition: f90papi.h:224
#define PAPI_ENUM_FIRST
Definition: f90papi.h:85
#define PAPI_END
Definition: f90papi.h:303
#define PAPI_NTV_ENUM_UMASKS
Definition: f90papi.h:66
static int evt_id_create(event_info_t *info, uint64_t *event_id)
Definition: roc_profiler.c:764
Here is the call graph for this function:
Here is the caller graph for this function:

◆ rocp_evt_name_to_code()

int rocp_evt_name_to_code ( const char *  name,
uint64_t *  event_code 
)

Definition at line 276 of file roc_profiler.c.

277{
278 int papi_errno = PAPI_OK;
279 int htable_errno;
280 SUBDBG("ENTER: name: %s, event_code: %p\n", name, event_code);
281
282 int device;
283 papi_errno = evt_name_to_device(name, &device);
284 if (papi_errno != PAPI_OK) {
285 goto fn_exit;
286 }
287
288 int instance;
289 papi_errno = evt_name_to_instance(name, &instance);
290 if (papi_errno != PAPI_OK) {
291 goto fn_exit;
292 }
293
294 char base[PAPI_MAX_STR_LEN] = { 0 };
295 papi_errno = evt_name_to_basename(name, base, PAPI_MAX_STR_LEN);
296 if (papi_errno != PAPI_OK) {
297 goto fn_exit;
298 }
299
300 ntv_event_t *event;
301 htable_errno = htable_find(htable, base, (void **) &event);
302 if (htable_errno != HTABLE_SUCCESS) {
303 papi_errno = (htable_errno == HTABLE_ENOVAL) ? PAPI_ENOEVNT : PAPI_ECMP;
304 goto fn_exit;
305 }
306
307 int flags = (event->instances > 1) ? (DEVICE_FLAG | INSTAN_FLAG) : DEVICE_FLAG;
308 int nameid = (int) (event - ntv_table_p->events);
309 event_info_t info = { device, instance, flags, nameid };
310 papi_errno = evt_id_create(&info, event_code);
311 if (papi_errno != PAPI_OK) {
312 goto fn_exit;
313 }
314
315 papi_errno = evt_id_to_info(*event_code, &info);
316
317 fn_exit:
318 SUBDBG("EXIT: %s\n", PAPI_strerror(papi_errno));
319 return papi_errno;
320}
#define HTABLE_ENOVAL
Definition: cuda/htable.h:18
#define PAPI_ECMP
Definition: f90papi.h:214
static int evt_name_to_instance(const char *name, int *instance)
Definition: roc_profiler.c:820
static int evt_name_to_device(const char *name, int *device)
Definition: roc_profiler.c:809
Here is the call graph for this function:
Here is the caller graph for this function:

◆ rocp_init()

int rocp_init ( void  )

Definition at line 159 of file roc_profiler.c.

160{
161 int papi_errno = PAPI_OK;
162 SUBDBG("ENTER\n");
163
164 papi_errno = load_rocp_sym();
165 if (papi_errno != PAPI_OK) {
166 goto fn_fail;
167 }
168
170
173 }
174
175 papi_errno = init_event_table();
176 if (papi_errno != PAPI_OK) {
177 (*hsa_shut_down_p)();
178 goto fn_fail;
179 }
180
182
183 fn_exit:
184 SUBDBG("EXIT: %s\n", PAPI_strerror(papi_errno));
185 return papi_errno;
186 fn_fail:
188 goto fn_exit;
189}
static int load_rocp_sym(void)
Definition: roc_profiler.c:470
static int init_event_table(void)
Definition: roc_profiler.c:682
Here is the call graph for this function:
Here is the caller graph for this function:

◆ rocp_init_environment()

int rocp_init_environment ( void  )

Definition at line 152 of file roc_profiler.c.

153{
154 return init_rocp_env();
155}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ rocp_shutdown()

int rocp_shutdown ( void  )

Definition at line 456 of file roc_profiler.c.

457{
459 return sampling_shutdown();
460 }
461
462 return intercept_shutdown();
463}
static int intercept_shutdown(void)
static int sampling_shutdown(void)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ sampling_ctx_close()

int sampling_ctx_close ( rocp_ctx_t  rocp_ctx)
static

Definition at line 952 of file roc_profiler.c.

953{
954 int papi_errno = PAPI_OK;
955
957
958 papi_errno = ctx_close(rocp_ctx);
959 if (papi_errno != PAPI_OK) {
960 goto fn_fail;
961 }
962
963 ctx_finalize(&rocp_ctx);
964
965 fn_exit:
967 return papi_errno;
968 fn_fail:
969 goto fn_exit;
970}
static int ctx_close(rocp_ctx_t)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ sampling_ctx_finalize()

int sampling_ctx_finalize ( rocp_ctx_t *  rocp_ctx)
static

Definition at line 1238 of file roc_profiler.c.

1239{
1240 if (*rocp_ctx == NULL) {
1241 return PAPI_OK;
1242 }
1243
1244 if ((*rocp_ctx)->u.sampling.features) {
1245 finalize_features((*rocp_ctx)->u.sampling.features, (*rocp_ctx)->u.sampling.feature_count);
1246 papi_free((*rocp_ctx)->u.sampling.features);
1247 }
1248
1249 if ((*rocp_ctx)->u.sampling.contexts) {
1250 papi_free((*rocp_ctx)->u.sampling.contexts);
1251 }
1252
1253 if ((*rocp_ctx)->u.sampling.ctx_prop) {
1254 papi_free((*rocp_ctx)->u.sampling.ctx_prop);
1255 }
1256
1257 if ((*rocp_ctx)->u.sampling.counters) {
1258 papi_free((*rocp_ctx)->u.sampling.counters);
1259 }
1260
1261 papi_free(*rocp_ctx);
1262 *rocp_ctx = NULL;
1263
1264 return PAPI_OK;
1265}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ sampling_ctx_get_dev_feature_count()

int sampling_ctx_get_dev_feature_count ( rocp_ctx_t  rocp_ctx,
int  i 
)
static

Definition at line 1394 of file roc_profiler.c.

1395{
1396 int start, stop, j = 0;
1397 int num_events = rocp_ctx->u.sampling.feature_count;
1398 uint64_t *events_id = rocp_ctx->u.sampling.events_id;
1399
1400 while (j < num_events && (events_id[j] & DEVICE_MASK) >> DEVICE_SHIFT != (uint64_t) i) {
1401 ++j;
1402 }
1403
1404 start = j;
1405
1406 while (j < num_events && (events_id[j] & DEVICE_MASK) >> DEVICE_SHIFT == (uint64_t) i) {
1407 ++j;
1408 }
1409
1410 stop = j;
1411
1412 return stop - start;
1413}
Here is the caller graph for this function:

◆ sampling_ctx_init()

int sampling_ctx_init ( uint64_t *  events_id,
int  num_events,
rocp_ctx_t *  rocp_ctx 
)
static

Definition at line 1154 of file roc_profiler.c.

1155{
1156 int papi_errno = PAPI_OK;
1157 int num_devs;
1158 rocprofiler_feature_t *features = NULL;
1159 rocprofiler_t **contexts = NULL;
1160 rocprofiler_properties_t *ctx_prop = NULL;
1161 long long *counters = NULL;
1162 *rocp_ctx = NULL;
1163
1164 rocc_bitmap_t bitmap;
1166 if (papi_errno != PAPI_OK) {
1167 return papi_errno;
1168 }
1169
1170 papi_errno = rocc_dev_get_count(bitmap, &num_devs);
1171 if (papi_errno != PAPI_OK) {
1172 goto fn_fail;
1173 }
1174
1175 contexts = papi_calloc(num_devs, sizeof(*contexts));
1176 if (contexts == NULL) {
1177 papi_errno = PAPI_ENOMEM;
1178 goto fn_fail;
1179 }
1180
1181 ctx_prop = papi_calloc(num_devs, sizeof(*ctx_prop));
1182 if (ctx_prop == NULL) {
1183 papi_errno = PAPI_ENOMEM;
1184 goto fn_fail;
1185 }
1186
1188 if (features == NULL) {
1189 papi_errno = PAPI_ENOMEM;
1190 goto fn_fail;
1191 }
1192
1193 counters = papi_malloc(num_events * sizeof(*counters));
1194 if (counters == NULL) {
1195 papi_errno = PAPI_ENOMEM;
1196 goto fn_fail;
1197 }
1198
1200 if (papi_errno != PAPI_OK) {
1201 goto fn_fail;
1202 }
1203
1204 *rocp_ctx = papi_calloc(1, sizeof(**rocp_ctx));
1205 if (*rocp_ctx == NULL) {
1206 papi_errno = PAPI_ENOMEM;
1207 goto fn_fail;
1208 }
1209
1210 (*rocp_ctx)->u.sampling.events_id = events_id;
1211 (*rocp_ctx)->u.sampling.features = features;
1212 (*rocp_ctx)->u.sampling.feature_count = num_events;
1213 (*rocp_ctx)->u.sampling.contexts = contexts;
1214 (*rocp_ctx)->u.sampling.counters = counters;
1215 (*rocp_ctx)->u.sampling.device_map = bitmap;
1216 (*rocp_ctx)->u.sampling.ctx_prop = ctx_prop;
1217
1218 fn_exit:
1219 return papi_errno;
1220 fn_fail:
1221 if (contexts) {
1222 papi_free(contexts);
1223 }
1224 if (features) {
1226 }
1227 if (counters) {
1228 papi_free(counters);
1229 }
1230 if (*rocp_ctx) {
1231 papi_free(*rocp_ctx);
1232 }
1233 *rocp_ctx = NULL;
1234 goto fn_exit;
1235}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ sampling_ctx_open()

int sampling_ctx_open ( uint64_t *  events_id,
int  num_events,
rocp_ctx_t *  rocp_ctx 
)
static

Definition at line 921 of file roc_profiler.c.

922{
923 int papi_errno = PAPI_OK;
924
925 if (num_events <= 0) {
926 return PAPI_ENOEVNT;
927 }
928
930
931 papi_errno = ctx_init(events_id, num_events, rocp_ctx);
932 if (papi_errno != PAPI_OK) {
933 goto fn_fail;
934 }
935
936 papi_errno = ctx_open(*rocp_ctx);
937 if (papi_errno != PAPI_OK) {
938 goto fn_fail;
939 }
940
941 (*rocp_ctx)->u.sampling.state |= ROCM_EVENTS_OPENED;
942
943 fn_exit:
945 return papi_errno;
946 fn_fail:
947 ctx_finalize(rocp_ctx);
948 goto fn_exit;
949}
static int ctx_open(rocp_ctx_t)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ sampling_ctx_read()

int sampling_ctx_read ( rocp_ctx_t  rocp_ctx,
long long **  counts 
)
static

Definition at line 1029 of file roc_profiler.c.

1030{
1031 int i, j, k = 0;
1032 int dev_feature_offset = 0;
1033 int dev_id, dev_count;
1034 rocprofiler_feature_t *features = rocp_ctx->u.sampling.features;
1035
1036 rocc_dev_get_count(rocp_ctx->u.sampling.device_map, &dev_count);
1037
1038 for (i = 0; i < dev_count; ++i) {
1039 hsa_status_t rocp_errno = rocp_read_p(rocp_ctx->u.sampling.contexts[i], 0);
1040 if (rocp_errno != HSA_STATUS_SUCCESS) {
1041 return PAPI_EMISC;
1042 }
1043
1044 rocp_errno = rocp_get_data_p(rocp_ctx->u.sampling.contexts[i], 0);
1045 if (rocp_errno != HSA_STATUS_SUCCESS) {
1046 return PAPI_EMISC;
1047 }
1048
1049 rocp_errno = rocp_get_metrics_p(rocp_ctx->u.sampling.contexts[i]);
1050 if (rocp_errno != HSA_STATUS_SUCCESS) {
1051 return PAPI_EMISC;
1052 }
1053
1054 int papi_errno = rocc_dev_get_id(rocp_ctx->u.sampling.device_map, i, &dev_id);
1055 if (papi_errno != PAPI_OK) {
1056 return papi_errno;
1057 }
1058
1059 int dev_feature_count = ctx_get_dev_feature_count(rocp_ctx, dev_id);
1060 rocprofiler_feature_t *dev_features = features + dev_feature_offset;
1061 long long *counters = rocp_ctx->u.sampling.counters;
1062
1063 for (j = 0; j < dev_feature_count; ++j) {
1064 switch(dev_features[j].data.kind) {
1065 case ROCPROFILER_DATA_KIND_INT32:
1066 counters[k++] = (long long) dev_features[j].data.result_int32;
1067 break;
1068 case ROCPROFILER_DATA_KIND_INT64:
1069 counters[k++] = dev_features[j].data.result_int64;
1070 break;
1071 case ROCPROFILER_DATA_KIND_FLOAT:
1072 counters[k++] = (long long) dev_features[j].data.result_float;
1073 break;
1074 case ROCPROFILER_DATA_KIND_DOUBLE:
1075 counters[k++] = (long long) dev_features[j].data.result_double;
1076 break;
1077 default:
1078 return PAPI_EINVAL;
1079 }
1080 }
1081 dev_feature_offset += dev_feature_count;
1082 }
1083 *counts = rocp_ctx->u.sampling.counters;
1084
1085 return PAPI_OK;
1086}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ sampling_ctx_reset()

int sampling_ctx_reset ( rocp_ctx_t  rocp_ctx)
static

Definition at line 1089 of file roc_profiler.c.

1090{
1091 int i, devs_count;
1092 rocc_dev_get_count(rocp_ctx->u.sampling.device_map, &devs_count);
1093
1094 for (i = 0; i < devs_count; ++i) {
1095 hsa_status_t rocp_errno = rocp_reset_p(rocp_ctx->u.sampling.contexts[i], 0);
1096 if (rocp_errno != HSA_STATUS_SUCCESS) {
1097 return PAPI_EMISC;
1098 }
1099 }
1100 for (i = 0; i < rocp_ctx->u.sampling.feature_count; ++i) {
1101 rocp_ctx->u.sampling.counters[i] = 0;
1102 }
1103 return PAPI_OK;
1104}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ sampling_ctx_start()

int sampling_ctx_start ( rocp_ctx_t  rocp_ctx)
static

Definition at line 973 of file roc_profiler.c.

974{
975 if (!(rocp_ctx->u.sampling.state & ROCM_EVENTS_OPENED)) {
976 SUBDBG("[ROCP sampling mode] Cannot start eventset, not opened.");
977 return PAPI_EINVAL;
978 }
979
980 if (rocp_ctx->u.sampling.state & ROCM_EVENTS_RUNNING) {
981 SUBDBG("[ROCP sampling mode] Cannot start eventset, already running.");
982 return PAPI_EINVAL;
983 }
984
985 int devs_count;
986 rocc_dev_get_count(rocp_ctx->u.sampling.device_map, &devs_count);
987
988 int i;
989 for (i = 0; i < devs_count; ++i) {
990 hsa_status_t rocp_errno = rocp_start_p(rocp_ctx->u.sampling.contexts[i], 0);
991 if (rocp_errno != HSA_STATUS_SUCCESS) {
992 return PAPI_EMISC;
993 }
994 }
995
996 rocp_ctx->u.sampling.state |= ROCM_EVENTS_RUNNING;
997 return PAPI_OK;
998}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ sampling_ctx_stop()

int sampling_ctx_stop ( rocp_ctx_t  rocp_ctx)
static

Definition at line 1001 of file roc_profiler.c.

1002{
1003 if (!(rocp_ctx->u.sampling.state & ROCM_EVENTS_OPENED)) {
1004 SUBDBG("[ROCP sampling mode] Cannot stop eventset, not opened.");
1005 return PAPI_EINVAL;
1006 }
1007
1008 if (!(rocp_ctx->u.sampling.state & ROCM_EVENTS_RUNNING)) {
1009 SUBDBG("[ROCP sampling mode] Cannot stop eventset, not running.");
1010 return PAPI_EINVAL;
1011 }
1012
1013 int devs_count;
1014 rocc_dev_get_count(rocp_ctx->u.sampling.device_map, &devs_count);
1015
1016 int i;
1017 for (i = 0; i < devs_count; ++i) {
1018 hsa_status_t rocp_errno = rocp_stop_p(rocp_ctx->u.sampling.contexts[i], 0);
1019 if (rocp_errno != HSA_STATUS_SUCCESS) {
1020 return PAPI_EMISC;
1021 }
1022 }
1023
1024 rocp_ctx->u.sampling.state &= ~ROCM_EVENTS_RUNNING;
1025 return PAPI_OK;
1026}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ sampling_shutdown()

int sampling_shutdown ( void  )
static

Definition at line 1113 of file roc_profiler.c.

1114{
1117
1119
1120 return PAPI_OK;
1121}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ save_callback_features()

int save_callback_features ( rocprofiler_feature_t *  features,
int  feature_count 
)
static

Definition at line 1906 of file roc_profiler.c.

1907{
1908 int i;
1909 for (i = 0; i < feature_count; ++i) {
1911 }
1912 return PAPI_OK;
1913}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ shutdown_event_table()

int shutdown_event_table ( void  )
static

rocp_shutdown sampling mode utility functions

Definition at line 1124 of file roc_profiler.c.

1125{
1126 int i;
1127
1128 for (i = 0; i < ntv_table_p->count; ++i) {
1131 }
1132
1133 ntv_table_p->count = 0;
1134
1136
1137 return PAPI_OK;
1138}
Here is the caller graph for this function:

◆ unload_rocp_sym()

int unload_rocp_sym ( void  )
static

Definition at line 545 of file roc_profiler.c.

546{
547 if (rocp_dlp == NULL) {
548 return PAPI_OK;
549 }
550
551 rocp_get_info_p = NULL;
552 rocp_iterate_info_p = NULL;
553 rocp_error_string_p = NULL;
554 rocp_open_p = NULL;
555 rocp_close_p = NULL;
556 rocp_group_count_p = NULL;
557 rocp_start_p = NULL;
558 rocp_read_p = NULL;
559 rocp_stop_p = NULL;
560 rocp_get_group_p = NULL;
561 rocp_get_data_p = NULL;
563 rocp_get_metrics_p = NULL;
564 rocp_reset_p = NULL;
565 rocp_pool_open_p = NULL;
566 rocp_pool_close_p = NULL;
567 rocp_pool_fetch_p = NULL;
568 rocp_pool_flush_p = NULL;
573
574 dlclose(rocp_dlp);
575
576 return PAPI_OK;
577}
Here is the caller graph for this function:

◆ unregister_dispatch_counter()

int unregister_dispatch_counter ( unsigned long  tid)
static

Definition at line 2075 of file roc_profiler.c.

2076{
2077 int papi_errno = PAPI_OK;
2078 int htable_errno = HTABLE_SUCCESS;
2079 char key[PAPI_MIN_STR_LEN] = { 0 };
2080
2081 sprintf(key, "%lu", tid);
2082 int *counter_p;
2083 htable_errno = htable_find(htable, (const char *) key, (void **) &counter_p);
2084 if (htable_errno != HTABLE_SUCCESS) {
2085 papi_errno = PAPI_ECMP;
2086 goto fn_exit;
2087 }
2088
2089 htable_delete(htable, (const char *) key);
2090 --intercept_global_state.active_thread_count;
2091
2092 fn_exit:
2093 return papi_errno;
2094}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ verify_events()

int verify_events ( uint64_t *  events_id,
int  num_events 
)
static

intercept_ctx_{open,close} utility functions

Definition at line 1718 of file roc_profiler.c.

1719{
1720 int papi_errno = PAPI_OK;
1721 int i;
1722 char name[PAPI_MAX_STR_LEN] = { 0 };
1723
1724 if (intercept_global_state.events_id == NULL) {
1725 return papi_errno;
1726 }
1727
1728 for (i = 0; i < num_events; ++i) {
1729 event_info_t info;
1730 papi_errno = evt_id_to_info(events_id[i], &info);
1731 if (papi_errno != PAPI_OK) {
1732 break;
1733 }
1734 if (ntv_table_p->events[info.nameid].instances > 1) {
1735 sprintf(name, "%s[%i]", ntv_table_p->events[info.nameid].name, info.instance);
1736 } else {
1737 sprintf(name, "%s", ntv_table_p->events[info.nameid].name);
1738 }
1739 void *p;
1741 papi_errno = PAPI_ECNFLCT;
1742 break;
1743 }
1744 }
1745
1746 return papi_errno;
1747}
#define PAPI_ECNFLCT
Definition: f90papi.h:234
Here is the call graph for this function:
Here is the caller graph for this function:

Variable Documentation

◆ _rocm_lock

unsigned int _rocm_lock

Definition at line 85 of file roc_profiler.c.

◆ active_thread_count

int active_thread_count

Definition at line 1451 of file roc_profiler.c.

◆ cb_ctx_list_heads

cb_context_node_t* cb_ctx_list_heads[PAPI_ROCM_MAX_DEV_COUNT]
static

The context handler prepares a node for every processed entry. The node is associated with the thread that generated the monitoring request and contains the values of the counters read by rocprofiler. Each node is then added to the corresponding device queue and is eventually read by intercept_ctx_read.

Definition at line 2111 of file roc_profiler.c.

◆ cb_dispatch_arg

cb_dispatch_arg_t cb_dispatch_arg
static

Definition at line 1685 of file roc_profiler.c.

◆ events_id

uint64_t* events_id

Definition at line 1448 of file roc_profiler.c.

◆ feature_count

int feature_count

Definition at line 1450 of file roc_profiler.c.

◆ features

rocprofiler_feature_t* features

Definition at line 1449 of file roc_profiler.c.

◆ htable

void* htable
static

Definition at line 147 of file roc_profiler.c.

◆ htable_intercept

void* htable_intercept
static

Definition at line 148 of file roc_profiler.c.

◆ 

struct { ... } intercept_global_state

◆ kernel_count

int kernel_count

Definition at line 1452 of file roc_profiler.c.

◆ ntv_table

ntv_event_table_t ntv_table
static

Definition at line 145 of file roc_profiler.c.

◆ ntv_table_p

ntv_event_table_t* ntv_table_p
static

Definition at line 146 of file roc_profiler.c.

◆ rocm_prof_mode

unsigned int rocm_prof_mode

Definition at line 84 of file roc_profiler.c.

◆ rocp_close_p

hsa_status_t(* rocp_close_p) (rocprofiler_t *) ( rocprofiler_t *  )
static

Definition at line 94 of file roc_profiler.c.

◆ rocp_dlp

void* rocp_dlp = NULL
static

Definition at line 144 of file roc_profiler.c.

◆ rocp_error_string_p

hsa_status_t(* rocp_error_string_p) (const char **) ( const char **  )
static

Definition at line 90 of file roc_profiler.c.

◆ rocp_get_data_p

hsa_status_t(* rocp_get_data_p) (rocprofiler_t *, uint32_t) ( rocprofiler_t *  ,
uint32_t   
)
static

Definition at line 100 of file roc_profiler.c.

◆ rocp_get_group_p

hsa_status_t(* rocp_get_group_p) (rocprofiler_t *, uint32_t, rocprofiler_group_t *) ( rocprofiler_t *  ,
uint32_t  ,
rocprofiler_group_t *   
)
static

Definition at line 99 of file roc_profiler.c.

◆ rocp_get_info_p

hsa_status_t(* rocp_get_info_p) (const hsa_agent_t *, rocprofiler_info_kind_t, void *) ( const hsa_agent_t *  ,
rocprofiler_info_kind_t  ,
void *   
)
static

Definition at line 88 of file roc_profiler.c.

◆ rocp_get_metrics_p

hsa_status_t(* rocp_get_metrics_p) (const rocprofiler_t *) ( const rocprofiler_t *  )
static

Definition at line 102 of file roc_profiler.c.

◆ rocp_group_count_p

hsa_status_t(* rocp_group_count_p) (const rocprofiler_t *, uint32_t *) ( const rocprofiler_t *  ,
uint32_t *   
)
static

Definition at line 95 of file roc_profiler.c.

◆ rocp_group_get_data_p

hsa_status_t(* rocp_group_get_data_p) (rocprofiler_group_t *) ( rocprofiler_group_t *  )
static

Definition at line 101 of file roc_profiler.c.

◆ rocp_iterate_info_p

hsa_status_t(* rocp_iterate_info_p) (const hsa_agent_t *, rocprofiler_info_kind_t, hsa_status_t(*)(const rocprofiler_info_data_t, void *), void *) ( const hsa_agent_t *  ,
rocprofiler_info_kind_t  ,
hsa_status_t(*)(const rocprofiler_info_data_t, void *)  ,
void *   
)
static

Definition at line 89 of file roc_profiler.c.

◆ rocp_open_p

hsa_status_t(* rocp_open_p) (hsa_agent_t, rocprofiler_feature_t *, uint32_t, rocprofiler_t **, uint32_t, rocprofiler_properties_t *) ( hsa_agent_t  ,
rocprofiler_feature_t *  ,
uint32_t  ,
rocprofiler_t **  ,
uint32_t  ,
rocprofiler_properties_t *   
)
static

Definition at line 93 of file roc_profiler.c.

◆ rocp_pool_close_p

hsa_status_t(* rocp_pool_close_p) (rocprofiler_pool_t *) ( rocprofiler_pool_t *  )
static

Definition at line 107 of file roc_profiler.c.

◆ rocp_pool_fetch_p

hsa_status_t(* rocp_pool_fetch_p) (rocprofiler_pool_t *, rocprofiler_pool_entry_t *) ( rocprofiler_pool_t *  ,
rocprofiler_pool_entry_t *   
)
static

Definition at line 108 of file roc_profiler.c.

◆ rocp_pool_flush_p

hsa_status_t(* rocp_pool_flush_p) (rocprofiler_pool_t *) ( rocprofiler_pool_t *  )
static

Definition at line 109 of file roc_profiler.c.

◆ rocp_pool_open_p

hsa_status_t(* rocp_pool_open_p) (hsa_agent_t, rocprofiler_feature_t *, uint32_t, rocprofiler_pool_t **, uint32_t, rocprofiler_pool_properties_t *) ( hsa_agent_t  ,
rocprofiler_feature_t *  ,
uint32_t  ,
rocprofiler_pool_t **  ,
uint32_t  ,
rocprofiler_pool_properties_t *   
)
static

Definition at line 106 of file roc_profiler.c.

◆ rocp_read_p

hsa_status_t(* rocp_read_p) (rocprofiler_t *, uint32_t) ( rocprofiler_t *  ,
uint32_t   
)
static

Definition at line 97 of file roc_profiler.c.

◆ rocp_remove_queue_cbs_p

hsa_status_t(* rocp_remove_queue_cbs_p) (void) ( void  )
static

Definition at line 113 of file roc_profiler.c.

◆ rocp_reset_p

hsa_status_t(* rocp_reset_p) (rocprofiler_t *, uint32_t) ( rocprofiler_t *  ,
uint32_t   
)
static

Definition at line 103 of file roc_profiler.c.

◆ rocp_set_queue_cbs_p

hsa_status_t(* rocp_set_queue_cbs_p) (rocprofiler_queue_callbacks_t, void *) ( rocprofiler_queue_callbacks_t  ,
void *   
)
static

Definition at line 110 of file roc_profiler.c.

◆ rocp_start_p

hsa_status_t(* rocp_start_p) (rocprofiler_t *, uint32_t) ( rocprofiler_t *  ,
uint32_t   
)
static

Definition at line 96 of file roc_profiler.c.

◆ rocp_start_queue_cbs_p

hsa_status_t(* rocp_start_queue_cbs_p) (void) ( void  )
static

Definition at line 111 of file roc_profiler.c.

◆ rocp_stop_p

hsa_status_t(* rocp_stop_p) (rocprofiler_t *, uint32_t) ( rocprofiler_t *  ,
uint32_t   
)
static

Definition at line 98 of file roc_profiler.c.

◆ rocp_stop_queue_cbs_p

hsa_status_t(* rocp_stop_queue_cbs_p) (void) ( void  )
static

Definition at line 112 of file roc_profiler.c.