23#include "hsa_ext_amd.h"
25static void *rocm_dlp = NULL;
27static hsa_status_t (*hsa_initPtr)( void ) = NULL;
28static hsa_status_t (*hsa_shut_downPtr)( void ) = NULL;
29static hsa_status_t (*hsa_iterate_agentsPtr)( hsa_status_t (*)(hsa_agent_t agent,
32static hsa_status_t (*hsa_agent_get_infoPtr)( hsa_agent_t agent,
33 hsa_agent_info_t attribute,
35static hsa_status_t (*hsa_amd_agent_iterate_memory_poolsPtr)( hsa_agent_t agent,
36 hsa_status_t (*)(hsa_amd_memory_pool_t pool,
39static hsa_status_t (*hsa_amd_memory_pool_get_infoPtr)( hsa_amd_memory_pool_t pool,
40 hsa_amd_memory_pool_info_t attribute,
42static hsa_status_t (*hsa_status_stringPtr)( hsa_status_t status,
43 const char **string ) = NULL;
45#define ROCM_CALL(call, err_handle) do { \
46 hsa_status_t _status = (call); \
47 if (_status == HSA_STATUS_SUCCESS || \
48 _status == HSA_STATUS_INFO_BREAK) \
53static hsa_status_t count_devices( hsa_agent_t agent,
void *data );
54static hsa_status_t get_device_count(
int *
count );
55static hsa_status_t get_device_memory( hsa_amd_memory_pool_t pool,
void *info );
56static hsa_status_t get_device_properties( hsa_agent_t agent,
void *info );
59static int hsa_is_enabled(
void );
69static rsmi_status_t (*rsmi_initPtr)(
unsigned long init_flags ) = NULL;
70static rsmi_status_t (*rsmi_shut_downPtr)( void ) = NULL;
71static rsmi_status_t (*rsmi_dev_pci_id_getPtr)(
unsigned int dev_idx,
unsigned long *bdfid ) = NULL;
73#define ROCM_SMI_CALL(call, err_handle) do { \
74 rsmi_status_t _status = (call); \
75 if (_status == RSMI_STATUS_SUCCESS) \
81static int rsmi_is_enabled(
void );
88count_devices( hsa_agent_t agent,
void *data )
90 int *
count = (
int *) data;
92 hsa_device_type_t
type;
93 ROCM_CALL((*hsa_agent_get_infoPtr)(agent, HSA_AGENT_INFO_DEVICE, &
type),
96 if (
type == HSA_DEVICE_TYPE_GPU) {
100 return HSA_STATUS_SUCCESS;
104get_device_count(
int *
count )
108 ROCM_CALL((*hsa_iterate_agentsPtr)(&count_devices,
count),
111 return HSA_STATUS_SUCCESS;
115get_device_memory( hsa_amd_memory_pool_t pool,
void *info )
117 hsa_region_segment_t seg_info;
120 ROCM_CALL((*hsa_amd_memory_pool_get_infoPtr)(pool,
121 HSA_AMD_MEMORY_POOL_INFO_SEGMENT,
125 if (seg_info == HSA_REGION_SEGMENT_GROUP) {
126 ROCM_CALL((*hsa_amd_memory_pool_get_infoPtr)(pool,
127 HSA_AMD_MEMORY_POOL_INFO_SIZE,
130 return HSA_STATUS_INFO_BREAK;
133 return HSA_STATUS_SUCCESS;
137get_device_properties( hsa_agent_t agent,
void *info )
141 hsa_device_type_t
type;
142 ROCM_CALL((*hsa_agent_get_infoPtr)(agent, HSA_AGENT_INFO_DEVICE, &
type),
145 if (
type == HSA_DEVICE_TYPE_GPU) {
149 ROCM_CALL((*hsa_agent_get_infoPtr)(agent,
153 ROCM_CALL((*hsa_agent_get_infoPtr)(agent,
154 HSA_AGENT_INFO_WAVEFRONT_SIZE,
157 unsigned short wg_dims[3];
158 ROCM_CALL((*hsa_agent_get_infoPtr)(agent,
159 HSA_AGENT_INFO_WORKGROUP_MAX_DIM,
162 ROCM_CALL((*hsa_agent_get_infoPtr)(agent,
163 HSA_AGENT_INFO_WORKGROUP_MAX_SIZE,
167 ROCM_CALL((*hsa_agent_get_infoPtr)(agent,
168 HSA_AGENT_INFO_GRID_MAX_DIM,
171 ROCM_CALL((*hsa_agent_get_infoPtr)(agent,
172 HSA_AGENT_INFO_VERSION_MAJOR,
175 ROCM_CALL((*hsa_agent_get_infoPtr)(agent,
176 HSA_AGENT_INFO_VERSION_MINOR,
179 ROCM_CALL((*hsa_agent_get_infoPtr)(agent, (hsa_agent_info_t)
180 HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU,
183 ROCM_CALL((*hsa_agent_get_infoPtr)(agent, (hsa_agent_info_t)
184 HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT,
187 ROCM_CALL((*hsa_agent_get_infoPtr)(agent, (hsa_agent_info_t)
188 HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU,
191 ROCM_CALL((*hsa_amd_agent_iterate_memory_poolsPtr)(agent,
206 return HSA_STATUS_SUCCESS;
212 hsa_status_t status = HSA_STATUS_SUCCESS;
213 const char *
string = NULL;
215 ROCM_CALL((*hsa_iterate_agentsPtr)(&get_device_properties, dev_info),
218 if (status != HSA_STATUS_SUCCESS) {
219 (*hsa_status_stringPtr)(status, &string);
220 SUBDBG(
"error: %s\n",
string );
225hsa_is_enabled(
void )
227 return (hsa_initPtr != NULL &&
228 hsa_shut_downPtr != NULL &&
229 hsa_iterate_agentsPtr != NULL &&
230 hsa_agent_get_infoPtr != NULL &&
231 hsa_amd_agent_iterate_memory_poolsPtr != NULL &&
232 hsa_amd_memory_pool_get_infoPtr != NULL &&
233 hsa_status_stringPtr != NULL);
239 char pathname[PATH_MAX] =
"libhsa-runtime64.so";
240 char *rocm_root = getenv(
"PAPI_ROCM_ROOT");
241 if (rocm_root != NULL) {
242 sprintf(pathname,
"%s/lib/libhsa-runtime64.so", rocm_root);
245 rocm_dlp = dlopen(pathname, RTLD_NOW | RTLD_GLOBAL);
246 if (rocm_dlp == NULL) {
249 SUBDBG(
"Status string truncated.");
254 hsa_initPtr = dlsym(rocm_dlp,
"hsa_init");
255 hsa_shut_downPtr = dlsym(rocm_dlp,
"hsa_shut_down");
256 hsa_iterate_agentsPtr = dlsym(rocm_dlp,
"hsa_iterate_agents");
257 hsa_agent_get_infoPtr = dlsym(rocm_dlp,
"hsa_agent_get_info");
258 hsa_amd_agent_iterate_memory_poolsPtr = dlsym(rocm_dlp,
"hsa_amd_agent_iterate_memory_pools");
259 hsa_amd_memory_pool_get_infoPtr = dlsym(rocm_dlp,
"hsa_amd_memory_pool_get_info");
260 hsa_status_stringPtr = dlsym(rocm_dlp,
"hsa_status_string");
262 if (!hsa_is_enabled() || (*hsa_initPtr)()) {
263 const char *message =
"dlsym() of HSA symbols failed or hsa_init() "
267 SUBDBG(
"Status string truncated.");
278 if (rocm_dlp != NULL) {
279 (*hsa_shut_downPtr)();
284 hsa_shut_downPtr = NULL;
285 hsa_iterate_agentsPtr = NULL;
286 hsa_agent_get_infoPtr = NULL;
287 hsa_amd_agent_iterate_memory_poolsPtr = NULL;
288 hsa_amd_memory_pool_get_infoPtr = NULL;
289 hsa_status_stringPtr = NULL;
291 return hsa_is_enabled();
300 for (dev = 0; dev < dev_count; ++dev) {
302 ROCM_SMI_CALL((*rsmi_dev_pci_id_getPtr)(dev, &uid),
return);
310rsmi_is_enabled(
void )
312 return (rsmi_initPtr != NULL &&
313 rsmi_shut_downPtr != NULL &&
314 rsmi_dev_pci_id_getPtr != NULL);
320 char pathname[PATH_MAX] =
"librocm_smi64.so";
321 char *rsmi_root = getenv(
"PAPI_ROCM_ROOT");
322 if (rsmi_root != NULL) {
323 sprintf(pathname,
"%s/lib/librocm_smi64.so", rsmi_root);
326 rsmi_dlp = dlopen(pathname, RTLD_NOW | RTLD_GLOBAL);
330 SUBDBG(
"Status string truncated.");
335 rsmi_initPtr = dlsym(
rsmi_dlp,
"rsmi_init");
336 rsmi_shut_downPtr = dlsym(
rsmi_dlp,
"rsmi_shut_down");
337 rsmi_dev_pci_id_getPtr = dlsym(
rsmi_dlp,
"rsmi_dev_pci_id_get");
339 if (!rsmi_is_enabled() || (*rsmi_initPtr)(0)) {
340 const char *message =
"dlsym() of RSMI symbols failed or rsmi_init() "
344 SUBDBG(
"Status string truncated.");
356 (*rsmi_shut_downPtr)();
361 rsmi_shut_downPtr = NULL;
362 rsmi_dev_pci_id_getPtr = NULL;
364 return rsmi_is_enabled();
371 memset(dev_type_info, 0,
sizeof(*dev_type_info));
373 strcpy(dev_type_info->
vendor,
"AMD/ATI");
374 strcpy(dev_type_info->
status,
"Device Initialized");
382 hsa_status_t status = get_device_count(&dev_count);
383 if (status != HSA_STATUS_SUCCESS) {
384 if (status != HSA_STATUS_ERROR_NOT_INITIALIZED) {
386 (*hsa_status_stringPtr)(status, &string);
387 printf(
"error: %s\n",
string );
398 fill_dev_affinity_info(arr, dev_count);
402 const char *message =
"RSMI not configured, no device affinity available";
405 SUBDBG(
"Error message truncated.");
412 const char *message =
"ROCm not configured, no ROCm device available";
415 SUBDBG(
"Error message truncated.");
void close_amd_gpu_dev_type(_sysdetect_dev_type_info_t *dev_type_info)
void open_amd_gpu_dev_type(_sysdetect_dev_type_info_t *dev_type_info)
#define PAPI_DEV_TYPE_ID__ROCM
#define SUBDBG(format, args...)
#define papi_calloc(a, b)
static int unload_hsa_sym(void)
static int load_hsa_sym(void)
static int load_rsmi_sym(void)
static int unload_rsmi_sym(void)
char status[PAPI_MAX_STR_LEN]
char vendor[PAPI_MAX_STR_LEN]
_sysdetect_dev_info_u * dev_info_arr
unsigned short max_workgroup_dim_x
unsigned int compute_unit_count
unsigned int max_shmmem_per_workgroup
unsigned int max_threads_per_workgroup
unsigned short max_workgroup_dim_y
unsigned int wavefront_size
unsigned short max_workgroup_dim_z
unsigned int simd_per_compute_unit
char name[PAPI_2MAX_STR_LEN]
unsigned int max_waves_per_compute_unit
struct _sysdetect_gpu_info_u::@8 amd