PAPI 7.1.0.0
Loading...
Searching...
No Matches
amd_gpu.c
Go to the documentation of this file.
1
12#include <stdio.h>
13#include <string.h>
14#include <stdlib.h>
15#include <dlfcn.h>
16#include <errno.h>
17
18#include "sysdetect.h"
19#include "amd_gpu.h"
20
21#ifdef HAVE_ROCM
22#include "hsa.h"
23#include "hsa_ext_amd.h"
24
25static void *rocm_dlp = NULL;
26
27static hsa_status_t (*hsa_initPtr)( void ) = NULL;
28static hsa_status_t (*hsa_shut_downPtr)( void ) = NULL;
29static hsa_status_t (*hsa_iterate_agentsPtr)( hsa_status_t (*)(hsa_agent_t agent,
30 void *value),
31 void *value ) = NULL;
32static hsa_status_t (*hsa_agent_get_infoPtr)( hsa_agent_t agent,
33 hsa_agent_info_t attribute,
34 void *value ) = NULL;
35static hsa_status_t (*hsa_amd_agent_iterate_memory_poolsPtr)( hsa_agent_t agent,
36 hsa_status_t (*)(hsa_amd_memory_pool_t pool,
37 void *value),
38 void *value ) = NULL;
39static hsa_status_t (*hsa_amd_memory_pool_get_infoPtr)( hsa_amd_memory_pool_t pool,
40 hsa_amd_memory_pool_info_t attribute,
41 void *value ) = NULL;
42static hsa_status_t (*hsa_status_stringPtr)( hsa_status_t status,
43 const char **string ) = NULL;
44
/*
 * Evaluate `call` exactly once and store its result in a local `_status`.
 * HSA_STATUS_SUCCESS and HSA_STATUS_INFO_BREAK are both treated as
 * non-errors; for any other value `err_handle` is executed. `err_handle`
 * may refer to `_status` (e.g. `return _status`).
 */
#define ROCM_CALL(call, err_handle) do {                 \
    hsa_status_t _status = (call);                       \
    if (_status != HSA_STATUS_SUCCESS &&                 \
        _status != HSA_STATUS_INFO_BREAK) {              \
        err_handle;                                      \
    }                                                    \
} while(0)
52
53static hsa_status_t count_devices( hsa_agent_t agent, void *data );
54static hsa_status_t get_device_count( int *count );
55static hsa_status_t get_device_memory( hsa_amd_memory_pool_t pool, void *info );
56static hsa_status_t get_device_properties( hsa_agent_t agent, void *info );
57
58static void fill_dev_info( _sysdetect_gpu_info_u *dev_info );
59static int hsa_is_enabled( void );
60static int load_hsa_sym( char *status );
61static int unload_hsa_sym( void );
62#endif /* HAVE_ROCM */
63
64#ifdef HAVE_ROCM_SMI
65#include "rocm_smi.h"
66
67static void *rsmi_dlp = NULL;
68
69static rsmi_status_t (*rsmi_initPtr)( unsigned long init_flags ) = NULL;
70static rsmi_status_t (*rsmi_shut_downPtr)( void ) = NULL;
71static rsmi_status_t (*rsmi_dev_pci_id_getPtr)( unsigned int dev_idx, unsigned long *bdfid ) = NULL;
72
/*
 * Evaluate `call` exactly once and store its result in a local `_status`.
 * Anything other than RSMI_STATUS_SUCCESS triggers `err_handle`, which may
 * refer to `_status`.
 */
#define ROCM_SMI_CALL(call, err_handle) do {             \
    rsmi_status_t _status = (call);                      \
    if (_status != RSMI_STATUS_SUCCESS) {                \
        err_handle;                                      \
    }                                                    \
} while(0)
79
80static void fill_dev_affinity_info( _sysdetect_gpu_info_u *dev_info, int dev_count );
81static int rsmi_is_enabled( void );
82static int load_rsmi_sym( char *status );
83static int unload_rsmi_sym( void );
84#endif /* HAVE_ROCM_SMI */
85
86#ifdef HAVE_ROCM
87hsa_status_t
88count_devices( hsa_agent_t agent, void *data )
89{
90 int *count = (int *) data;
91
92 hsa_device_type_t type;
93 ROCM_CALL((*hsa_agent_get_infoPtr)(agent, HSA_AGENT_INFO_DEVICE, &type),
94 return _status);
95
96 if (type == HSA_DEVICE_TYPE_GPU) {
97 ++(*count);
98 }
99
100 return HSA_STATUS_SUCCESS;
101}
102
103hsa_status_t
104get_device_count( int *count )
105{
106 *count = 0;
107
108 ROCM_CALL((*hsa_iterate_agentsPtr)(&count_devices, count),
109 return _status);
110
111 return HSA_STATUS_SUCCESS;
112}
113
114hsa_status_t
115get_device_memory( hsa_amd_memory_pool_t pool, void *info )
116{
117 hsa_region_segment_t seg_info;
118 _sysdetect_gpu_info_u *dev_info = info;
119
120 ROCM_CALL((*hsa_amd_memory_pool_get_infoPtr)(pool,
121 HSA_AMD_MEMORY_POOL_INFO_SEGMENT,
122 &seg_info),
123 return _status);
124
125 if (seg_info == HSA_REGION_SEGMENT_GROUP) {
126 ROCM_CALL((*hsa_amd_memory_pool_get_infoPtr)(pool,
127 HSA_AMD_MEMORY_POOL_INFO_SIZE,
128 &dev_info->amd.max_shmmem_per_workgroup),
129 return _status);
130 return HSA_STATUS_INFO_BREAK;
131 }
132
133 return HSA_STATUS_SUCCESS;
134}
135
136hsa_status_t
137get_device_properties( hsa_agent_t agent, void *info )
138{
139 static int count;
140
141 hsa_device_type_t type;
142 ROCM_CALL((*hsa_agent_get_infoPtr)(agent, HSA_AGENT_INFO_DEVICE, &type),
143 return _status);
144
145 if (type == HSA_DEVICE_TYPE_GPU) {
146 /* query attributes for this device */
147 _sysdetect_gpu_info_u *dev_info = &((_sysdetect_gpu_info_u *) info)[count];
148
149 ROCM_CALL((*hsa_agent_get_infoPtr)(agent,
150 HSA_AGENT_INFO_NAME,
151 dev_info->amd.name),
152 return _status);
153 ROCM_CALL((*hsa_agent_get_infoPtr)(agent,
154 HSA_AGENT_INFO_WAVEFRONT_SIZE,
155 &dev_info->amd.wavefront_size),
156 return _status);
157 unsigned short wg_dims[3];
158 ROCM_CALL((*hsa_agent_get_infoPtr)(agent,
159 HSA_AGENT_INFO_WORKGROUP_MAX_DIM,
160 wg_dims),
161 return _status);
162 ROCM_CALL((*hsa_agent_get_infoPtr)(agent,
163 HSA_AGENT_INFO_WORKGROUP_MAX_SIZE,
164 &dev_info->amd.max_threads_per_workgroup),
165 return _status);
166 hsa_dim3_t gr_dims;
167 ROCM_CALL((*hsa_agent_get_infoPtr)(agent,
168 HSA_AGENT_INFO_GRID_MAX_DIM,
169 &gr_dims),
170 return _status);
171 ROCM_CALL((*hsa_agent_get_infoPtr)(agent,
172 HSA_AGENT_INFO_VERSION_MAJOR,
173 &dev_info->amd.major),
174 return _status);
175 ROCM_CALL((*hsa_agent_get_infoPtr)(agent,
176 HSA_AGENT_INFO_VERSION_MINOR,
177 &dev_info->amd.minor),
178 return _status);
179 ROCM_CALL((*hsa_agent_get_infoPtr)(agent, (hsa_agent_info_t)
180 HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU,
181 &dev_info->amd.simd_per_compute_unit),
182 return _status);
183 ROCM_CALL((*hsa_agent_get_infoPtr)(agent, (hsa_agent_info_t)
184 HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT,
185 &dev_info->amd.compute_unit_count),
186 return _status);
187 ROCM_CALL((*hsa_agent_get_infoPtr)(agent, (hsa_agent_info_t)
188 HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU,
190 return _status);
191 ROCM_CALL((*hsa_amd_agent_iterate_memory_poolsPtr)(agent,
192 &get_device_memory,
193 dev_info),
194 return _status);
195
196 dev_info->amd.max_workgroup_dim_x = wg_dims[0];
197 dev_info->amd.max_workgroup_dim_y = wg_dims[1];
198 dev_info->amd.max_workgroup_dim_z = wg_dims[2];
199 dev_info->amd.max_grid_dim_x = gr_dims.x;
200 dev_info->amd.max_grid_dim_y = gr_dims.y;
201 dev_info->amd.max_grid_dim_z = gr_dims.z;
202
203 ++count;
204 }
205
206 return HSA_STATUS_SUCCESS;
207}
208
209void
210fill_dev_info( _sysdetect_gpu_info_u *dev_info )
211{
212 hsa_status_t status = HSA_STATUS_SUCCESS;
213 const char *string = NULL;
214
215 ROCM_CALL((*hsa_iterate_agentsPtr)(&get_device_properties, dev_info),
216 status = _status);
217
218 if (status != HSA_STATUS_SUCCESS) {
219 (*hsa_status_stringPtr)(status, &string);
220 SUBDBG( "error: %s\n", string );
221 }
222}
223
224int
225hsa_is_enabled( void )
226{
227 return (hsa_initPtr != NULL &&
228 hsa_shut_downPtr != NULL &&
229 hsa_iterate_agentsPtr != NULL &&
230 hsa_agent_get_infoPtr != NULL &&
231 hsa_amd_agent_iterate_memory_poolsPtr != NULL &&
232 hsa_amd_memory_pool_get_infoPtr != NULL &&
233 hsa_status_stringPtr != NULL);
234}
235
236int
237load_hsa_sym( char *status )
238{
239 char pathname[PATH_MAX] = "libhsa-runtime64.so";
240 char *rocm_root = getenv("PAPI_ROCM_ROOT");
241 if (rocm_root != NULL) {
242 sprintf(pathname, "%s/lib/libhsa-runtime64.so", rocm_root);
243 }
244
245 rocm_dlp = dlopen(pathname, RTLD_NOW | RTLD_GLOBAL);
246 if (rocm_dlp == NULL) {
247 int count = snprintf(status, PAPI_MAX_STR_LEN, "%s", dlerror());
248 if (count >= PAPI_MAX_STR_LEN) {
249 SUBDBG("Status string truncated.");
250 }
251 return -1;
252 }
253
254 hsa_initPtr = dlsym(rocm_dlp, "hsa_init");
255 hsa_shut_downPtr = dlsym(rocm_dlp, "hsa_shut_down");
256 hsa_iterate_agentsPtr = dlsym(rocm_dlp, "hsa_iterate_agents");
257 hsa_agent_get_infoPtr = dlsym(rocm_dlp, "hsa_agent_get_info");
258 hsa_amd_agent_iterate_memory_poolsPtr = dlsym(rocm_dlp, "hsa_amd_agent_iterate_memory_pools");
259 hsa_amd_memory_pool_get_infoPtr = dlsym(rocm_dlp, "hsa_amd_memory_pool_get_info");
260 hsa_status_stringPtr = dlsym(rocm_dlp, "hsa_status_string");
261
262 if (!hsa_is_enabled() || (*hsa_initPtr)()) {
263 const char *message = "dlsym() of HSA symbols failed or hsa_init() "
264 "failed";
265 int count = snprintf(status, PAPI_MAX_STR_LEN, "%s", message);
266 if (count >= PAPI_MAX_STR_LEN) {
267 SUBDBG("Status string truncated.");
268 }
269 return -1;
270 }
271
272 return 0;
273}
274
275int
276unload_hsa_sym( void )
277{
278 if (rocm_dlp != NULL) {
279 (*hsa_shut_downPtr)();
280 dlclose(rocm_dlp);
281 }
282
283 hsa_initPtr = NULL;
284 hsa_shut_downPtr = NULL;
285 hsa_iterate_agentsPtr = NULL;
286 hsa_agent_get_infoPtr = NULL;
287 hsa_amd_agent_iterate_memory_poolsPtr = NULL;
288 hsa_amd_memory_pool_get_infoPtr = NULL;
289 hsa_status_stringPtr = NULL;
290
291 return hsa_is_enabled();
292}
293#endif /* HAVE_ROCM */
294
295#ifdef HAVE_ROCM_SMI
296void
297fill_dev_affinity_info( _sysdetect_gpu_info_u *info, int dev_count )
298{
299 int dev;
300 for (dev = 0; dev < dev_count; ++dev) {
301 unsigned long uid;
302 ROCM_SMI_CALL((*rsmi_dev_pci_id_getPtr)(dev, &uid), return);
303
304 _sysdetect_gpu_info_u *dev_info = &info[dev];
305 dev_info->amd.uid = uid;
306 }
307}
308
309int
310rsmi_is_enabled( void )
311{
312 return (rsmi_initPtr != NULL &&
313 rsmi_shut_downPtr != NULL &&
314 rsmi_dev_pci_id_getPtr != NULL);
315}
316
317int
318load_rsmi_sym( char *status )
319{
320 char pathname[PATH_MAX] = "librocm_smi64.so";
321 char *rsmi_root = getenv("PAPI_ROCM_ROOT");
322 if (rsmi_root != NULL) {
323 sprintf(pathname, "%s/lib/librocm_smi64.so", rsmi_root);
324 }
325
326 rsmi_dlp = dlopen(pathname, RTLD_NOW | RTLD_GLOBAL);
327 if (rsmi_dlp == NULL) {
328 int count = snprintf(status, PAPI_MAX_STR_LEN, "%s", dlerror());
329 if (count >= PAPI_MAX_STR_LEN) {
330 SUBDBG("Status string truncated.");
331 }
332 return -1;
333 }
334
335 rsmi_initPtr = dlsym(rsmi_dlp, "rsmi_init");
336 rsmi_shut_downPtr = dlsym(rsmi_dlp, "rsmi_shut_down");
337 rsmi_dev_pci_id_getPtr = dlsym(rsmi_dlp, "rsmi_dev_pci_id_get");
338
339 if (!rsmi_is_enabled() || (*rsmi_initPtr)(0)) {
340 const char *message = "dlsym() of RSMI symbols failed or rsmi_init() "
341 "failed";
342 int count = snprintf(status, PAPI_MAX_STR_LEN, "%s", message);
343 if (count >= PAPI_MAX_STR_LEN) {
344 SUBDBG("Status string truncated.");
345 }
346 return -1;
347 }
348
349 return 0;
350}
351
352int
353unload_rsmi_sym( void )
354{
355 if (rsmi_dlp != NULL) {
356 (*rsmi_shut_downPtr)();
357 dlclose(rsmi_dlp);
358 }
359
360 rsmi_initPtr = NULL;
361 rsmi_shut_downPtr = NULL;
362 rsmi_dev_pci_id_getPtr = NULL;
363
364 return rsmi_is_enabled();
365}
366#endif /* HAVE_ROCM_SMI */
367
368void
370{
371 memset(dev_type_info, 0, sizeof(*dev_type_info));
372 dev_type_info->id = PAPI_DEV_TYPE_ID__ROCM;
373 strcpy(dev_type_info->vendor, "AMD/ATI");
374 strcpy(dev_type_info->status, "Device Initialized");
375
376#ifdef HAVE_ROCM
377 if (load_hsa_sym(dev_type_info->status)) {
378 return;
379 }
380
381 int dev_count = 0;
382 hsa_status_t status = get_device_count(&dev_count);
383 if (status != HSA_STATUS_SUCCESS) {
384 if (status != HSA_STATUS_ERROR_NOT_INITIALIZED) {
385 const char *string;
386 (*hsa_status_stringPtr)(status, &string);
387 printf( "error: %s\n", string );
388 }
389 return;
390 }
391 dev_type_info->num_devices = dev_count;
392
393 _sysdetect_gpu_info_u *arr = papi_calloc(dev_count, sizeof(*arr));
394 fill_dev_info(arr);
395
396#ifdef HAVE_ROCM_SMI
397 if (!load_rsmi_sym(dev_type_info->status)) {
398 fill_dev_affinity_info(arr, dev_count);
400 }
401#else
402 const char *message = "RSMI not configured, no device affinity available";
403 int count = snprintf(dev_type_info->status, PAPI_MAX_STR_LEN, "%s", message);
404 if (count >= PAPI_MAX_STR_LEN) {
405 SUBDBG("Error message truncated.");
406 }
407#endif /* HAVE_ROCM_SMI */
408
410 dev_type_info->dev_info_arr = (_sysdetect_dev_info_u *)arr;
411#else
412 const char *message = "ROCm not configured, no ROCm device available";
413 int count = snprintf(dev_type_info->status, PAPI_MAX_STR_LEN, "%s", message);
414 if (count >= PAPI_MAX_STR_LEN) {
415 SUBDBG("Error message truncated.");
416 }
417#endif /* HAVE_ROCM */
418}
419
420void
422{
423 papi_free(dev_type_info->dev_info_arr);
424}
void close_amd_gpu_dev_type(_sysdetect_dev_type_info_t *dev_type_info)
Definition: amd_gpu.c:421
void open_amd_gpu_dev_type(_sysdetect_dev_type_info_t *dev_type_info)
Definition: amd_gpu.c:369
static long count
#define PAPI_MAX_STR_LEN
Definition: f90papi.h:77
#define PAPI_DEV_TYPE_ID__ROCM
Definition: f90papi.h:194
uint16_t type
#define SUBDBG(format, args...)
Definition: papi_debug.h:64
#define papi_calloc(a, b)
Definition: papi_memory.h:37
#define papi_free(a)
Definition: papi_memory.h:35
static int unload_hsa_sym(void)
Definition: roc_common.c:248
static int load_hsa_sym(void)
Definition: roc_common.c:200
static int load_rsmi_sym(void)
Definition: rocs.c:665
static void * rsmi_dlp
Definition: rocs.c:323
static int unload_rsmi_sym(void)
Definition: rocs.c:829
char status[PAPI_MAX_STR_LEN]
Definition: sysdetect.h:88
char vendor[PAPI_MAX_STR_LEN]
Definition: sysdetect.h:86
PAPI_dev_type_id_e id
Definition: sysdetect.h:85
_sysdetect_dev_info_u * dev_info_arr
Definition: sysdetect.h:90
unsigned short max_workgroup_dim_x
Definition: sysdetect.h:43
unsigned int compute_unit_count
Definition: sysdetect.h:49
unsigned int max_shmmem_per_workgroup
Definition: sysdetect.h:42
unsigned int max_threads_per_workgroup
Definition: sysdetect.h:40
unsigned short max_workgroup_dim_y
Definition: sysdetect.h:44
unsigned int wavefront_size
Definition: sysdetect.h:38
unsigned short max_workgroup_dim_z
Definition: sysdetect.h:45
unsigned int simd_per_compute_unit
Definition: sysdetect.h:39
char name[PAPI_2MAX_STR_LEN]
Definition: sysdetect.h:13
unsigned long uid
Definition: sysdetect.h:12
unsigned int max_waves_per_compute_unit
Definition: sysdetect.h:41
struct _sysdetect_gpu_info_u::@8 amd