PAPI 7.1.0.0
Loading...
Searching...
No Matches
nvidia_gpu.c
Go to the documentation of this file.
1
12#include <stdio.h>
13#include <string.h>
14#include <stdlib.h>
15#include <dlfcn.h>
16
17#include "sysdetect.h"
18#include "nvidia_gpu.h"
19
20#ifdef HAVE_CUDA
21#include "cuda.h"
22
23static void *cuda_dlp = NULL;
24
25static CUresult (*cuInitPtr)( unsigned int flags ) = NULL;
26static CUresult (*cuDeviceGetPtr)( CUdevice *device, int ordinal ) = NULL;
27static CUresult (*cuDeviceGetNamePtr)( char *name, int len, CUdevice dev ) = NULL;
28static CUresult (*cuDeviceGetCountPtr)( int *count ) = NULL;
29static CUresult (*cuDeviceGetAttributePtr)( int *pi, CUdevice_attribute attrib,
30 CUdevice dev ) = NULL;
31static CUresult (*cuDeviceGetPCIBusIdPtr)( char *bus_id_string, int len,
32 CUdevice dev ) = NULL;
33
/*
 * Evaluate a CUDA driver call and execute err_handle if it does not succeed.
 *
 * When the call reports CUDA_ERROR_NOT_INITIALIZED, cuInit(0) is invoked and
 * the call is retried once; a successful retry skips err_handle. Note that
 * `call` may therefore be evaluated twice. err_handle is pasted in as a
 * statement, so it can be `return`, `goto label`, or an assignment.
 */
#define CU_CALL(call, err_handle) do {                                          \
    CUresult _status = (call);                                                  \
    if (_status != CUDA_SUCCESS) {                                              \
        if (_status == CUDA_ERROR_NOT_INITIALIZED) {                            \
            if ((*cuInitPtr)(0) == CUDA_SUCCESS) {                              \
                _status = (call);                                               \
                if (_status == CUDA_SUCCESS) {                                  \
                    break;                                                      \
                }                                                               \
            }                                                                   \
        }                                                                       \
        SUBDBG("error: function %s failed with error %d.\n", #call, _status);   \
        err_handle;                                                             \
    }                                                                           \
} while(0)
49
50static void fill_dev_info( _sysdetect_gpu_info_u *dev_info, int dev );
51static int cuda_is_enabled( void );
52static int load_cuda_sym( char *status );
53static int unload_cuda_sym( void );
54#endif /* HAVE_CUDA */
55
56#ifdef HAVE_NVML
57#include "nvml.h"
58
59static void *nvml_dlp = NULL;
60
61static nvmlReturn_t (*nvmlInitPtr)( void );
62static nvmlReturn_t (*nvmlDeviceGetCountPtr)( unsigned int *deviceCount ) = NULL;
63static nvmlReturn_t (*nvmlDeviceGetHandleByPciBusIdPtr)( const char *bus_id_str,
64 nvmlDevice_t *device ) = NULL;
65static nvmlReturn_t (*nvmlDeviceGetUUIDPtr)( nvmlDevice_t device, char *uuid,
66 unsigned int length ) = NULL;
67
/*
 * Evaluate an NVML call and execute err_handle if it does not succeed.
 *
 * When the call reports NVML_ERROR_UNINITIALIZED, nvmlInit is invoked and
 * the call is retried once; a successful retry skips err_handle. Note that
 * `call` may therefore be evaluated twice. err_handle is pasted in as a
 * statement, so it can be `return`, `goto label`, or an assignment.
 */
#define NVML_CALL(call, err_handle) do {                                        \
    nvmlReturn_t _status = (call);                                              \
    if (_status != NVML_SUCCESS) {                                              \
        if (_status == NVML_ERROR_UNINITIALIZED) {                              \
            if ((*nvmlInitPtr)() == NVML_SUCCESS) {                             \
                _status = (call);                                               \
                if (_status == NVML_SUCCESS) {                                  \
                    break;                                                      \
                }                                                               \
            }                                                                   \
        }                                                                       \
        SUBDBG("error: function %s failed with error %d.\n", #call, _status);   \
        err_handle;                                                             \
    }                                                                           \
} while(0)
83
84static void fill_dev_affinity_info( _sysdetect_gpu_info_u *dev_info, int dev_count );
85static int nvml_is_enabled( void );
86static int load_nvml_sym( char *status );
87static int unload_nvml_sym( void );
88static unsigned long hash(unsigned char *str);
89#endif /* HAVE_NVML */
90
91#ifdef HAVE_CUDA
92void
93fill_dev_info( _sysdetect_gpu_info_u *dev_info, int dev )
94{
95 CUdevice device;
96 CU_CALL((*cuDeviceGetPtr)(&device,
97 dev),
98 return);
99 CU_CALL((*cuDeviceGetNamePtr)(dev_info->nvidia.name,
101 device),
102 dev_info->nvidia.name[0] = '\0');
103 CU_CALL((*cuDeviceGetAttributePtr)(&dev_info->nvidia.warp_size,
104 CU_DEVICE_ATTRIBUTE_WARP_SIZE,
105 device),
106 dev_info->nvidia.warp_size = -1);
108 CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK,
109 device),
110 dev_info->nvidia.max_shmmem_per_block = -1);
112 CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR,
113 device),
114 dev_info->nvidia.max_shmmem_per_multi_proc = -1);
115 CU_CALL((*cuDeviceGetAttributePtr)(&dev_info->nvidia.max_block_dim_x,
116 CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X,
117 device),
118 dev_info->nvidia.max_block_dim_x = -1);
119 CU_CALL((*cuDeviceGetAttributePtr)(&dev_info->nvidia.max_block_dim_y,
120 CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y,
121 device),
122 dev_info->nvidia.max_block_dim_y = -1);
123 CU_CALL((*cuDeviceGetAttributePtr)(&dev_info->nvidia.max_block_dim_z,
124 CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z,
125 device),
126 dev_info->nvidia.max_block_dim_z = -1);
127 CU_CALL((*cuDeviceGetAttributePtr)(&dev_info->nvidia.max_grid_dim_x,
128 CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X,
129 device),
130 dev_info->nvidia.max_grid_dim_x = -1);
131 CU_CALL((*cuDeviceGetAttributePtr)(&dev_info->nvidia.max_grid_dim_y,
132 CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y,
133 device),
134 dev_info->nvidia.max_grid_dim_y = -1);
135 CU_CALL((*cuDeviceGetAttributePtr)(&dev_info->nvidia.max_grid_dim_z,
136 CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z,
137 device),
138 dev_info->nvidia.max_grid_dim_z = -1);
140 CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK,
141 device),
142 dev_info->nvidia.max_threads_per_block = -1);
144 CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
145 device),
146 dev_info->nvidia.multi_processor_count = -1);
148 CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS,
149 device),
150 dev_info->nvidia.multi_kernel_per_ctx = -1);
151 CU_CALL((*cuDeviceGetAttributePtr)(&dev_info->nvidia.can_map_host_mem,
152 CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY,
153 device),
154 dev_info->nvidia.can_map_host_mem = -1);
156 CU_DEVICE_ATTRIBUTE_GPU_OVERLAP,
157 device),
160 CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING,
161 device),
162 dev_info->nvidia.unified_addressing = -1);
163 CU_CALL((*cuDeviceGetAttributePtr)(&dev_info->nvidia.managed_memory,
164 CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY,
165 device),
166 dev_info->nvidia.managed_memory = -1);
167 CU_CALL((*cuDeviceGetAttributePtr)(&dev_info->nvidia.major,
168 CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
169 device),
170 dev_info->nvidia.major = -1);
171 CU_CALL((*cuDeviceGetAttributePtr)(&dev_info->nvidia.minor,
172 CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
173 device),
174 dev_info->nvidia.minor = -1);
175
176#if CUDA_VERSION >= 11000
178 CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR,
179 device),
180 dev_info->nvidia.max_blocks_per_multi_proc = -1);
181#else
182 dev_info->nvidia.max_blocks_per_multi_proc = -1;
183#endif /* CUDA_VERSION */
184}
185
186int
187cuda_is_enabled( void )
188{
189 return (cuInitPtr != NULL &&
190 cuDeviceGetPtr != NULL &&
191 cuDeviceGetNamePtr != NULL &&
192 cuDeviceGetCountPtr != NULL &&
193 cuDeviceGetAttributePtr != NULL &&
194 cuDeviceGetPCIBusIdPtr != NULL);
195}
196
197int
198load_cuda_sym( char *status )
199{
200 cuda_dlp = dlopen("libcuda.so", RTLD_NOW | RTLD_GLOBAL);
201 if (cuda_dlp == NULL) {
202 int count = snprintf(status, PAPI_MAX_STR_LEN, "%s", dlerror());
203 if (count >= PAPI_MAX_STR_LEN) {
204 SUBDBG("Status string truncated.");
205 }
206 return -1;
207 }
208
209 cuInitPtr = dlsym(cuda_dlp, "cuInit");
210 cuDeviceGetPtr = dlsym(cuda_dlp, "cuDeviceGet");
211 cuDeviceGetNamePtr = dlsym(cuda_dlp, "cuDeviceGetName");
212 cuDeviceGetCountPtr = dlsym(cuda_dlp, "cuDeviceGetCount");
213 cuDeviceGetAttributePtr = dlsym(cuda_dlp, "cuDeviceGetAttribute");
214 cuDeviceGetPCIBusIdPtr = dlsym(cuda_dlp, "cuDeviceGetPCIBusId");
215
216 if (!cuda_is_enabled()) {
217 const char *message = "dlsym() of CUDA symbols failed";
218 int count = snprintf(status, PAPI_MAX_STR_LEN, "%s", message);
219 if (count >= PAPI_MAX_STR_LEN) {
220 SUBDBG("Status string truncated.");
221 }
222 return -1;
223 }
224
225 return 0;
226}
227
228int
229unload_cuda_sym( void )
230{
231 if (cuda_dlp != NULL) {
232 dlclose(cuda_dlp);
233 }
234
235 cuInitPtr = NULL;
236 cuDeviceGetPtr = NULL;
237 cuDeviceGetNamePtr = NULL;
238 cuDeviceGetCountPtr = NULL;
240 cuDeviceGetPCIBusIdPtr = NULL;
241
242 return cuda_is_enabled();
243}
244#endif /* HAVE_CUDA */
245
246#ifdef HAVE_NVML
247void
248fill_dev_affinity_info( _sysdetect_gpu_info_u *info, int dev_count )
249{
250 int dev;
251 for (dev = 0; dev < dev_count; ++dev) {
252 char bus_id_str[20] = { 0 };
253 CU_CALL((*cuDeviceGetPCIBusIdPtr)(bus_id_str, 20, dev), return);
254
255 nvmlDevice_t device;
256 NVML_CALL((*nvmlDeviceGetHandleByPciBusIdPtr)(bus_id_str, &device),
257 return);
258
259 char uuid_str[PAPI_NVML_DEV_BUFFER_SIZE] = { 0 };
260 NVML_CALL((*nvmlDeviceGetUUIDPtr)(device, uuid_str,
262 return);
263
264 _sysdetect_gpu_info_u *dev_info = &info[dev];
265 dev_info->nvidia.uid = hash((unsigned char *) uuid_str);
266 }
267}
268
/*
 * Hash a NUL-terminated byte string to an unsigned long.
 *
 * Starts from 5381 and, for each byte, multiplies the running value by 33
 * and adds the byte (arithmetic wraps modulo the width of unsigned long).
 * An empty string yields 5381.
 */
unsigned long
hash(unsigned char *str)
{
    unsigned long value = 5381;

    for (; *str != '\0'; ++str) {
        value = value * 33 + *str;
    }

    return value;
}
281
282int
283nvml_is_enabled( void )
284{
285 return (nvmlInitPtr != NULL &&
286 nvmlDeviceGetCountPtr != NULL &&
287 nvmlDeviceGetHandleByPciBusIdPtr != NULL &&
288 nvmlDeviceGetUUIDPtr != NULL);
289}
290
291int
292load_nvml_sym( char *status )
293{
294 nvml_dlp = dlopen("libnvidia-ml.so", RTLD_NOW | RTLD_GLOBAL);
295 if (nvml_dlp == NULL) {
296 int count = snprintf(status, PAPI_MAX_STR_LEN, "%s", dlerror());
297 if (count >= PAPI_MAX_STR_LEN) {
298 SUBDBG("Status string truncated.");
299 }
300 return -1;
301 }
302
303 nvmlInitPtr = dlsym(nvml_dlp, "nvmlInit_v2");
304 nvmlDeviceGetCountPtr = dlsym(nvml_dlp, "nvmlDeviceGetCount_v2");
305 nvmlDeviceGetHandleByPciBusIdPtr = dlsym(nvml_dlp, "nvmlDeviceGetHandleByPciBusId_v2");
306 nvmlDeviceGetUUIDPtr = dlsym(nvml_dlp, "nvmlDeviceGetUUID");
307
308 if (!nvml_is_enabled()) {
309 const char *message = "dlsym() of NVML symbols failed";
310 int count = snprintf(status, PAPI_MAX_STR_LEN, "%s", message);
311 if (count >= PAPI_MAX_STR_LEN) {
312 SUBDBG("Status string truncated.");
313 }
314 return -1;
315 }
316
317 return 0;
318}
319
320int
321unload_nvml_sym( void )
322{
323 if (nvml_dlp != NULL) {
324 dlclose(nvml_dlp);
325 }
326
327 nvmlInitPtr = NULL;
329 nvmlDeviceGetHandleByPciBusIdPtr = NULL;
330 nvmlDeviceGetUUIDPtr = NULL;
331
332 return nvml_is_enabled();
333}
334#endif /* HAVE_NVML */
335
336void
338{
339 memset(dev_type_info, 0, sizeof(*dev_type_info));
340 dev_type_info->id = PAPI_DEV_TYPE_ID__CUDA;
341 strcpy(dev_type_info->vendor, "NVIDIA");
342 strcpy(dev_type_info->status, "Device Initialized");
343
344#ifdef HAVE_CUDA
345 if (load_cuda_sym(dev_type_info->status)) {
346 return;
347 }
348
349 int dev, dev_count;
350 CU_CALL((*cuDeviceGetCountPtr)(&dev_count), return);
351 dev_type_info->num_devices = dev_count;
352 if (dev_count == 0) {
353 return;
354 }
355
356 _sysdetect_gpu_info_u *arr = papi_calloc(dev_count, sizeof(*arr));
357 for (dev = 0; dev < dev_count; ++dev) {
358 fill_dev_info(&arr[dev], dev);
359 }
360
361#ifdef HAVE_NVML
362 if (!load_nvml_sym(dev_type_info->status)) {
363 fill_dev_affinity_info(arr, dev_count);
364 unload_nvml_sym();
365 }
366#else
367 const char *message = "NVML not configured, no device affinity available";
368 int count = snprintf(dev_type_info->status, PAPI_MAX_STR_LEN, "%s", message);
369 if (count >= PAPI_MAX_STR_LEN) {
370 SUBDBG("Status string truncated.");
371 }
372#endif /* HAVE_NVML */
373
375 dev_type_info->dev_info_arr = (_sysdetect_dev_info_u *)arr;
376#else
377 const char *message = "CUDA not configured, no CUDA device available";
378 int count = snprintf(dev_type_info->status, PAPI_MAX_STR_LEN, "%s", message);
379 if (count >= PAPI_MAX_STR_LEN) {
380 SUBDBG("Status string truncated.");
381 }
382#endif /* HAVE_CUDA */
383}
384
385void
387{
388 papi_free(dev_type_info->dev_info_arr);
389}
nvmlReturn_t(* nvmlDeviceGetCountPtr)(unsigned int *dest)
Definition: benchSANVML.c:66
nvmlReturn_t(* nvmlInitPtr)(void)
Definition: benchSANVML.c:83
static long count
CUresult(* cuDeviceGetCountPtr)(int *)
Definition: cupti_common.c:29
CUresult(* cuDeviceGetAttributePtr)(int *, CUdevice_attribute, CUdevice)
Definition: cupti_common.c:38
CUresult(* cuDeviceGetPtr)(CUdevice *, int)
Definition: cupti_common.c:28
static int unload_cuda_sym(void)
Definition: cupti_common.c:85
CUresult(* cuDeviceGetNamePtr)(char *, int, CUdevice)
Definition: cupti_common.c:30
static int load_cuda_sym(void)
Definition: cupti_common.c:52
CUresult(* cuInitPtr)(unsigned int)
Definition: cupti_common.c:33
#define PAPI_DEV_TYPE_ID__CUDA
Definition: f90papi.h:186
#define PAPI_MAX_STR_LEN
Definition: f90papi.h:77
#define PAPI_2MAX_STR_LEN
Definition: f90papi.h:180
static double c[MATRIX_SIZE][MATRIX_SIZE]
Definition: libmsr_basic.c:40
void open_nvidia_gpu_dev_type(_sysdetect_dev_type_info_t *dev_type_info)
Definition: nvidia_gpu.c:337
void close_nvidia_gpu_dev_type(_sysdetect_dev_type_info_t *dev_type_info)
Definition: nvidia_gpu.c:386
#define PAPI_NVML_DEV_BUFFER_SIZE
Definition: nvidia_gpu.h:7
#define SUBDBG(format, args...)
Definition: papi_debug.h:64
#define papi_calloc(a, b)
Definition: papi_memory.h:37
#define papi_free(a)
Definition: papi_memory.h:35
unsigned int length
const char * name
Definition: rocs.c:225
char status[PAPI_MAX_STR_LEN]
Definition: sysdetect.h:88
char vendor[PAPI_MAX_STR_LEN]
Definition: sysdetect.h:86
PAPI_dev_type_id_e id
Definition: sysdetect.h:85
_sysdetect_dev_info_u * dev_info_arr
Definition: sysdetect.h:90
int can_overlap_comp_and_data_xfer
Definition: sysdetect.h:28
struct _sysdetect_gpu_info_u::@7 nvidia
char name[PAPI_2MAX_STR_LEN]
Definition: sysdetect.h:13
unsigned long uid
Definition: sysdetect.h:12