/* CUDA driver API symbols are resolved at runtime via dlopen()/dlsym(),
 * so this file does not link directly against libcuda. */
static void *cuda_dlp = NULL;

static CUresult (*cuInitPtr)( unsigned int flags ) = NULL;
static CUresult (*cuDeviceGetPtr)( CUdevice *device, int ordinal ) = NULL;
static CUresult (*cuDeviceGetCountPtr)( int *count ) = NULL;
static CUresult (*cuDeviceGetNamePtr)( char *name, int len, CUdevice dev ) = NULL;
static CUresult (*cuDeviceGetAttributePtr)( int *pi, CUdevice_attribute attrib,
                                            CUdevice dev ) = NULL;
static CUresult (*cuDeviceGetPCIBusIdPtr)( char *bus_id_string, int len,
                                           CUdevice dev ) = NULL;
#define CU_CALL(call, err_handle) do {                                        \
    CUresult _status = (call);                                                \
    if (_status != CUDA_SUCCESS) {                                            \
        if (_status == CUDA_ERROR_NOT_INITIALIZED) {                          \
            /* Driver not initialized yet: init lazily and retry once. */     \
            if ((*cuInitPtr)(0) == CUDA_SUCCESS) {                            \
                _status = (call);                                             \
                if (_status == CUDA_SUCCESS) break;                           \
            }                                                                 \
        }                                                                     \
        SUBDBG("error: function %s failed with error %d.\n", #call, _status); \
        err_handle;                                                           \
    }                                                                         \
} while(0)
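/*
 * Usage sketch for CU_CALL (hypothetical call site, not from this file):
 * the second argument is a statement executed only after the call has
 * failed, including the one lazy-init retry above.
 *
 *     int dev_count = 0;
 *     CU_CALL((*cuDeviceGetCountPtr)(&dev_count), return);
 */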
static int cuda_is_enabled( void );
static int load_cuda_sym( char *status );
static int unload_cuda_sym( void );
/* NVML symbols, likewise resolved at runtime. */
static void *nvml_dlp = NULL;

static nvmlReturn_t (*nvmlInitPtr)( void ) = NULL;
static nvmlReturn_t (*nvmlDeviceGetCountPtr)( unsigned int *count ) = NULL;
static nvmlReturn_t (*nvmlDeviceGetHandleByPciBusIdPtr)( const char *bus_id_str,
                                                         nvmlDevice_t *device ) = NULL;
static nvmlReturn_t (*nvmlDeviceGetUUIDPtr)( nvmlDevice_t device, char *uuid,
                                             unsigned int length ) = NULL;
#define NVML_CALL(call, err_handle) do {                                      \
    nvmlReturn_t _status = (call);                                            \
    if (_status != NVML_SUCCESS) {                                            \
        if (_status == NVML_ERROR_UNINITIALIZED) {                            \
            /* Same lazy-init-and-retry pattern as CU_CALL. */                \
            if ((*nvmlInitPtr)() == NVML_SUCCESS) {                           \
                _status = (call);                                             \
                if (_status == NVML_SUCCESS) break;                           \
            }                                                                 \
        }                                                                     \
        SUBDBG("error: function %s failed with error %d.\n", #call, _status); \
        err_handle;                                                           \
    }                                                                         \
} while(0)
static int nvml_is_enabled( void );
static int load_nvml_sym( char *status );
static int unload_nvml_sym( void );
static unsigned long hash( unsigned char *str );
/* Device attributes queried per device in fill_dev_info(); each call
 * follows the pattern sketched after this list, storing the result in
 * the matching dev_info->nvidia field. */
    CU_DEVICE_ATTRIBUTE_WARP_SIZE,
    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK,
    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR,
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X,
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y,
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z,
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X,
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y,
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z,
    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK,
    CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
    CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS,
    CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY,
    CU_DEVICE_ATTRIBUTE_GPU_OVERLAP,
    CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING,
    CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY,
    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
#if CUDA_VERSION >= 11000
    CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR,
#endif
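/*
 * Sketch of the query pattern (the actual calls are elided in this
 * extract; the field name max_threads_per_block is taken from the
 * nvidia gpu info struct):
 *
 *     CU_CALL((*cuDeviceGetAttributePtr)(&dev_info->nvidia.max_threads_per_block,
 *                 CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, dev),
 *             return);
 */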
static int
cuda_is_enabled( void )
{
    return (cuInitPtr != NULL && cuDeviceGetPtr != NULL && cuDeviceGetCountPtr != NULL &&
            cuDeviceGetNamePtr != NULL && cuDeviceGetAttributePtr != NULL &&
            cuDeviceGetPCIBusIdPtr != NULL);
}
static int
load_cuda_sym( char *status )
{
    cuda_dlp = dlopen("libcuda.so", RTLD_NOW | RTLD_GLOBAL);
    if (cuda_dlp == NULL) {
        if (snprintf(status, PAPI_MAX_STR_LEN, "%s", dlerror()) >= PAPI_MAX_STR_LEN) {
            SUBDBG("Status string truncated.");
        }
        return 0;
    }

    cuInitPtr               = dlsym(cuda_dlp, "cuInit");
    cuDeviceGetPtr          = dlsym(cuda_dlp, "cuDeviceGet");
    cuDeviceGetCountPtr     = dlsym(cuda_dlp, "cuDeviceGetCount");
    cuDeviceGetNamePtr      = dlsym(cuda_dlp, "cuDeviceGetName");
    cuDeviceGetAttributePtr = dlsym(cuda_dlp, "cuDeviceGetAttribute");
    cuDeviceGetPCIBusIdPtr  = dlsym(cuda_dlp, "cuDeviceGetPCIBusId");

    if (!cuda_is_enabled()) {
        const char *message = "dlsym() of CUDA symbols failed";
        if (snprintf(status, PAPI_MAX_STR_LEN, "%s", message) >= PAPI_MAX_STR_LEN) {
            SUBDBG("Status string truncated.");
        }
        return 0;
    }

    return 1;
}
static int
unload_cuda_sym( void )
{
    if (cuda_dlp != NULL) {
        dlclose(cuda_dlp);
        cuda_dlp = NULL;
    }
    cuInitPtr = NULL;
    cuDeviceGetPtr = NULL;
    cuDeviceGetCountPtr = NULL;
    cuDeviceGetNamePtr = NULL;
    cuDeviceGetAttributePtr = NULL;
    cuDeviceGetPCIBusIdPtr = NULL;
    return cuda_is_enabled();
}
static void
fill_dev_affinity_info( _sysdetect_dev_info_u *dev_info_arr, int dev_count )
{
    int dev;
    for (dev = 0; dev < dev_count; ++dev) {
        /* Map the CUDA device ordinal to an NVML handle via the PCI bus
         * id, then derive a stable uid from the device UUID. */
        char bus_id_str[20] = { 0 };
        CU_CALL((*cuDeviceGetPCIBusIdPtr)(bus_id_str, 20, dev), return);

        nvmlDevice_t device;
        NVML_CALL((*nvmlDeviceGetHandleByPciBusIdPtr)(bus_id_str, &device),
                  return);

        char uuid_str[PAPI_NVML_DEV_BUFFER_SIZE] = { 0 };
        NVML_CALL((*nvmlDeviceGetUUIDPtr)(device, uuid_str,
                                          PAPI_NVML_DEV_BUFFER_SIZE), return);

        _sysdetect_dev_info_u *dev_info = &dev_info_arr[dev];
        dev_info->nvidia.uid = hash((unsigned char *) uuid_str);
    }
}
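/*
 * The PCI bus id is the join key between the two libraries: the driver
 * API identifies devices by ordinal, NVML by opaque handle, and a bus id
 * string such as "0000:3B:00.0" (illustrative) converts between the two.
 */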
static unsigned long
hash( unsigned char *str )
{
    /* djb2 string hash (Bernstein): hash = hash * 33 + c. */
    unsigned long hash = 5381;
    int c;

    while ((c = *str++)) {
        hash = ((hash << 5) + hash) + c;
    }

    return hash;
}
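/*
 * Worked example: hash((unsigned char *)"a") computes
 * 5381 * 33 + 'a' = 177573 + 97 = 177670.
 */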
static int
nvml_is_enabled( void )
{
    return (nvmlInitPtr != NULL && nvmlDeviceGetCountPtr != NULL &&
            nvmlDeviceGetHandleByPciBusIdPtr != NULL &&
            nvmlDeviceGetUUIDPtr != NULL);
}
static int
load_nvml_sym( char *status )
{
    nvml_dlp = dlopen("libnvidia-ml.so", RTLD_NOW | RTLD_GLOBAL);
    if (nvml_dlp == NULL) {
        if (snprintf(status, PAPI_MAX_STR_LEN, "%s", dlerror()) >= PAPI_MAX_STR_LEN) {
            SUBDBG("Status string truncated.");
        }
        return 0;
    }

    /* The nvmlInit/nvmlDeviceGetCount lookups are elided in this extract;
     * "nvmlInit_v2" and "nvmlDeviceGetCount_v2" are assumed symbol names. */
    nvmlInitPtr = dlsym(nvml_dlp, "nvmlInit_v2");
    nvmlDeviceGetCountPtr = dlsym(nvml_dlp, "nvmlDeviceGetCount_v2");
    nvmlDeviceGetHandleByPciBusIdPtr = dlsym(nvml_dlp, "nvmlDeviceGetHandleByPciBusId_v2");
    nvmlDeviceGetUUIDPtr = dlsym(nvml_dlp, "nvmlDeviceGetUUID");

    if (!nvml_is_enabled()) {
        const char *message = "dlsym() of NVML symbols failed";
        if (snprintf(status, PAPI_MAX_STR_LEN, "%s", message) >= PAPI_MAX_STR_LEN) {
            SUBDBG("Status string truncated.");
        }
        return 0;
    }

    return 1;
}
static int
unload_nvml_sym( void )
{
    if (nvml_dlp != NULL) {
        dlclose(nvml_dlp);
        nvml_dlp = NULL;
    }
    nvmlInitPtr = NULL;
    nvmlDeviceGetCountPtr = NULL;
    nvmlDeviceGetHandleByPciBusIdPtr = NULL;
    nvmlDeviceGetUUIDPtr = NULL;
    return nvml_is_enabled();
}
void
open_nvidia_gpu_dev_type( _sysdetect_dev_type_info_t *dev_type_info )
{
    memset(dev_type_info, 0, sizeof(*dev_type_info));
    dev_type_info->id = PAPI_DEV_TYPE_ID__CUDA;
    strcpy(dev_type_info->vendor, "NVIDIA");
    strcpy(dev_type_info->status, "Device Initialized");

#ifdef HAVE_CUDA
    if (load_cuda_sym(dev_type_info->status)) {
        int dev_count = 0;
        CU_CALL((*cuDeviceGetCountPtr)(&dev_count), return);
        if (dev_count == 0) {
            return;
        }

        _sysdetect_dev_info_u *arr = papi_calloc(dev_count, sizeof(*arr));

        int dev;
        for (dev = 0; dev < dev_count; ++dev) {
            fill_dev_info(&arr[dev], dev);
        }

#ifdef HAVE_NVML
        /* Device affinity (uid) needs NVML on top of the driver API. */
        if (load_nvml_sym(dev_type_info->status)) {
            fill_dev_affinity_info(arr, dev_count);
            unload_nvml_sym();
        }
#else
        const char *message = "NVML not configured, no device affinity available";
        if (snprintf(dev_type_info->status, PAPI_MAX_STR_LEN, "%s", message) >= PAPI_MAX_STR_LEN) {
            SUBDBG("Status string truncated.");
        }
#endif

        /* (the device-count field assignment is elided in this extract) */
        dev_type_info->dev_info_arr = arr;
        unload_cuda_sym();
    }
#else
    const char *message = "CUDA not configured, no CUDA device available";
    if (snprintf(dev_type_info->status, PAPI_MAX_STR_LEN, "%s", message) >= PAPI_MAX_STR_LEN) {
        SUBDBG("Status string truncated.");
    }
#endif
}