31typedef struct command_flags
39 printf(
"This is the PAPI hardware avail program.\n" );
40 printf(
"It provides availability of system's equipped hardware devices.\n" );
41 printf(
"Usage: %s [options]\n", argv[0] );
42 printf(
"Options:\n\n" );
43 printf(
" --help, -h print this help message\n" );
53 for (
i = 1;
i < argc;
i++ ) {
54 if ( !strcmp( argv[
i],
"-h" ) || !strcmp( argv[
i],
"--help" ) )
57 printf(
"%s is not supported\n", argv[
i] );
69main(
int argc,
char **argv )
76 int sysdetect_avail = 0;
81 fprintf(
stderr,
"Error! PAPI_library_init\n");
89 fprintf(
stderr,
"Error! PAPI_set_debug\n");
94 for (
i = 0;
i < numcmp;
i++) {
96 if (strcmp(
"sysdetect", cmpinfo->
name) == 0)
100 if (sysdetect_avail == 0) {
101 fprintf(
stderr,
"Error! Sysdetect component not enabled\n");
105 printf(
"\nDevice Summary -----------------------------------------------------------------\n" );
108 int id, vendor_id, dev_count;
109 const char *vendor_name, *status;
111 printf(
"Vendor DevCount \n" );
117 printf(
"%-18s (%d)\n", vendor_name, dev_count);
118 printf(
" \\-> Status: %s\n", status );
122 printf(
"\nDevice Information -------------------------------------------------------------\n" );
131 unsigned int numas = 1;
132 for (
i = 0;
i < dev_count; ++
i ) {
133 const char *cpu_name;
135 unsigned int sockets, cores, threads;
136 unsigned int l1i_size, l1d_size, l2u_size, l3u_size;
137 unsigned int l1i_line_sz, l1d_line_sz, l2u_line_sz, l3u_line_sz;
138 unsigned int l1i_line_cnt, l1d_line_cnt, l2u_line_cnt, l3u_line_cnt;
139 unsigned int l1i_cache_ass, l1d_cache_ass, l2u_cache_ass, l3u_cache_ass;
166 printf(
"Vendor : %s (%u,0x%x)\n",
170 printf(
"Id : %u\n",
i );
171 printf(
"Name : %s\n", cpu_name );
172 printf(
"CPUID : Family/Model/Stepping %u/%u/%u 0x%02x/0x%02x/0x%02x\n",
174 printf(
"Sockets : %u\n", sockets );
175 printf(
"Numa regions : %u\n", numas );
176 printf(
"Cores per socket : %u\n", cores );
177 printf(
"Cores per NUMA region : %u\n", threads / numas );
178 printf(
"SMT threads per core : %u\n", threads / sockets / cores );
181 printf(
"L1i Cache : Size/LineSize/Lines/Assoc %uKB/%uB/%u/%u\n",
182 l1i_size >> 10, l1i_line_sz, l1i_line_cnt, l1i_cache_ass);
183 printf(
"L1d Cache : Size/LineSize/Lines/Assoc %uKB/%uB/%u/%u\n",
184 l1d_size >> 10, l1d_line_sz, l1d_line_cnt, l1d_cache_ass);
188 printf(
"L2 Cache : Size/LineSize/Lines/Assoc %uKB/%uB/%u/%u\n",
189 l2u_size >> 10, l2u_line_sz, l2u_line_cnt, l2u_cache_ass );
193 printf(
"L3 Cache : Size/LineSize/Lines/Assoc %uKB/%uB/%u/%u\n",
194 l3u_size >> 10, l3u_line_sz, l3u_line_cnt, l3u_cache_ass );
197#define MAX_NUMA_NODES (16)
198#define MAX_CPU_THREADS (512)
203 for (j = 0; j < threads; ++j) {
205 numa_threads[affinity[j]][numa_threads_count[affinity[j]]++] = j;
208 for ( j = 0; j < numas; ++j ) {
209 unsigned int k, memsize;
211 printf(
"Numa Node %u Memory : %uMB\n", j, memsize );
212 printf(
"Numa Node %u Threads : ", j );
213 for (k = 0; k < numa_threads_count[j]; ++k) {
214 printf(
"%u ", numa_threads[j][k] );
223 printf(
"Vendor : %s\n", vendor_name );
225 for (
i = 0;
i < dev_count; ++
i ) {
227 unsigned int warp_size, thread_per_block, block_per_sm;
228 unsigned int shm_per_block, shm_per_sm;
229 unsigned int blk_dim_x, blk_dim_y, blk_dim_z;
230 unsigned int grd_dim_x, grd_dim_y, grd_dim_z;
231 unsigned int sm_count,
multi_kernel, map_host_mem, async_memcpy;
232 unsigned int unif_addr, managed_mem;
233 unsigned int cc_major, cc_minor;
234 const char *dev_name;
258 printf(
"Id : %d\n",
i );
259 printf(
"UID : %lu\n", uid );
260 printf(
"Name : %s\n", dev_name );
261 printf(
"Warp size : %u\n", warp_size );
262 printf(
"Max threads per block : %u\n", thread_per_block );
263 printf(
"Max blocks per multiprocessor : %u\n", block_per_sm );
264 printf(
"Max shared memory per block : %u\n", shm_per_block );
265 printf(
"Max shared memory per multiprocessor : %u\n", shm_per_sm );
266 printf(
"Max block dim x : %u\n", blk_dim_x );
267 printf(
"Max block dim y : %u\n", blk_dim_y );
268 printf(
"Max block dim z : %u\n", blk_dim_z );
269 printf(
"Max grid dim x : %u\n", grd_dim_x );
270 printf(
"Max grid dim y : %u\n", grd_dim_y );
271 printf(
"Max grid dim z : %u\n", grd_dim_z );
272 printf(
"Multiprocessor count : %u\n", sm_count );
273 printf(
"Multiple kernels per context : %s\n",
multi_kernel ?
"yes" :
"no" );
274 printf(
"Can map host memory : %s\n", map_host_mem ?
"yes" :
"no");
275 printf(
"Can overlap compute and data transfer : %s\n", async_memcpy ?
"yes" :
"no" );
276 printf(
"Has unified addressing : %s\n", unif_addr ?
"yes" :
"no" );
277 printf(
"Has managed memory : %s\n", managed_mem ?
"yes" :
"no" );
278 printf(
"Compute capability : %u.%u\n", cc_major, cc_minor );
284 printf(
"Vendor : %s\n", vendor_name );
287 const char *dev_name;
288 unsigned int wf_size, simd_per_cu, wg_size;
289 unsigned int wf_per_cu, shm_per_wg, wg_dim_x, wg_dim_y, wg_dim_z;
290 unsigned int grd_dim_x, grd_dim_y, grd_dim_z;
291 unsigned int cu_count;
292 unsigned int cc_major, cc_minor;
294 for (
i = 0;
i < dev_count; ++
i ) {
312 printf(
"Id : %d\n",
i );
313 printf(
"Name : %s\n", dev_name );
314 printf(
"Wavefront size : %u\n", wf_size );
315 printf(
"SIMD per compute unit : %u\n", simd_per_cu );
316 printf(
"Max threads per workgroup : %u\n", wg_size );
317 printf(
"Max waves per compute unit : %u\n", wf_per_cu );
318 printf(
"Max shared memory per workgroup : %u\n", shm_per_wg );
319 printf(
"Max workgroup dim x : %u\n", wg_dim_x );
320 printf(
"Max workgroup dim y : %u\n", wg_dim_y );
321 printf(
"Max workgroup dim z : %u\n", wg_dim_z );
322 printf(
"Max grid dim x : %u\n", grd_dim_x );
323 printf(
"Max grid dim y : %u\n", grd_dim_y );
324 printf(
"Max grid dim z : %u\n", grd_dim_z );
325 printf(
"Compute unit count : %u\n", cu_count );
326 printf(
"Compute capability : %u.%u\n", cc_major, cc_minor );
332 printf(
"--------------------------------------------------------------------------------\n" );
static papi_handle_t handle
Returns the handle of the next device type.
Get information about a specific software component.
Returns device attributes.
Returns device type attributes.
Initialize the PAPI library.
Get the number of components available on the system.
Set the current debug level for error output from PAPI.
Finish using PAPI and free all related resources.
#define PAPI_DEV_ATTR__ROCM_UINT_WG_DIM_Y
#define PAPI_DEV_ATTR__CUDA_UINT_THR_PER_BLK
#define PAPI_DEV_ATTR__CUDA_UINT_WARP_SIZE
#define PAPI_DEV_ATTR__CPU_UINT_L1D_CACHE_LINE_COUNT
#define PAPI_DEV_ATTR__ROCM_UINT_GRD_DIM_Y
#define PAPI_DEV_ATTR__ROCM_UINT_WAVE_PER_CU
#define PAPI_DEV_ATTR__CUDA_UINT_BLK_PER_SM
#define PAPI_DEV_ATTR__CUDA_ULONG_UID
#define PAPI_DEV_TYPE_ENUM__ALL
#define PAPI_DEV_ATTR__CPU_UINT_FAMILY
#define PAPI_DEV_TYPE_ID__CUDA
#define PAPI_DEV_ATTR__CPU_UINT_SOCKET_COUNT
#define PAPI_DEV_TYPE_ATTR__INT_PAPI_ID
#define PAPI_DEV_ATTR__CPU_UINT_L1I_CACHE_LINE_SIZE
#define PAPI_DEV_ATTR__CUDA_UINT_BLK_DIM_Z
#define PAPI_DEV_ATTR__ROCM_UINT_WAVEFRONT_SIZE
#define PAPI_DEV_ATTR__CUDA_UINT_UNIFIED_ADDR
#define PAPI_DEV_ATTR__CUDA_UINT_SM_COUNT
#define PAPI_DEV_ATTR__CPU_UINT_L3U_CACHE_ASSOC
#define PAPI_DEV_ATTR__CUDA_UINT_GRD_DIM_Y
#define PAPI_DEV_ATTR__CPU_UINT_L1I_CACHE_SIZE
#define PAPI_DEV_ATTR__CPU_UINT_L3U_CACHE_LINE_SIZE
#define PAPI_DEV_ATTR__CPU_UINT_THREAD_COUNT
#define PAPI_DEV_TYPE_ATTR__INT_COUNT
#define PAPI_DEV_ATTR__CPU_UINT_NUMA_COUNT
#define PAPI_DEV_TYPE_ATTR__INT_VENDOR_ID
#define PAPI_DEV_ATTR__CPU_UINT_L1I_CACHE_LINE_COUNT
#define PAPI_DEV_ATTR__CUDA_UINT_MANAGED_MEM
#define PAPI_DEV_ATTR__CPU_UINT_L3U_CACHE_SIZE
#define PAPI_DEV_ATTR__ROCM_UINT_COMP_CAP_MINOR
#define PAPI_DEV_ATTR__CPU_UINT_L2U_CACHE_SIZE
#define PAPI_DEV_ATTR__CUDA_UINT_BLK_DIM_Y
#define PAPI_DEV_ATTR__ROCM_CHAR_DEVICE_NAME
#define PAPI_DEV_ATTR__CUDA_UINT_SHM_PER_BLK
#define PAPI_DEV_ATTR__ROCM_UINT_WORKGROUP_SIZE
#define PAPI_DEV_ATTR__CPU_UINT_L2U_CACHE_LINE_COUNT
#define PAPI_DEV_ATTR__ROCM_UINT_COMP_CAP_MAJOR
#define PAPI_DEV_ATTR__CPU_CHAR_NAME
#define PAPI_DEV_ATTR__CUDA_UINT_GRD_DIM_X
#define PAPI_DEV_ATTR__ROCM_ULONG_UID
#define PAPI_DEV_ATTR__CPU_UINT_L3U_CACHE_LINE_COUNT
#define PAPI_DEV_TYPE_ATTR__CHAR_NAME
#define PAPI_DEV_ATTR__CUDA_UINT_MEMCPY_OVERLAP
#define PAPI_DEV_ATTR__ROCM_UINT_SHM_PER_WG
#define PAPI_DEV_ATTR__CUDA_UINT_COMP_CAP_MINOR
#define PAPI_DEV_ATTR__CUDA_UINT_MAP_HOST_MEM
#define PAPI_DEV_ATTR__CPU_UINT_L1D_CACHE_ASSOC
#define PAPI_DEV_ATTR__CUDA_UINT_SHM_PER_SM
#define PAPI_DEV_ATTR__CPU_UINT_STEPPING
#define PAPI_DEV_ATTR__ROCM_UINT_CU_COUNT
#define PAPI_DEV_ATTR__CPU_UINT_THR_NUMA_AFFINITY
#define PAPI_DEV_ATTR__CUDA_UINT_BLK_DIM_X
#define PAPI_DEV_ATTR__CPU_UINT_NUMA_MEM_SIZE
#define PAPI_DEV_ATTR__ROCM_UINT_SIMD_PER_CU
#define PAPI_DEV_ATTR__ROCM_UINT_WG_DIM_X
#define PAPI_DEV_ATTR__CUDA_UINT_MULTI_KERNEL
#define PAPI_DEV_ATTR__CPU_UINT_L2U_CACHE_ASSOC
#define PAPI_DEV_ATTR__CUDA_UINT_GRD_DIM_Z
#define PAPI_DEV_ATTR__ROCM_UINT_GRD_DIM_X
#define PAPI_DEV_TYPE_ID__CPU
#define PAPI_DEV_ATTR__CPU_UINT_L1D_CACHE_SIZE
#define PAPI_DEV_ATTR__CPU_UINT_MODEL
#define PAPI_DEV_ATTR__ROCM_UINT_WG_DIM_Z
#define PAPI_DEV_ATTR__ROCM_UINT_GRD_DIM_Z
#define PAPI_DEV_ATTR__CPU_UINT_L2U_CACHE_LINE_SIZE
#define PAPI_DEV_ATTR__CPU_UINT_CORE_COUNT
#define PAPI_DEV_ATTR__CPU_UINT_L1I_CACHE_ASSOC
#define PAPI_DEV_ATTR__CPU_UINT_L1D_CACHE_LINE_SIZE
#define PAPI_DEV_ATTR__CUDA_CHAR_DEVICE_NAME
#define PAPI_DEV_TYPE_ATTR__CHAR_STATUS
#define PAPI_DEV_ATTR__CUDA_UINT_COMP_CAP_MAJOR
#define PAPI_DEV_TYPE_ID__ROCM
int multi_kernel(int argc, char *argv[])
Return codes and API definitions.
static void print_help(void)
static void parse_args(int argc, char **argv, command_flags_t *f)
char name[PAPI_MAX_STR_LEN]