17int main(
int argc,
char *argv[])
28 printf(
"Testing sysdetect component with PAPI %d.%d.%d\n",
35 printf(
"\nDevice Summary -----------------------------------------------------------------\n" );
40 int id, vendor_id, dev_count;
41 const char *vendor_name, *status;
44 printf(
"Vendor DevCount \n" );
53 printf(
"%-18s (%d)\n", vendor_name, dev_count);
54 printf(
" \\-> Status: %s\n", status );
60 printf(
"\nDevice Information -------------------------------------------------------------\n" );
71 for (
i = 0;
i < dev_count; ++
i ) {
74 unsigned int sockets, cores, threads;
75 unsigned int l1i_size, l1d_size, l2u_size, l3u_size;
76 unsigned int l1i_line_sz, l1d_line_sz, l2u_line_sz, l3u_line_sz;
77 unsigned int l1i_line_cnt, l1d_line_cnt, l2u_line_cnt, l3u_line_cnt;
78 unsigned int l1i_cache_ass, l1d_cache_ass, l2u_cache_ass, l3u_cache_ass;
106 printf(
"Vendor : %s (%u,0x%x)\n",
110 printf(
"Id : %u\n",
i );
111 printf(
"Name : %s\n", cpu_name );
112 printf(
"CPUID : Family/Model/Stepping %u/%u/%u 0x%02x/0x%02x/0x%02x\n",
114 printf(
"Sockets : %u\n", sockets );
115 printf(
"Numa regions : %u\n", numas );
116 printf(
"Cores per socket : %u\n", cores );
117 printf(
"Cores per NUMA region : %u\n", threads / numas );
118 printf(
"SMT threads per core : %u\n", threads / sockets / cores );
121 printf(
"L1i Cache : Size/LineSize/Lines/Assoc %uKB/%uB/%u/%u\n",
122 l1i_size >> 10, l1i_line_sz, l1i_line_cnt, l1i_cache_ass);
123 printf(
"L1d Cache : Size/LineSize/Lines/Assoc %uKB/%uB/%u/%u\n",
124 l1d_size >> 10, l1d_line_sz, l1d_line_cnt, l1d_cache_ass);
128 printf(
"L2 Cache : Size/LineSize/Lines/Assoc %uKB/%uB/%u/%u\n",
129 l2u_size >> 10, l2u_line_sz, l2u_line_cnt, l2u_cache_ass );
133 printf(
"L3 Cache : Size/LineSize/Lines/Assoc %uKB/%uB/%u/%u\n",
134 l3u_size >> 10, l3u_line_sz, l3u_line_cnt, l3u_cache_ass );
146 printf(
"Vendor : %s\n", vendor_name );
149 for (
i = 0;
i < dev_count; ++
i ) {
151 unsigned int warp_size, thread_per_block, block_per_sm;
152 unsigned int shm_per_block, shm_per_sm;
153 unsigned int blk_dim_x, blk_dim_y, blk_dim_z;
154 unsigned int grd_dim_x, grd_dim_y, grd_dim_z;
155 unsigned int sm_count,
multi_kernel, map_host_mem, async_memcpy;
156 unsigned int unif_addr, managed_mem;
157 unsigned int cc_major, cc_minor;
158 const char *dev_name;
183 printf(
"Id : %d\n",
i );
184 printf(
"UID : %lu\n", uid );
185 printf(
"Name : %s\n", dev_name );
186 printf(
"Warp size : %u\n", warp_size );
187 printf(
"Max threads per block : %u\n", thread_per_block );
188 printf(
"Max blocks per multiprocessor : %u\n", block_per_sm );
189 printf(
"Max shared memory per block : %u\n", shm_per_block );
190 printf(
"Max shared memory per multiprocessor : %u\n", shm_per_sm );
191 printf(
"Max block dim x : %u\n", blk_dim_x );
192 printf(
"Max block dim y : %u\n", blk_dim_y );
193 printf(
"Max block dim z : %u\n", blk_dim_z );
194 printf(
"Max grid dim x : %u\n", grd_dim_x );
195 printf(
"Max grid dim y : %u\n", grd_dim_y );
196 printf(
"Max grid dim z : %u\n", grd_dim_z );
197 printf(
"Multiprocessor count : %u\n", sm_count );
198 printf(
"Multiple kernels per context : %s\n",
multi_kernel ?
"yes" :
"no" );
199 printf(
"Can map host memory : %s\n", map_host_mem ?
"yes" :
"no");
200 printf(
"Can overlap compute and data transfer : %s\n", async_memcpy ?
"yes" :
"no" );
201 printf(
"Has unified addressing : %s\n", unif_addr ?
"yes" :
"no" );
202 printf(
"Has managed memory : %s\n", managed_mem ?
"yes" :
"no" );
203 printf(
"Compute capability : %u.%u\n", cc_major, cc_minor );
211 printf(
"Vendor : %s\n", vendor_name );
215 const char *dev_name;
216 unsigned int wf_size, simd_per_cu, wg_size;
217 unsigned int wf_per_cu, shm_per_wg, wg_dim_x, wg_dim_y, wg_dim_z;
218 unsigned int grd_dim_x, grd_dim_y, grd_dim_z;
219 unsigned int cu_count;
220 unsigned int cc_major, cc_minor;
222 for (
i = 0;
i < dev_count; ++
i ) {
241 printf(
"Id : %d\n",
i );
242 printf(
"Name : %s\n", dev_name );
243 printf(
"Wavefront size : %u\n", wf_size );
244 printf(
"SIMD per compute unit : %u\n", simd_per_cu );
245 printf(
"Max threads per workgroup : %u\n", wg_size );
246 printf(
"Max waves per compute unit : %u\n", wf_per_cu );
247 printf(
"Max shared memory per workgroup : %u\n", shm_per_wg );
248 printf(
"Max workgroup dim x : %u\n", wg_dim_x );
249 printf(
"Max workgroup dim y : %u\n", wg_dim_y );
250 printf(
"Max workgroup dim z : %u\n", wg_dim_z );
251 printf(
"Max grid dim x : %u\n", grd_dim_x );
252 printf(
"Max grid dim y : %u\n", grd_dim_y );
253 printf(
"Max grid dim z : %u\n", grd_dim_z );
254 printf(
"Compute unit count : %u\n", cu_count );
255 printf(
"Compute capability : %u.%u\n", cc_major, cc_minor );
263 printf(
"--------------------------------------------------------------------------------\n" );
267 if (!
quiet) printf(
"\n");
static papi_handle_t handle
returns handle of next device type
returns device attributes
returns device type attributes
initialize the PAPI library.
#define PAPI_DEV_ATTR__ROCM_UINT_WG_DIM_Y
#define PAPI_DEV_ATTR__CUDA_UINT_THR_PER_BLK
#define PAPI_DEV_ATTR__CUDA_UINT_WARP_SIZE
#define PAPI_DEV_ATTR__CPU_UINT_L1D_CACHE_LINE_COUNT
#define PAPI_DEV_ATTR__ROCM_UINT_GRD_DIM_Y
#define PAPI_DEV_ATTR__ROCM_UINT_WAVE_PER_CU
#define PAPI_DEV_ATTR__CUDA_UINT_BLK_PER_SM
#define PAPI_DEV_ATTR__CUDA_ULONG_UID
#define PAPI_DEV_TYPE_ENUM__ALL
#define PAPI_DEV_ATTR__CPU_UINT_FAMILY
#define PAPI_DEV_TYPE_ID__CUDA
#define PAPI_DEV_ATTR__CPU_UINT_SOCKET_COUNT
#define PAPI_DEV_TYPE_ATTR__INT_PAPI_ID
#define PAPI_DEV_ATTR__CPU_UINT_L1I_CACHE_LINE_SIZE
#define PAPI_DEV_ATTR__CUDA_UINT_BLK_DIM_Z
#define PAPI_DEV_ATTR__ROCM_UINT_WAVEFRONT_SIZE
#define PAPI_DEV_ATTR__CUDA_UINT_UNIFIED_ADDR
#define PAPI_DEV_ATTR__CUDA_UINT_SM_COUNT
#define PAPI_DEV_ATTR__CPU_UINT_L3U_CACHE_ASSOC
#define PAPI_DEV_ATTR__CUDA_UINT_GRD_DIM_Y
#define PAPI_DEV_ATTR__CPU_UINT_L1I_CACHE_SIZE
#define PAPI_DEV_ATTR__CPU_UINT_L3U_CACHE_LINE_SIZE
#define PAPI_DEV_ATTR__CPU_UINT_THREAD_COUNT
#define PAPI_DEV_TYPE_ATTR__INT_COUNT
#define PAPI_DEV_ATTR__CPU_UINT_NUMA_COUNT
#define PAPI_DEV_TYPE_ATTR__INT_VENDOR_ID
#define PAPI_DEV_ATTR__CPU_UINT_L1I_CACHE_LINE_COUNT
#define PAPI_DEV_ATTR__CUDA_UINT_MANAGED_MEM
#define PAPI_DEV_ATTR__CPU_UINT_L3U_CACHE_SIZE
#define PAPI_DEV_ATTR__ROCM_UINT_COMP_CAP_MINOR
#define PAPI_DEV_ATTR__CPU_UINT_L2U_CACHE_SIZE
#define PAPI_DEV_ATTR__CUDA_UINT_BLK_DIM_Y
#define PAPI_DEV_ATTR__ROCM_CHAR_DEVICE_NAME
#define PAPI_DEV_ATTR__CUDA_UINT_SHM_PER_BLK
#define PAPI_DEV_ATTR__ROCM_UINT_WORKGROUP_SIZE
#define PAPI_DEV_ATTR__CPU_UINT_L2U_CACHE_LINE_COUNT
#define PAPI_DEV_ATTR__ROCM_UINT_COMP_CAP_MAJOR
#define PAPI_DEV_ATTR__CPU_CHAR_NAME
#define PAPI_DEV_ATTR__CUDA_UINT_GRD_DIM_X
#define PAPI_DEV_ATTR__ROCM_ULONG_UID
#define PAPI_DEV_ATTR__CPU_UINT_L3U_CACHE_LINE_COUNT
#define PAPI_DEV_TYPE_ATTR__CHAR_NAME
#define PAPI_DEV_ATTR__CUDA_UINT_MEMCPY_OVERLAP
#define PAPI_DEV_ATTR__ROCM_UINT_SHM_PER_WG
#define PAPI_DEV_ATTR__CUDA_UINT_COMP_CAP_MINOR
#define PAPI_DEV_ATTR__CUDA_UINT_MAP_HOST_MEM
#define PAPI_DEV_ATTR__CPU_UINT_L1D_CACHE_ASSOC
#define PAPI_DEV_ATTR__CUDA_UINT_SHM_PER_SM
#define PAPI_DEV_ATTR__CPU_UINT_STEPPING
#define PAPI_DEV_ATTR__ROCM_UINT_CU_COUNT
#define PAPI_DEV_ATTR__CUDA_UINT_BLK_DIM_X
#define PAPI_DEV_ATTR__ROCM_UINT_SIMD_PER_CU
#define PAPI_DEV_ATTR__ROCM_UINT_WG_DIM_X
#define PAPI_DEV_ATTR__CUDA_UINT_MULTI_KERNEL
#define PAPI_DEV_ATTR__CPU_UINT_L2U_CACHE_ASSOC
#define PAPI_DEV_ATTR__CUDA_UINT_GRD_DIM_Z
#define PAPI_DEV_ATTR__ROCM_UINT_GRD_DIM_X
#define PAPI_DEV_TYPE_ID__CPU
#define PAPI_DEV_ATTR__CPU_UINT_L1D_CACHE_SIZE
#define PAPI_DEV_ATTR__CPU_UINT_MODEL
#define PAPI_DEV_ATTR__ROCM_UINT_WG_DIM_Z
#define PAPI_DEV_ATTR__ROCM_UINT_GRD_DIM_Z
#define PAPI_DEV_ATTR__CPU_UINT_L2U_CACHE_LINE_SIZE
#define PAPI_DEV_ATTR__CPU_UINT_CORE_COUNT
#define PAPI_DEV_ATTR__CPU_UINT_L1I_CACHE_ASSOC
#define PAPI_DEV_ATTR__CPU_UINT_L1D_CACHE_LINE_SIZE
#define PAPI_DEV_ATTR__CUDA_CHAR_DEVICE_NAME
#define PAPI_DEV_TYPE_ATTR__CHAR_STATUS
#define PAPI_DEV_ATTR__CUDA_UINT_COMP_CAP_MAJOR
#define PAPI_DEV_TYPE_ID__ROCM
int multi_kernel(int argc, char *argv[])
Return codes and api definitions.
#define PAPI_VERSION_REVISION(x)
#define PAPI_VERSION_MAJOR(x)
#define PAPI_VERSION_MINOR(x)
int tests_quiet(int argc, char **argv)
void PAPI_NORETURN test_fail(const char *file, int line, const char *call, int retval)
void PAPI_NORETURN test_pass(const char *filename)