18{
21
25 }
26
28 printf("Testing sysdetect component with PAPI %d.%d.%d\n",
32 }
33
35 printf( "\nDevice Summary -----------------------------------------------------------------\n" );
36 }
37
40 int id, vendor_id, dev_count;
41 const char *vendor_name, *status;
42
44 printf( "Vendor DevCount \n" );
45 }
46
51
53 printf( "%-18s (%d)\n", vendor_name, dev_count);
54 printf( " \\-> Status: %s\n", status );
55 printf( "\n" );
56 }
57 }
58
60 printf( "\nDevice Information -------------------------------------------------------------\n" );
61 }
62
68
70 int numas = 1;
71 for (
i = 0;
i < dev_count; ++
i ) {
72 const char *cpu_name;
74 unsigned int sockets, cores, threads;
75 unsigned int l1i_size, l1d_size, l2u_size, l3u_size;
76 unsigned int l1i_line_sz, l1d_line_sz, l2u_line_sz, l3u_line_sz;
77 unsigned int l1i_line_cnt, l1d_line_cnt, l2u_line_cnt, l3u_line_cnt;
78 unsigned int l1i_cache_ass, l1d_cache_ass, l2u_cache_ass, l3u_cache_ass;
79
104
106 printf( "Vendor : %s (%u,0x%x)\n",
107 vendor_name,
108 vendor_id,
109 vendor_id );
110 printf(
"Id : %u\n",
i );
111 printf( "Name : %s\n", cpu_name );
112 printf( "CPUID : Family/Model/Stepping %u/%u/%u 0x%02x/0x%02x/0x%02x\n",
114 printf( "Sockets : %u\n", sockets );
115 printf( "Numa regions : %u\n", numas );
116 printf( "Cores per socket : %u\n", cores );
117 printf( "Cores per NUMA region : %u\n", threads / numas );
118 printf( "SMT threads per core : %u\n", threads / sockets / cores );
119
120 if (l1i_size > 0) {
121 printf( "L1i Cache : Size/LineSize/Lines/Assoc %uKB/%uB/%u/%u\n",
122 l1i_size >> 10, l1i_line_sz, l1i_line_cnt, l1i_cache_ass);
123 printf( "L1d Cache : Size/LineSize/Lines/Assoc %uKB/%uB/%u/%u\n",
124 l1d_size >> 10, l1d_line_sz, l1d_line_cnt, l1d_cache_ass);
125 }
126
127 if (l2u_size > 0) {
128 printf( "L2 Cache : Size/LineSize/Lines/Assoc %uKB/%uB/%u/%u\n",
129 l2u_size >> 10, l2u_line_sz, l2u_line_cnt, l2u_cache_ass );
130 }
131
132 if (l3u_size > 0) {
133 printf( "L3 Cache : Size/LineSize/Lines/Assoc %uKB/%uB/%u/%u\n",
134 l3u_size >> 10, l3u_line_sz, l3u_line_cnt, l3u_cache_ass );
135 }
136 }
137 }
138
140 printf( "\n" );
141 }
142 }
143
146 printf( "Vendor : %s\n", vendor_name );
147 }
148
149 for (
i = 0;
i < dev_count; ++
i ) {
150 unsigned long uid;
151 unsigned int warp_size, thread_per_block, block_per_sm;
152 unsigned int shm_per_block, shm_per_sm;
153 unsigned int blk_dim_x, blk_dim_y, blk_dim_z;
154 unsigned int grd_dim_x, grd_dim_y, grd_dim_z;
155 unsigned int sm_count,
multi_kernel, map_host_mem, async_memcpy;
156 unsigned int unif_addr, managed_mem;
157 unsigned int cc_major, cc_minor;
158 const char *dev_name;
159
181
183 printf(
"Id : %d\n",
i );
184 printf( "UID : %lu\n", uid );
185 printf( "Name : %s\n", dev_name );
186 printf( "Warp size : %u\n", warp_size );
187 printf( "Max threads per block : %u\n", thread_per_block );
188 printf( "Max blocks per multiprocessor : %u\n", block_per_sm );
189 printf( "Max shared memory per block : %u\n", shm_per_block );
190 printf( "Max shared memory per multiprocessor : %u\n", shm_per_sm );
191 printf( "Max block dim x : %u\n", blk_dim_x );
192 printf( "Max block dim y : %u\n", blk_dim_y );
193 printf( "Max block dim z : %u\n", blk_dim_z );
194 printf( "Max grid dim x : %u\n", grd_dim_x );
195 printf( "Max grid dim y : %u\n", grd_dim_y );
196 printf( "Max grid dim z : %u\n", grd_dim_z );
197 printf( "Multiprocessor count : %u\n", sm_count );
198 printf(
"Multiple kernels per context : %s\n",
multi_kernel ?
"yes" :
"no" );
199 printf( "Can map host memory : %s\n", map_host_mem ? "yes" : "no");
200 printf( "Can overlap compute and data transfer : %s\n", async_memcpy ? "yes" : "no" );
201 printf( "Has unified addressing : %s\n", unif_addr ? "yes" : "no" );
202 printf( "Has managed memory : %s\n", managed_mem ? "yes" : "no" );
203 printf( "Compute capability : %u.%u\n", cc_major, cc_minor );
204 printf( "\n" );
205 }
206 }
207 }
208
211 printf( "Vendor : %s\n", vendor_name );
212 }
213
214 unsigned long uid;
215 const char *dev_name;
216 unsigned int wf_size, simd_per_cu, wg_size;
217 unsigned int wf_per_cu, shm_per_wg, wg_dim_x, wg_dim_y, wg_dim_z;
218 unsigned int grd_dim_x, grd_dim_y, grd_dim_z;
219 unsigned int cu_count;
220 unsigned int cc_major, cc_minor;
221
222 for (
i = 0;
i < dev_count; ++
i ) {
239
241 printf(
"Id : %d\n",
i );
242 printf( "Name : %s\n", dev_name );
243 printf( "Wavefront size : %u\n", wf_size );
244 printf( "SIMD per compute unit : %u\n", simd_per_cu );
245 printf( "Max threads per workgroup : %u\n", wg_size );
246 printf( "Max waves per compute unit : %u\n", wf_per_cu );
247 printf( "Max shared memory per workgroup : %u\n", shm_per_wg );
248 printf( "Max workgroup dim x : %u\n", wg_dim_x );
249 printf( "Max workgroup dim y : %u\n", wg_dim_y );
250 printf( "Max workgroup dim z : %u\n", wg_dim_z );
251 printf( "Max grid dim x : %u\n", grd_dim_x );
252 printf( "Max grid dim y : %u\n", grd_dim_y );
253 printf( "Max grid dim z : %u\n", grd_dim_z );
254 printf( "Compute unit count : %u\n", cu_count );
255 printf( "Compute capability : %u.%u\n", cc_major, cc_minor );
256 printf( "\n" );
257 }
258 }
259 }
260 }
261
263 printf( "--------------------------------------------------------------------------------\n" );
264 }
265
266
267 if (!
quiet) printf(
"\n");
269
270 return 0;
271}
static papi_handle_t handle
Returns the handle of the next device type.
Returns device attributes.
Returns device type attributes.
Initializes the PAPI library.
#define PAPI_DEV_ATTR__ROCM_UINT_WG_DIM_Y
#define PAPI_DEV_ATTR__CUDA_UINT_THR_PER_BLK
#define PAPI_DEV_ATTR__CUDA_UINT_WARP_SIZE
#define PAPI_DEV_ATTR__CPU_UINT_L1D_CACHE_LINE_COUNT
#define PAPI_DEV_ATTR__ROCM_UINT_GRD_DIM_Y
#define PAPI_DEV_ATTR__ROCM_UINT_WAVE_PER_CU
#define PAPI_DEV_ATTR__CUDA_UINT_BLK_PER_SM
#define PAPI_DEV_ATTR__CUDA_ULONG_UID
#define PAPI_DEV_TYPE_ENUM__ALL
#define PAPI_DEV_ATTR__CPU_UINT_FAMILY
#define PAPI_DEV_TYPE_ID__CUDA
#define PAPI_DEV_ATTR__CPU_UINT_SOCKET_COUNT
#define PAPI_DEV_TYPE_ATTR__INT_PAPI_ID
#define PAPI_DEV_ATTR__CPU_UINT_L1I_CACHE_LINE_SIZE
#define PAPI_DEV_ATTR__CUDA_UINT_BLK_DIM_Z
#define PAPI_DEV_ATTR__ROCM_UINT_WAVEFRONT_SIZE
#define PAPI_DEV_ATTR__CUDA_UINT_UNIFIED_ADDR
#define PAPI_DEV_ATTR__CUDA_UINT_SM_COUNT
#define PAPI_DEV_ATTR__CPU_UINT_L3U_CACHE_ASSOC
#define PAPI_DEV_ATTR__CUDA_UINT_GRD_DIM_Y
#define PAPI_DEV_ATTR__CPU_UINT_L1I_CACHE_SIZE
#define PAPI_DEV_ATTR__CPU_UINT_L3U_CACHE_LINE_SIZE
#define PAPI_DEV_ATTR__CPU_UINT_THREAD_COUNT
#define PAPI_DEV_TYPE_ATTR__INT_COUNT
#define PAPI_DEV_ATTR__CPU_UINT_NUMA_COUNT
#define PAPI_DEV_TYPE_ATTR__INT_VENDOR_ID
#define PAPI_DEV_ATTR__CPU_UINT_L1I_CACHE_LINE_COUNT
#define PAPI_DEV_ATTR__CUDA_UINT_MANAGED_MEM
#define PAPI_DEV_ATTR__CPU_UINT_L3U_CACHE_SIZE
#define PAPI_DEV_ATTR__ROCM_UINT_COMP_CAP_MINOR
#define PAPI_DEV_ATTR__CPU_UINT_L2U_CACHE_SIZE
#define PAPI_DEV_ATTR__CUDA_UINT_BLK_DIM_Y
#define PAPI_DEV_ATTR__ROCM_CHAR_DEVICE_NAME
#define PAPI_DEV_ATTR__CUDA_UINT_SHM_PER_BLK
#define PAPI_DEV_ATTR__ROCM_UINT_WORKGROUP_SIZE
#define PAPI_DEV_ATTR__CPU_UINT_L2U_CACHE_LINE_COUNT
#define PAPI_DEV_ATTR__ROCM_UINT_COMP_CAP_MAJOR
#define PAPI_DEV_ATTR__CPU_CHAR_NAME
#define PAPI_DEV_ATTR__CUDA_UINT_GRD_DIM_X
#define PAPI_DEV_ATTR__ROCM_ULONG_UID
#define PAPI_DEV_ATTR__CPU_UINT_L3U_CACHE_LINE_COUNT
#define PAPI_DEV_TYPE_ATTR__CHAR_NAME
#define PAPI_DEV_ATTR__CUDA_UINT_MEMCPY_OVERLAP
#define PAPI_DEV_ATTR__ROCM_UINT_SHM_PER_WG
#define PAPI_DEV_ATTR__CUDA_UINT_COMP_CAP_MINOR
#define PAPI_DEV_ATTR__CUDA_UINT_MAP_HOST_MEM
#define PAPI_DEV_ATTR__CPU_UINT_L1D_CACHE_ASSOC
#define PAPI_DEV_ATTR__CUDA_UINT_SHM_PER_SM
#define PAPI_DEV_ATTR__CPU_UINT_STEPPING
#define PAPI_DEV_ATTR__ROCM_UINT_CU_COUNT
#define PAPI_DEV_ATTR__CUDA_UINT_BLK_DIM_X
#define PAPI_DEV_ATTR__ROCM_UINT_SIMD_PER_CU
#define PAPI_DEV_ATTR__ROCM_UINT_WG_DIM_X
#define PAPI_DEV_ATTR__CUDA_UINT_MULTI_KERNEL
#define PAPI_DEV_ATTR__CPU_UINT_L2U_CACHE_ASSOC
#define PAPI_DEV_ATTR__CUDA_UINT_GRD_DIM_Z
#define PAPI_DEV_ATTR__ROCM_UINT_GRD_DIM_X
#define PAPI_DEV_TYPE_ID__CPU
#define PAPI_DEV_ATTR__CPU_UINT_L1D_CACHE_SIZE
#define PAPI_DEV_ATTR__CPU_UINT_MODEL
#define PAPI_DEV_ATTR__ROCM_UINT_WG_DIM_Z
#define PAPI_DEV_ATTR__ROCM_UINT_GRD_DIM_Z
#define PAPI_DEV_ATTR__CPU_UINT_L2U_CACHE_LINE_SIZE
#define PAPI_DEV_ATTR__CPU_UINT_CORE_COUNT
#define PAPI_DEV_ATTR__CPU_UINT_L1I_CACHE_ASSOC
#define PAPI_DEV_ATTR__CPU_UINT_L1D_CACHE_LINE_SIZE
#define PAPI_DEV_ATTR__CUDA_CHAR_DEVICE_NAME
#define PAPI_DEV_TYPE_ATTR__CHAR_STATUS
#define PAPI_DEV_ATTR__CUDA_UINT_COMP_CAP_MAJOR
#define PAPI_DEV_TYPE_ID__ROCM
int multi_kernel(int argc, char *argv[])
#define PAPI_VERSION_REVISION(x)
#define PAPI_VERSION_MAJOR(x)
#define PAPI_VERSION_MINOR(x)
int tests_quiet(int argc, char **argv)
void PAPI_NORETURN test_fail(const char *file, int line, const char *call, int retval)
void PAPI_NORETURN test_pass(const char *filename)