24#if defined( __amd64__ ) || defined (__x86_64__)
26cpuid(
unsigned int *
a,
unsigned int *
b,
unsigned int *
c,
unsigned int *d )
30 :
"=a" (*
a),
"=b" (*
b),
"=c" (*
c),
"=d" (*d)
35cpuid(
unsigned int *
a,
unsigned int *
b,
unsigned int *
c,
unsigned int *d )
42 __asm__ __volatile__(
".byte 0x53\n\tcpuid\n\tmovl %%ebx, %%esi\n\t.byte 0x5b":
"=a"( *
a ),
"=S"( *
b ),
"=c"( *
c ),
57 unsigned int ax, bx, cx, dx;
70 cpuid( ®.e.ax, ®.e.bx, ®.e.dx, ®.e.cx );
72 MEMDBG(
"Vendor: %s\n", ®.vendor[4] );
76 if ( !strncmp(
"GenuineIntel", ®.vendor[4], 12 ) ) {
78 }
else if ( !strncmp(
"AuthenticAMD", ®.vendor[4], 12 ) ) {
81 MEMDBG(
"Unsupported cpu type; Not Intel or AMD x86\n" );
86 MEMDBG(
"Detected L1: %d L2: %d L3: %d\n",
118 short int assoc[16] =
119 { 0, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, SHRT_MAX };
122 return ( assoc[pattern] );
133 unsigned int ax, bx, cx, dx;
135 unsigned char byt[16];
137 int i, j, levels = 0;
145 MEMDBG(
"Initializing AMD memory info\n" );
147 reg.e.ax = 0x80000005;
148 cpuid( ®.e.ax, ®.e.bx, ®.e.cx, ®.e.dx );
150 MEMDBG(
"e.ax=%#8.8x e.bx=%#8.8x e.cx=%#8.8x e.dx=%#8.8x\n",
151 reg.e.ax, reg.e.bx, reg.e.cx, reg.e.dx );
153 (
":\neax: %#x %#x %#x %#x\nebx: %#x %#x %#x %#x\necx: %#x %#x %#x %#x\nedx: %#x %#x %#x %#x\n",
154 reg.byt[0], reg.byt[1], reg.byt[2], reg.byt[3], reg.byt[4],
155 reg.byt[5], reg.byt[6], reg.byt[7], reg.byt[8], reg.byt[9],
156 reg.byt[10], reg.byt[11], reg.byt[12], reg.byt[13], reg.byt[14],
196 if ( L[0].tlb[
i].associativity == 0xff )
209 if ( L[0].cache[0].line_size )
211 MEMDBG(
"D-Cache Line Count: %d; Computed: %d\n", reg.byt[9],
222 if ( L[0].cache[1].line_size )
224 MEMDBG(
"I-Cache Line Count: %d; Computed: %d\n", reg.byt[13],
227 for (
i = 0;
i < 2;
i++ ) {
228 if ( L[0].cache[
i].associativity == 0xff )
235 reg.e.ax = 0x80000006;
236 cpuid( ®.e.ax, ®.e.bx, ®.e.cx, ®.e.dx );
238 MEMDBG(
"e.ax=%#8.8x e.bx=%#8.8x e.cx=%#8.8x e.dx=%#8.8x\n",
239 reg.e.ax, reg.e.bx, reg.e.cx, reg.e.dx );
241 (
":\neax: %#x %#x %#x %#x\nebx: %#x %#x %#x %#x\necx: %#x %#x %#x %#x\nedx: %#x %#x %#x %#x\n",
242 reg.byt[0], reg.byt[1], reg.byt[2], reg.byt[3], reg.byt[4],
243 reg.byt[5], reg.byt[6], reg.byt[7], reg.byt[8], reg.byt[9],
244 reg.byt[10], reg.byt[11], reg.byt[12], reg.byt[13], reg.byt[14],
249 if ( reg.byt[0] | reg.byt[1] ) {
253 ( ( ( short ) ( reg.byt[1] & 0xF ) << 8 ) + reg.byt[0] ) / 2;
265 if ( reg.byt[2] | reg.byt[3] ) {
269 ( ( ( short ) ( reg.byt[3] & 0xF ) << 8 ) + reg.byt[2] ) / 2;
282 if ( reg.byt[4] | reg.byt[5] ) {
285 ( ( short ) ( reg.byt[5] & 0xF ) << 8 ) + reg.byt[4];
290 if ( reg.byt[6] | reg.byt[7] ) {
293 ( ( short ) ( reg.byt[7] & 0xF ) << 8 ) + reg.byt[6];
303 L[1].
cache[0].
size = (
int ) ( ( reg.e.cx & 0xffff0000 ) >> 6 );
308 if ( L[1].cache[0].line_size )
311 MEMDBG(
"U-Cache Line Count: %d; Computed: %d\n", reg.byt[9] & 0xF,
319 L[2].
cache[0].
size = (
int ) ( reg.e.dx & 0xfffc0000 ) << 1;
324 if ( L[2].cache[0].line_size )
327 MEMDBG(
"U-Cache Line Count: %d; Computed: %d\n", reg.byt[13] & 0xF,
338 *num_levels = levels;
386 .associativity = SHRT_MAX,
709 .associativity = SHRT_MAX,
716 .size = {4, 2048, 4096},
717 .associativity = SHRT_MAX,
724 .size = {4, 2048, 4096},
725 .associativity = SHRT_MAX,
732 .size = {4, 2048, 4096},
733 .associativity = SHRT_MAX,
740 .size = {2048, 4096, 0},
741 .associativity = SHRT_MAX,
765 .associativity = SHRT_MAX,
772 .size = {2048, 4096, 0},
780 .size = {4, 4096, 0},
781 .associativity = SHRT_MAX,
788 .size = {4, 4096, 0},
789 .associativity = SHRT_MAX,
796 .size = {4, 4096, 0},
797 .associativity = SHRT_MAX,
995 .size = {2048, 4096, 0},
1000 {.descriptor = 0xB2,
1008 {.descriptor = 0xB3,
1016 {.descriptor = 0xB4,
1024 {.descriptor = 0xBA,
1032 {.descriptor = 0xBA,
1040 {.descriptor = 0xCA,
1048 {.descriptor = 0xD0,
1056 {.descriptor = 0xD1,
1064 {.descriptor = 0xD2,
1072 {.descriptor = 0xD6,
1080 {.descriptor = 0xD7,
1088 {.descriptor = 0xD8,
1096 {.descriptor = 0xDC,
1100 .associativity = 12,
1104 {.descriptor = 0xDD,
1108 .associativity = 12,
1112 {.descriptor = 0xDE,
1116 .associativity = 12,
1120 {.descriptor = 0xE2,
1124 .associativity = 16,
1128 {.descriptor = 0xE3,
1132 .associativity = 16,
1136 {.descriptor = 0xE4,
1140 .associativity = 16,
1144 {.descriptor = 0xEA,
1148 .associativity = 24,
1152 {.descriptor = 0xEB,
1156 .associativity = 24,
1160 {.descriptor = 0xEC,
1164 .associativity = 24,
1168 {.descriptor = 0xF0,
1174 {.descriptor = 0xF1,
1188 for (
i = 0;
i < k;
i++ ) {
1192 printf(
"\t Size(s): " );
1218 unsigned int r_eax, r_ebx, r_ecx, r_edx;
1220 cpuid( &r_eax, &r_ebx, &r_ecx, &r_edx );
1222 if ( ( r_eax & 0x0FFF3FF0 ) == 0xF60 )
1251 c->size = d->
size[0] << 10;
1255 c->num_lines =
c->size /
c->line_size;
1260#if defined(__amd64__) || defined(__x86_64__)
1262cpuid2(
unsigned int*eax,
unsigned int* ebx,
1263 unsigned int*ecx,
unsigned int *edx,
1264 unsigned int index,
unsigned int ecx_in )
1266 __asm__ __volatile__ (
"cpuid;"
1267 :
"=a" (*eax),
"=b" (*ebx),
"=c" (*ecx),
"=d" (*edx)
1268 :
"0" (index),
"2"(ecx_in) );
1272cpuid2 (
unsigned int* eax,
unsigned int* ebx,
1273 unsigned int* ecx,
unsigned int* edx,
1274 unsigned int index,
unsigned int ecx_in )
1276 unsigned int a,
b,
c,d;
1277 __asm__ __volatile__ (
".byte 0x53\n\tcpuid\n\tmovl %%ebx, %%esi\n\t.byte 0x5b"
1278 :
"=a" (
a),
"=S" (
b),
"=c" (
c),
"=d" (d) \
1279 :
"0" (index),
"2"(ecx_in) );
1280 *eax =
a; *ebx =
b; *ecx =
c; *edx = d;
1289 unsigned int eax, ebx, ecx, edx;
1290 unsigned int maxidx, ecx_in;
1293 int cache_type,cache_level,cache_selfinit,cache_fullyassoc;
1294 int cache_linesize,cache_partitions,cache_ways,cache_sets;
1300 cpuid2(&eax,&ebx,&ecx,&edx, 0, 0);
1304 MEMDBG(
"Warning! CPUID Index 4 not supported!\n");
1310 cpuid2(&eax,&ebx,&ecx,&edx, 4, ecx_in);
1316 cache_type=eax&0x1f;
1317 if (cache_type==0)
break;
1319 cache_level=(eax>>5)&0x3;
1320 cache_selfinit=(eax>>8)&0x1;
1321 cache_fullyassoc=(eax>>9)&0x1;
1323 cache_linesize=(ebx&0xfff)+1;
1324 cache_partitions=((ebx>>12)&0x3ff)+1;
1325 cache_ways=((ebx>>22)&0x3ff)+1;
1339 if (cache_level>*num_levels) *num_levels=cache_level;
1348 switch(cache_type) {
1349 case 1:
MEMDBG(
"L%d Data Cache\n",cache_level);
1352 case 2:
MEMDBG(
"L%d Instruction Cache\n",cache_level);
1355 case 3:
MEMDBG(
"L%d Unified Cache\n",cache_level);
1360 if (cache_selfinit) {
MEMDBG(
"\tSelf-init\n"); }
1361 if (cache_fullyassoc) {
MEMDBG(
"\tFully Associtative\n"); }
1366 MEMDBG(
"\tCache linesize: %d\n",cache_linesize);
1368 MEMDBG(
"\tCache partitions: %d\n",cache_partitions);
1369 MEMDBG(
"\tCache associaticity: %d\n",cache_ways);
1371 MEMDBG(
"\tCache sets: %d\n",cache_sets);
1372 MEMDBG(
"\tCache size = %dkB\n",
1373 (cache_ways*cache_partitions*cache_linesize*cache_sets)/1024);
1379 c->line_size=cache_linesize;
1380 if (cache_fullyassoc) {
1381 c->associativity=SHRT_MAX;
1384 c->associativity=cache_ways;
1386 c->size=(cache_ways*cache_partitions*cache_linesize*cache_sets);
1387 c->num_lines=cache_ways*cache_partitions*cache_sets;
1405 unsigned int ax, bx, cx, dx;
1407 unsigned char descrip[16];
1421 MEMDBG(
"Initializing Intel Cache and TLB descriptors\n" );
1429 cpuid( ®.e.ax, ®.e.bx, ®.e.cx, ®.e.dx );
1431 MEMDBG(
"e.ax=%#8.8x e.bx=%#8.8x e.cx=%#8.8x e.dx=%#8.8x\n",
1432 reg.e.ax, reg.e.bx, reg.e.cx, reg.e.dx );
1434 (
":\nd0: %#x %#x %#x %#x\nd1: %#x %#x %#x %#x\nd2: %#x %#x %#x %#x\nd3: %#x %#x %#x %#x\n",
1435 reg.descrip[0], reg.descrip[1], reg.descrip[2], reg.descrip[3],
1436 reg.descrip[4], reg.descrip[5], reg.descrip[6], reg.descrip[7],
1437 reg.descrip[8], reg.descrip[9], reg.descrip[10], reg.descrip[11],
1438 reg.descrip[12], reg.descrip[13], reg.descrip[14], reg.descrip[15] );
1440 count = reg.descrip[0];
1443 if (
count==0)
goto early_exit;
1446 MEMDBG(
"Repeat cpuid(2,...) %d times. If not 1, code is broken.\n",
1449 fprintf(
stderr,
"Warning: Unhandled cpuid count of %d\n",
count);
1452 for ( r = 0; r < 4; r++ ) {
1453 if ( ( reg.descrip[r * 4 + 3] & 0x80 ) == 0 ) {
1454 for (
b = 3;
b >= 0;
b-- ) {
1457 if ( reg.descrip[
i] == 0xff ) {
1458 MEMDBG(
"Warning! PAPI x86_cache: must implement cpuid leaf 4\n");
1464 for ( t = 0; t <
size; t++ ) {
1477 MEMDBG(
"# of Levels: %d\n", last_level );
1478 *num_levels=last_level;
1509 unsigned int eax, ebx, ecx, edx;
1510 char hyper_vendor_id[13];
1512 cpuid2(&eax, &ebx, &ecx, &edx,0x1,0);
1514 if (ecx&0x80000000) {
1518 cpuid2(&eax, &ebx, &ecx, &edx, 0x40000000,0);
1519 memcpy(hyper_vendor_id + 0, &ebx, 4);
1520 memcpy(hyper_vendor_id + 4, &ecx, 4);
1521 memcpy(hyper_vendor_id + 8, &edx, 4);
1522 hyper_vendor_id[12] =
'\0';
#define PAPI_MAX_MEM_HIERARCHY_LEVELS
static double a[MATRIX_SIZE][MATRIX_SIZE]
static double b[MATRIX_SIZE][MATRIX_SIZE]
static double c[MATRIX_SIZE][MATRIX_SIZE]
Return codes and api definitions.
#define PAPI_MH_TYPE_DATA
#define PAPI_MH_TYPE_PREF
#define PAPI_MH_MAX_LEVELS
#define PAPI_MH_TYPE_PSEUDO_LRU
#define PAPI_MH_CACHE_TYPE(a)
#define PAPI_MH_TYPE_INST
#define PAPI_MH_TYPE_EMPTY
#define PAPI_MH_TYPE_TRACE
#define PAPI_MH_TYPE_UNIFIED
#define MEMDBG(format, args...)
mh for mem hierarchy maybe?
PAPI_mh_level_t level[PAPI_MAX_MEM_HIERARCHY_LEVELS]
PAPI_mh_tlb_info_t tlb[PAPI_MH_MAX_LEVELS]
PAPI_mh_cache_info_t cache[PAPI_MH_MAX_LEVELS]
static int init_amd(PAPI_mh_info_t *mh_info, int *levels)
static void cpuid(unsigned int *a, unsigned int *b, unsigned int *c, unsigned int *d)
int _x86_detect_hypervisor(char *vendor_name)
static int init_intel(PAPI_mh_info_t *mh_info, int *levels)
static void intel_decode_descriptor(struct _intel_cache_info *d, PAPI_mh_level_t *L)
static int init_intel_leaf2(PAPI_mh_info_t *mh_info, int *num_levels)
static void print_intel_cache_table()
int _x86_cache_info(PAPI_mh_info_t *mh_info)
static void cpuid2(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx, unsigned int index, unsigned int ecx_in)
static struct _intel_cache_info intel_cache[]
static void init_mem_hierarchy(PAPI_mh_info_t *mh_info)
static int init_intel_leaf4(PAPI_mh_info_t *mh_info, int *num_levels)
static short int _amd_L2_L3_assoc(unsigned short int pattern)