PAPI 7.1.0.0
Loading...
Searching...
No Matches
x86_cpuid_info.c
Go to the documentation of this file.
1/****************************/
2/* THIS IS OPEN SOURCE CODE */
3/****************************/
4
5/*
6* File: x86_cpuid_info.c
7* Author: Dan Terpstra
8* terpstra@eecs.utk.edu
9* complete rewrite of linux-memory.c to conform to latest docs
10* and convert Intel to a table driven implementation.
11* Now also supports multiple TLB descriptors
12*/
13
14#include <string.h>
15#include <stdio.h>
16#include "papi.h"
17#include "papi_internal.h"
18
19static void init_mem_hierarchy( PAPI_mh_info_t * mh_info );
20static int init_amd( PAPI_mh_info_t * mh_info, int *levels );
21static short int _amd_L2_L3_assoc( unsigned short int pattern );
22static int init_intel( PAPI_mh_info_t * mh_info , int *levels);
23
#if defined( __amd64__ ) || defined (__x86_64__)
/*
 * Execute the CPUID instruction (64-bit build).
 *
 * On entry *a holds the leaf (function code) to query.  On return
 * *a, *b, *c and *d receive the EAX, EBX, ECX and EDX results.
 *
 * ECX is explicitly zeroed on entry so that leaves which take a sub-leaf
 * index in ECX always report sub-leaf 0 instead of whatever happened to
 * be in the register.  The asm is marked volatile, consistent with the
 * 32-bit variant and with cpuid2(), so the compiler never drops or
 * merges the instruction.
 */
static inline void
cpuid( unsigned int *a, unsigned int *b, unsigned int *c, unsigned int *d )
{
	unsigned int op = *a;
	__asm__ __volatile__( "cpuid;"
		: "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
		: "0" (op), "2" (0u) );
}
#else
/*
 * Execute the CPUID instruction (32-bit build).
 *
 * Same contract as the 64-bit variant: *a is the leaf on entry and
 * *a, *b, *c, *d receive EAX, EBX, ECX, EDX on return; ECX is zeroed
 * on entry.
 */
static inline void
cpuid( unsigned int *a, unsigned int *b, unsigned int *c, unsigned int *d )
{
	unsigned int op = *a;
	// .byte 0x53 == push ebx. it's universal for 32 and 64 bit
	// .byte 0x5b == pop ebx.
	// Some gcc's (4.1.2 on Core2) object to pairing push/pop and ebx in 64 bit mode.
	// Using the opcode directly avoids this problem.
	// EBX is saved/restored around cpuid and its result is handed back in ESI.
	__asm__ __volatile__( ".byte 0x53\n\tcpuid\n\tmovl %%ebx, %%esi\n\t.byte 0x5b"
		: "=a" (*a), "=S" (*b), "=c" (*c), "=d" (*d)
		: "0" (op), "2" (0u) );
}
#endif
48
49int
51{
52 int retval = 0;
53 union
54 {
55 struct
56 {
57 unsigned int ax, bx, cx, dx;
58 } e;
59 char vendor[20]; /* leave room for terminator bytes */
60 } reg;
61
62 /* Don't use cpu_type to determine the processor.
63 * get the information directly from the chip.
64 */
65 reg.e.ax = 0; /* function code 0: vendor string */
66 /* The vendor string is composed of EBX:EDX:ECX.
67 * by swapping the register addresses in the call below,
68 * the string is correctly composed in the char array.
69 */
70 cpuid( &reg.e.ax, &reg.e.bx, &reg.e.dx, &reg.e.cx );
71 reg.vendor[16] = 0;
72 MEMDBG( "Vendor: %s\n", &reg.vendor[4] );
73
74 init_mem_hierarchy( mh_info );
75
76 if ( !strncmp( "GenuineIntel", &reg.vendor[4], 12 ) ) {
77 init_intel( mh_info, &mh_info->levels);
78 } else if ( !strncmp( "AuthenticAMD", &reg.vendor[4], 12 ) ) {
79 init_amd( mh_info, &mh_info->levels );
80 } else {
81 MEMDBG( "Unsupported cpu type; Not Intel or AMD x86\n" );
82 return PAPI_ENOIMPL;
83 }
84
85 /* This works only because an empty cache element is initialized to 0 */
86 MEMDBG( "Detected L1: %d L2: %d L3: %d\n",
87 mh_info->level[0].cache[0].size + mh_info->level[0].cache[1].size,
88 mh_info->level[1].cache[0].size + mh_info->level[1].cache[1].size,
89 mh_info->level[2].cache[0].size + mh_info->level[2].cache[1].size );
90 return retval;
91}
92
93static void
95{
96 int i, j;
97 PAPI_mh_level_t *L = mh_info->level;
98
99 /* initialize entire memory hierarchy structure to benign values */
100 for ( i = 0; i < PAPI_MAX_MEM_HIERARCHY_LEVELS; i++ ) {
101 for ( j = 0; j < PAPI_MH_MAX_LEVELS; j++ ) {
103 L[i].tlb[j].num_entries = 0;
104 L[i].tlb[j].associativity = 0;
106 L[i].cache[j].size = 0;
107 L[i].cache[j].line_size = 0;
108 L[i].cache[j].num_lines = 0;
109 L[i].cache[j].associativity = 0;
110 }
111 }
112}
113
/*
 * Decode a 4-bit AMD L2/L3 associativity code into a way count.
 *
 * Encoding from "CPUID Specification" #25481 Rev 2.28, April 2008.
 * Returns the decoded value for a known code, SHRT_MAX for code 0xF
 * (this file uses SHRT_MAX to mean fully associative), and -1 for
 * codes without a table value or for any pattern above 0xF.
 */
static short int
_amd_L2_L3_assoc( unsigned short int pattern )
{
	switch ( pattern ) {
	case 0x0:
		return 0;
	case 0x1:
		return 1;
	case 0x2:
		return 2;
	case 0x4:
		return 4;
	case 0x6:
		return 8;
	case 0x8:
		return 16;
	case 0xA:
		return 32;
	case 0xB:
		return 48;
	case 0xC:
		return 64;
	case 0xD:
		return 96;
	case 0xE:
		return 128;
	case 0xF:
		return SHRT_MAX;
	default:
		/* codes 3, 5, 7, 9 and anything wider than four bits */
		return -1;
	}
}
124
125/* Cache configuration for AMD Athlon/Duron */
126static int
127init_amd( PAPI_mh_info_t * mh_info, int *num_levels )
128{
129 union
130 {
131 struct
132 {
133 unsigned int ax, bx, cx, dx;
134 } e;
135 unsigned char byt[16];
136 } reg;
137 int i, j, levels = 0;
138 PAPI_mh_level_t *L = mh_info->level;
139
140 /*
141 * Layout of CPU information taken from :
142 * "CPUID Specification" #25481 Rev 2.28, April 2008 for most current info.
143 */
144
145 MEMDBG( "Initializing AMD memory info\n" );
146 /* AMD level 1 cache info */
147 reg.e.ax = 0x80000005; /* extended function code 5: L1 Cache and TLB Identifiers */
148 cpuid( &reg.e.ax, &reg.e.bx, &reg.e.cx, &reg.e.dx );
149
150 MEMDBG( "e.ax=%#8.8x e.bx=%#8.8x e.cx=%#8.8x e.dx=%#8.8x\n",
151 reg.e.ax, reg.e.bx, reg.e.cx, reg.e.dx );
152 MEMDBG
153 ( ":\neax: %#x %#x %#x %#x\nebx: %#x %#x %#x %#x\necx: %#x %#x %#x %#x\nedx: %#x %#x %#x %#x\n",
154 reg.byt[0], reg.byt[1], reg.byt[2], reg.byt[3], reg.byt[4],
155 reg.byt[5], reg.byt[6], reg.byt[7], reg.byt[8], reg.byt[9],
156 reg.byt[10], reg.byt[11], reg.byt[12], reg.byt[13], reg.byt[14],
157 reg.byt[15] );
158
159 /* NOTE: We assume L1 cache and TLB always exists */
160 /* L1 TLB info */
161
162 /* 4MB memory page information; half the number of entries as 2MB */
163 L[0].tlb[0].type = PAPI_MH_TYPE_INST;
164 L[0].tlb[0].num_entries = reg.byt[0] / 2;
165 L[0].tlb[0].page_size = 4096 << 10;
166 L[0].tlb[0].associativity = reg.byt[1];
167
168 L[0].tlb[1].type = PAPI_MH_TYPE_DATA;
169 L[0].tlb[1].num_entries = reg.byt[2] / 2;
170 L[0].tlb[1].page_size = 4096 << 10;
171 L[0].tlb[1].associativity = reg.byt[3];
172
173 /* 2MB memory page information */
174 L[0].tlb[2].type = PAPI_MH_TYPE_INST;
175 L[0].tlb[2].num_entries = reg.byt[0];
176 L[0].tlb[2].page_size = 2048 << 10;
177 L[0].tlb[2].associativity = reg.byt[1];
178
179 L[0].tlb[3].type = PAPI_MH_TYPE_DATA;
180 L[0].tlb[3].num_entries = reg.byt[2];
181 L[0].tlb[3].page_size = 2048 << 10;
182 L[0].tlb[3].associativity = reg.byt[3];
183
184 /* 4k page information */
185 L[0].tlb[4].type = PAPI_MH_TYPE_INST;
186 L[0].tlb[4].num_entries = reg.byt[4];
187 L[0].tlb[4].page_size = 4 << 10;
188 L[0].tlb[4].associativity = reg.byt[5];
189
190 L[0].tlb[5].type = PAPI_MH_TYPE_DATA;
191 L[0].tlb[5].num_entries = reg.byt[6];
192 L[0].tlb[5].page_size = 4 << 10;
193 L[0].tlb[5].associativity = reg.byt[7];
194
195 for ( i = 0; i < PAPI_MH_MAX_LEVELS; i++ ) {
196 if ( L[0].tlb[i].associativity == 0xff )
197 L[0].tlb[i].associativity = SHRT_MAX;
198 }
199
200 /* L1 D-cache info */
201 L[0].cache[0].type =
203 L[0].cache[0].size = reg.byt[11] << 10;
204 L[0].cache[0].associativity = reg.byt[10];
205 L[0].cache[0].line_size = reg.byt[8];
206 /* Byt[9] is "Lines per tag" */
207 /* Is that == lines per cache? */
208 /* L[0].cache[1].num_lines = reg.byt[9]; */
209 if ( L[0].cache[0].line_size )
210 L[0].cache[0].num_lines = L[0].cache[0].size / L[0].cache[0].line_size;
211 MEMDBG( "D-Cache Line Count: %d; Computed: %d\n", reg.byt[9],
212 L[0].cache[0].num_lines );
213
214 /* L1 I-cache info */
215 L[0].cache[1].type = PAPI_MH_TYPE_INST;
216 L[0].cache[1].size = reg.byt[15] << 10;
217 L[0].cache[1].associativity = reg.byt[14];
218 L[0].cache[1].line_size = reg.byt[12];
219 /* Byt[13] is "Lines per tag" */
220 /* Is that == lines per cache? */
221 /* L[0].cache[1].num_lines = reg.byt[13]; */
222 if ( L[0].cache[1].line_size )
223 L[0].cache[1].num_lines = L[0].cache[1].size / L[0].cache[1].line_size;
224 MEMDBG( "I-Cache Line Count: %d; Computed: %d\n", reg.byt[13],
225 L[0].cache[1].num_lines );
226
227 for ( i = 0; i < 2; i++ ) {
228 if ( L[0].cache[i].associativity == 0xff )
229 L[0].cache[i].associativity = SHRT_MAX;
230 }
231
232 /* AMD L2/L3 Cache and L2 TLB info */
233 /* NOTE: For safety we assume L2 and L3 cache and TLB may not exist */
234
235 reg.e.ax = 0x80000006; /* extended function code 6: L2/L3 Cache and L2 TLB Identifiers */
236 cpuid( &reg.e.ax, &reg.e.bx, &reg.e.cx, &reg.e.dx );
237
238 MEMDBG( "e.ax=%#8.8x e.bx=%#8.8x e.cx=%#8.8x e.dx=%#8.8x\n",
239 reg.e.ax, reg.e.bx, reg.e.cx, reg.e.dx );
240 MEMDBG
241 ( ":\neax: %#x %#x %#x %#x\nebx: %#x %#x %#x %#x\necx: %#x %#x %#x %#x\nedx: %#x %#x %#x %#x\n",
242 reg.byt[0], reg.byt[1], reg.byt[2], reg.byt[3], reg.byt[4],
243 reg.byt[5], reg.byt[6], reg.byt[7], reg.byt[8], reg.byt[9],
244 reg.byt[10], reg.byt[11], reg.byt[12], reg.byt[13], reg.byt[14],
245 reg.byt[15] );
246
247 /* L2 TLB info */
248
249 if ( reg.byt[0] | reg.byt[1] ) { /* Level 2 ITLB exists */
250 /* 4MB ITLB page information; half the number of entries as 2MB */
251 L[1].tlb[0].type = PAPI_MH_TYPE_INST;
252 L[1].tlb[0].num_entries =
253 ( ( ( short ) ( reg.byt[1] & 0xF ) << 8 ) + reg.byt[0] ) / 2;
254 L[1].tlb[0].page_size = 4096 << 10;
255 L[1].tlb[0].associativity =
256 _amd_L2_L3_assoc( ( reg.byt[1] & 0xF0 ) >> 4 );
257
258 /* 2MB ITLB page information */
259 L[1].tlb[2].type = PAPI_MH_TYPE_INST;
260 L[1].tlb[2].num_entries = L[1].tlb[0].num_entries * 2;
261 L[1].tlb[2].page_size = 2048 << 10;
262 L[1].tlb[2].associativity = L[1].tlb[0].associativity;
263 }
264
265 if ( reg.byt[2] | reg.byt[3] ) { /* Level 2 DTLB exists */
266 /* 4MB DTLB page information; half the number of entries as 2MB */
267 L[1].tlb[1].type = PAPI_MH_TYPE_DATA;
268 L[1].tlb[1].num_entries =
269 ( ( ( short ) ( reg.byt[3] & 0xF ) << 8 ) + reg.byt[2] ) / 2;
270 L[1].tlb[1].page_size = 4096 << 10;
271 L[1].tlb[1].associativity =
272 _amd_L2_L3_assoc( ( reg.byt[3] & 0xF0 ) >> 4 );
273
274 /* 2MB DTLB page information */
275 L[1].tlb[3].type = PAPI_MH_TYPE_DATA;
276 L[1].tlb[3].num_entries = L[1].tlb[1].num_entries * 2;
277 L[1].tlb[3].page_size = 2048 << 10;
278 L[1].tlb[3].associativity = L[1].tlb[1].associativity;
279 }
280
281 /* 4k page information */
282 if ( reg.byt[4] | reg.byt[5] ) { /* Level 2 ITLB exists */
283 L[1].tlb[4].type = PAPI_MH_TYPE_INST;
284 L[1].tlb[4].num_entries =
285 ( ( short ) ( reg.byt[5] & 0xF ) << 8 ) + reg.byt[4];
286 L[1].tlb[4].page_size = 4 << 10;
287 L[1].tlb[4].associativity =
288 _amd_L2_L3_assoc( ( reg.byt[5] & 0xF0 ) >> 4 );
289 }
290 if ( reg.byt[6] | reg.byt[7] ) { /* Level 2 DTLB exists */
291 L[1].tlb[5].type = PAPI_MH_TYPE_DATA;
292 L[1].tlb[5].num_entries =
293 ( ( short ) ( reg.byt[7] & 0xF ) << 8 ) + reg.byt[6];
294 L[1].tlb[5].page_size = 4 << 10;
295 L[1].tlb[5].associativity =
296 _amd_L2_L3_assoc( ( reg.byt[7] & 0xF0 ) >> 4 );
297 }
298
299 /* AMD Level 2 cache info */
300 if ( reg.e.cx ) {
301 L[1].cache[0].type =
303 L[1].cache[0].size = ( int ) ( ( reg.e.cx & 0xffff0000 ) >> 6 ); /* right shift by 16; multiply by 2^10 */
304 L[1].cache[0].associativity =
305 _amd_L2_L3_assoc( ( reg.byt[9] & 0xF0 ) >> 4 );
306 L[1].cache[0].line_size = reg.byt[8];
307/* L[1].cache[0].num_lines = reg.byt[9]&0xF; */
308 if ( L[1].cache[0].line_size )
309 L[1].cache[0].num_lines =
310 L[1].cache[0].size / L[1].cache[0].line_size;
311 MEMDBG( "U-Cache Line Count: %d; Computed: %d\n", reg.byt[9] & 0xF,
312 L[1].cache[0].num_lines );
313 }
314
315 /* AMD Level 3 cache info (shared across cores) */
316 if ( reg.e.dx ) {
317 L[2].cache[0].type =
319 L[2].cache[0].size = ( int ) ( reg.e.dx & 0xfffc0000 ) << 1; /* in blocks of 512KB (2^19) */
320 L[2].cache[0].associativity =
321 _amd_L2_L3_assoc( ( reg.byt[13] & 0xF0 ) >> 4 );
322 L[2].cache[0].line_size = reg.byt[12];
323/* L[2].cache[0].num_lines = reg.byt[13]&0xF; */
324 if ( L[2].cache[0].line_size )
325 L[2].cache[0].num_lines =
326 L[2].cache[0].size / L[2].cache[0].line_size;
327 MEMDBG( "U-Cache Line Count: %d; Computed: %d\n", reg.byt[13] & 0xF,
328 L[1].cache[0].num_lines );
329 }
330 for ( i = 0; i < PAPI_MAX_MEM_HIERARCHY_LEVELS; i++ ) {
331 for ( j = 0; j < PAPI_MH_MAX_LEVELS; j++ ) {
332 /* Compute the number of levels of hierarchy actually used */
333 if ( L[i].tlb[j].type != PAPI_MH_TYPE_EMPTY ||
334 L[i].cache[j].type != PAPI_MH_TYPE_EMPTY )
335 levels = i + 1;
336 }
337 }
338 *num_levels = levels;
339 return PAPI_OK;
340}
341
342 /*
343 * The data from this table now comes from figure 3-17 in
344 * the Intel Architectures Software Reference Manual 2A
345 * (cpuid instruction section)
346 *
347 * Previously the information was provided by
348 * "Intel® Processor Identification and the CPUID Instruction",
349 * Application Note, AP-485, Nov 2008, 241618-033
350 * Updated to AP-485, Aug 2009, 241618-036
351 *
352 * The following data structure and its instantiation tries to
353 * capture all the information in Section 2.1.3 of the above
354 * document. Not all of it is used by PAPI, but it could be.
355 * As the above document is revised, this table should be
356 * updated.
357 */
358
359#define TLB_SIZES 3 /* number of different page sizes for a single TLB descriptor */
361{
362 int descriptor; /* 0x00 - 0xFF: register descriptor code */
363 int level; /* 1 to PAPI_MH_MAX_LEVELS */
364 int type; /* Empty, instr, data, vector, unified | TLB */
365 int size[TLB_SIZES]; /* cache or TLB page size(s) in kB */
366 int associativity; /* SHRT_MAX == fully associative */
367 int sector; /* 1 if cache is sectored; else 0 */
368 int line_size; /* for cache */
369 int entries; /* for TLB */
370};
371
373// 0x01
374 {.descriptor = 0x01,
375 .level = 1,
377 .size[0] = 4,
378 .associativity = 4,
379 .entries = 32,
380 },
381// 0x02
382 {.descriptor = 0x02,
383 .level = 1,
385 .size[0] = 4096,
386 .associativity = SHRT_MAX,
387 .entries = 2,
388 },
389// 0x03
390 {.descriptor = 0x03,
391 .level = 1,
393 .size[0] = 4,
394 .associativity = 4,
395 .entries = 64,
396 },
397// 0x04
398 {.descriptor = 0x04,
399 .level = 1,
401 .size[0] = 4096,
402 .associativity = 4,
403 .entries = 8,
404 },
405// 0x05
406 {.descriptor = 0x05,
407 .level = 1,
409 .size[0] = 4096,
410 .associativity = 4,
411 .entries = 32,
412 },
413// 0x06
414 {.descriptor = 0x06,
415 .level = 1,
416 .type = PAPI_MH_TYPE_INST,
417 .size[0] = 8,
418 .associativity = 4,
419 .line_size = 32,
420 },
421// 0x08
422 {.descriptor = 0x08,
423 .level = 1,
424 .type = PAPI_MH_TYPE_INST,
425 .size[0] = 16,
426 .associativity = 4,
427 .line_size = 32,
428 },
429// 0x09
430 {.descriptor = 0x09,
431 .level = 1,
432 .type = PAPI_MH_TYPE_INST,
433 .size[0] = 32,
434 .associativity = 4,
435 .line_size = 64,
436 },
437// 0x0A
438 {.descriptor = 0x0A,
439 .level = 1,
440 .type = PAPI_MH_TYPE_DATA,
441 .size[0] = 8,
442 .associativity = 2,
443 .line_size = 32,
444 },
445// 0x0B
446 {.descriptor = 0x0B,
447 .level = 1,
449 .size[0] = 4096,
450 .associativity = 4,
451 .entries = 4,
452 },
453// 0x0C
454 {.descriptor = 0x0C,
455 .level = 1,
456 .type = PAPI_MH_TYPE_DATA,
457 .size[0] = 16,
458 .associativity = 4,
459 .line_size = 32,
460 },
461// 0x0D
462 {.descriptor = 0x0D,
463 .level = 1,
464 .type = PAPI_MH_TYPE_DATA,
465 .size[0] = 16,
466 .associativity = 4,
467 .line_size = 64,
468 },
469// 0x0E
470 {.descriptor = 0x0E,
471 .level = 1,
472 .type = PAPI_MH_TYPE_DATA,
473 .size[0] = 24,
474 .associativity = 6,
475 .line_size = 64,
476 },
477// 0x21
478 {.descriptor = 0x21,
479 .level = 2,
480 .type = PAPI_MH_TYPE_UNIFIED,
481 .size[0] = 256,
482 .associativity = 8,
483 .line_size = 64,
484 },
485// 0x22
486 {.descriptor = 0x22,
487 .level = 3,
488 .type = PAPI_MH_TYPE_UNIFIED,
489 .size[0] = 512,
490 .associativity = 4,
491 .sector = 1,
492 .line_size = 64,
493 },
494// 0x23
495 {.descriptor = 0x23,
496 .level = 3,
497 .type = PAPI_MH_TYPE_UNIFIED,
498 .size[0] = 1024,
499 .associativity = 8,
500 .sector = 1,
501 .line_size = 64,
502 },
503// 0x25
504 {.descriptor = 0x25,
505 .level = 3,
506 .type = PAPI_MH_TYPE_UNIFIED,
507 .size[0] = 2048,
508 .associativity = 8,
509 .sector = 1,
510 .line_size = 64,
511 },
512// 0x29
513 {.descriptor = 0x29,
514 .level = 3,
515 .type = PAPI_MH_TYPE_UNIFIED,
516 .size[0] = 4096,
517 .associativity = 8,
518 .sector = 1,
519 .line_size = 64,
520 },
521// 0x2C
522 {.descriptor = 0x2C,
523 .level = 1,
524 .type = PAPI_MH_TYPE_DATA,
525 .size[0] = 32,
526 .associativity = 8,
527 .line_size = 64,
528 },
529// 0x30
530 {.descriptor = 0x30,
531 .level = 1,
532 .type = PAPI_MH_TYPE_INST,
533 .size[0] = 32,
534 .associativity = 8,
535 .line_size = 64,
536 },
537// 0x39
538 {.descriptor = 0x39,
539 .level = 2,
540 .type = PAPI_MH_TYPE_UNIFIED,
541 .size[0] = 128,
542 .associativity = 4,
543 .sector = 1,
544 .line_size = 64,
545 },
546// 0x3A
547 {.descriptor = 0x3A,
548 .level = 2,
549 .type = PAPI_MH_TYPE_UNIFIED,
550 .size[0] = 192,
551 .associativity = 6,
552 .sector = 1,
553 .line_size = 64,
554 },
555// 0x3B
556 {.descriptor = 0x3B,
557 .level = 2,
558 .type = PAPI_MH_TYPE_UNIFIED,
559 .size[0] = 128,
560 .associativity = 2,
561 .sector = 1,
562 .line_size = 64,
563 },
564// 0x3C
565 {.descriptor = 0x3C,
566 .level = 2,
567 .type = PAPI_MH_TYPE_UNIFIED,
568 .size[0] = 256,
569 .associativity = 4,
570 .sector = 1,
571 .line_size = 64,
572 },
573// 0x3D
574 {.descriptor = 0x3D,
575 .level = 2,
576 .type = PAPI_MH_TYPE_UNIFIED,
577 .size[0] = 384,
578 .associativity = 6,
579 .sector = 1,
580 .line_size = 64,
581 },
582// 0x3E
583 {.descriptor = 0x3E,
584 .level = 2,
585 .type = PAPI_MH_TYPE_UNIFIED,
586 .size[0] = 512,
587 .associativity = 4,
588 .sector = 1,
589 .line_size = 64,
590 },
591// 0x40: no last level cache (??)
592// 0x41
593 {.descriptor = 0x41,
594 .level = 2,
595 .type = PAPI_MH_TYPE_UNIFIED,
596 .size[0] = 128,
597 .associativity = 4,
598 .line_size = 32,
599 },
600// 0x42
601 {.descriptor = 0x42,
602 .level = 2,
603 .type = PAPI_MH_TYPE_UNIFIED,
604 .size[0] = 256,
605 .associativity = 4,
606 .line_size = 32,
607 },
608// 0x43
609 {.descriptor = 0x43,
610 .level = 2,
611 .type = PAPI_MH_TYPE_UNIFIED,
612 .size[0] = 512,
613 .associativity = 4,
614 .line_size = 32,
615 },
616// 0x44
617 {.descriptor = 0x44,
618 .level = 2,
619 .type = PAPI_MH_TYPE_UNIFIED,
620 .size[0] = 1024,
621 .associativity = 4,
622 .line_size = 32,
623 },
624// 0x45
625 {.descriptor = 0x45,
626 .level = 2,
627 .type = PAPI_MH_TYPE_UNIFIED,
628 .size[0] = 2048,
629 .associativity = 4,
630 .line_size = 32,
631 },
632// 0x46
633 {.descriptor = 0x46,
634 .level = 3,
635 .type = PAPI_MH_TYPE_UNIFIED,
636 .size[0] = 4096,
637 .associativity = 4,
638 .line_size = 64,
639 },
640// 0x47
641 {.descriptor = 0x47,
642 .level = 3,
643 .type = PAPI_MH_TYPE_UNIFIED,
644 .size[0] = 8192,
645 .associativity = 8,
646 .line_size = 64,
647 },
648// 0x48
649 {.descriptor = 0x48,
650 .level = 2,
651 .type = PAPI_MH_TYPE_UNIFIED,
652 .size[0] = 3072,
653 .associativity = 12,
654 .line_size = 64,
655 },
656// 0x49 NOTE: for family 0x0F model 0x06 this is level 3
657 {.descriptor = 0x49,
658 .level = 2,
659 .type = PAPI_MH_TYPE_UNIFIED,
660 .size[0] = 4096,
661 .associativity = 16,
662 .line_size = 64,
663 },
664// 0x4A
665 {.descriptor = 0x4A,
666 .level = 3,
667 .type = PAPI_MH_TYPE_UNIFIED,
668 .size[0] = 6144,
669 .associativity = 12,
670 .line_size = 64,
671 },
672// 0x4B
673 {.descriptor = 0x4B,
674 .level = 3,
675 .type = PAPI_MH_TYPE_UNIFIED,
676 .size[0] = 8192,
677 .associativity = 16,
678 .line_size = 64,
679 },
680// 0x4C
681 {.descriptor = 0x4C,
682 .level = 3,
683 .type = PAPI_MH_TYPE_UNIFIED,
684 .size[0] = 12288,
685 .associativity = 12,
686 .line_size = 64,
687 },
688// 0x4D
689 {.descriptor = 0x4D,
690 .level = 3,
691 .type = PAPI_MH_TYPE_UNIFIED,
692 .size[0] = 16384,
693 .associativity = 16,
694 .line_size = 64,
695 },
696// 0x4E
697 {.descriptor = 0x4E,
698 .level = 2,
699 .type = PAPI_MH_TYPE_UNIFIED,
700 .size[0] = 6144,
701 .associativity = 24,
702 .line_size = 64,
703 },
704// 0x4F
705 {.descriptor = 0x4F,
706 .level = 1,
708 .size[0] = 4,
709 .associativity = SHRT_MAX,
710 .entries = 32,
711 },
712// 0x50
713 {.descriptor = 0x50,
714 .level = 1,
716 .size = {4, 2048, 4096},
717 .associativity = SHRT_MAX,
718 .entries = 64,
719 },
720// 0x51
721 {.descriptor = 0x51,
722 .level = 1,
724 .size = {4, 2048, 4096},
725 .associativity = SHRT_MAX,
726 .entries = 128,
727 },
728// 0x52
729 {.descriptor = 0x52,
730 .level = 1,
732 .size = {4, 2048, 4096},
733 .associativity = SHRT_MAX,
734 .entries = 256,
735 },
736// 0x55
737 {.descriptor = 0x55,
738 .level = 1,
740 .size = {2048, 4096, 0},
741 .associativity = SHRT_MAX,
742 .entries = 7,
743 },
744// 0x56
745 {.descriptor = 0x56,
746 .level = 1,
748 .size[0] = 4096,
749 .associativity = 4,
750 .entries = 16,
751 },
752// 0x57
753 {.descriptor = 0x57,
754 .level = 1,
756 .size[0] = 4,
757 .associativity = 4,
758 .entries = 16,
759 },
760// 0x59
761 {.descriptor = 0x59,
762 .level = 1,
764 .size[0] = 4,
765 .associativity = SHRT_MAX,
766 .entries = 16,
767 },
768// 0x5A
769 {.descriptor = 0x5A,
770 .level = 1,
772 .size = {2048, 4096, 0},
773 .associativity = 4,
774 .entries = 32,
775 },
776// 0x5B
777 {.descriptor = 0x5B,
778 .level = 1,
780 .size = {4, 4096, 0},
781 .associativity = SHRT_MAX,
782 .entries = 64,
783 },
784// 0x5C
785 {.descriptor = 0x5C,
786 .level = 1,
788 .size = {4, 4096, 0},
789 .associativity = SHRT_MAX,
790 .entries = 128,
791 },
792// 0x5D
793 {.descriptor = 0x5D,
794 .level = 1,
796 .size = {4, 4096, 0},
797 .associativity = SHRT_MAX,
798 .entries = 256,
799 },
800// 0x60
801 {.descriptor = 0x60,
802 .level = 1,
803 .type = PAPI_MH_TYPE_DATA,
804 .size[0] = 16,
805 .associativity = 8,
806 .sector = 1,
807 .line_size = 64,
808 },
809// 0x66
810 {.descriptor = 0x66,
811 .level = 1,
812 .type = PAPI_MH_TYPE_DATA,
813 .size[0] = 8,
814 .associativity = 4,
815 .sector = 1,
816 .line_size = 64,
817 },
818// 0x67
819 {.descriptor = 0x67,
820 .level = 1,
821 .type = PAPI_MH_TYPE_DATA,
822 .size[0] = 16,
823 .associativity = 4,
824 .sector = 1,
825 .line_size = 64,
826 },
827// 0x68
828 {.descriptor = 0x68,
829 .level = 1,
830 .type = PAPI_MH_TYPE_DATA,
831 .size[0] = 32,
832 .associativity = 4,
833 .sector = 1,
834 .line_size = 64,
835 },
836// 0x70
837 {.descriptor = 0x70,
838 .level = 1,
839 .type = PAPI_MH_TYPE_TRACE,
840 .size[0] = 12,
841 .associativity = 8,
842 },
843// 0x71
844 {.descriptor = 0x71,
845 .level = 1,
846 .type = PAPI_MH_TYPE_TRACE,
847 .size[0] = 16,
848 .associativity = 8,
849 },
850// 0x72
851 {.descriptor = 0x72,
852 .level = 1,
853 .type = PAPI_MH_TYPE_TRACE,
854 .size[0] = 32,
855 .associativity = 8,
856 },
857// 0x73
858 {.descriptor = 0x73,
859 .level = 1,
860 .type = PAPI_MH_TYPE_TRACE,
861 .size[0] = 64,
862 .associativity = 8,
863 },
864// 0x78
865 {.descriptor = 0x78,
866 .level = 2,
867 .type = PAPI_MH_TYPE_UNIFIED,
868 .size[0] = 1024,
869 .associativity = 4,
870 .line_size = 64,
871 },
872// 0x79
873 {.descriptor = 0x79,
874 .level = 2,
875 .type = PAPI_MH_TYPE_UNIFIED,
876 .size[0] = 128,
877 .associativity = 8,
878 .sector = 1,
879 .line_size = 64,
880 },
881// 0x7A
882 {.descriptor = 0x7A,
883 .level = 2,
884 .type = PAPI_MH_TYPE_UNIFIED,
885 .size[0] = 256,
886 .associativity = 8,
887 .sector = 1,
888 .line_size = 64,
889 },
890// 0x7B
891 {.descriptor = 0x7B,
892 .level = 2,
893 .type = PAPI_MH_TYPE_UNIFIED,
894 .size[0] = 512,
895 .associativity = 8,
896 .sector = 1,
897 .line_size = 64,
898 },
899// 0x7C
900 {.descriptor = 0x7C,
901 .level = 2,
902 .type = PAPI_MH_TYPE_UNIFIED,
903 .size[0] = 1024,
904 .associativity = 8,
905 .sector = 1,
906 .line_size = 64,
907 },
908// 0x7D
909 {.descriptor = 0x7D,
910 .level = 2,
911 .type = PAPI_MH_TYPE_UNIFIED,
912 .size[0] = 2048,
913 .associativity = 8,
914 .line_size = 64,
915 },
916// 0x7F
917 {.descriptor = 0x7F,
918 .level = 2,
919 .type = PAPI_MH_TYPE_UNIFIED,
920 .size[0] = 512,
921 .associativity = 2,
922 .line_size = 64,
923 },
924// 0x80
925 {.descriptor = 0x80,
926 .level = 2,
927 .type = PAPI_MH_TYPE_UNIFIED,
928 .size[0] = 512,
929 .associativity = 8,
930 .line_size = 64,
931 },
932// 0x82
933 {.descriptor = 0x82,
934 .level = 2,
935 .type = PAPI_MH_TYPE_UNIFIED,
936 .size[0] = 256,
937 .associativity = 8,
938 .line_size = 32,
939 },
940// 0x83
941 {.descriptor = 0x83,
942 .level = 2,
943 .type = PAPI_MH_TYPE_UNIFIED,
944 .size[0] = 512,
945 .associativity = 8,
946 .line_size = 32,
947 },
948// 0x84
949 {.descriptor = 0x84,
950 .level = 2,
951 .type = PAPI_MH_TYPE_UNIFIED,
952 .size[0] = 1024,
953 .associativity = 8,
954 .line_size = 32,
955 },
956// 0x85
957 {.descriptor = 0x85,
958 .level = 2,
959 .type = PAPI_MH_TYPE_UNIFIED,
960 .size[0] = 2048,
961 .associativity = 8,
962 .line_size = 32,
963 },
964// 0x86
965 {.descriptor = 0x86,
966 .level = 2,
967 .type = PAPI_MH_TYPE_UNIFIED,
968 .size[0] = 512,
969 .associativity = 4,
970 .line_size = 64,
971 },
972// 0x87
973 {.descriptor = 0x87,
974 .level = 2,
975 .type = PAPI_MH_TYPE_UNIFIED,
976 .size[0] = 1024,
977 .associativity = 8,
978 .line_size = 64,
979 },
980// 0xB0
981 {.descriptor = 0xB0,
982 .level = 1,
984 .size[0] = 4,
985 .associativity = 4,
986 .entries = 128,
987 },
988// 0xB1 NOTE: This is currently the only instance where .entries
989// is dependent on .size. It's handled as a code exception.
990// If other instances appear in the future, the structure
991// should probably change to accommodate it.
992 {.descriptor = 0xB1,
993 .level = 1,
995 .size = {2048, 4096, 0},
996 .associativity = 4,
997 .entries = 8, /* or 4 if size = 4096 */
998 },
999// 0xB2
1000 {.descriptor = 0xB2,
1001 .level = 1,
1003 .size[0] = 4,
1004 .associativity = 4,
1005 .entries = 64,
1006 },
1007// 0xB3
1008 {.descriptor = 0xB3,
1009 .level = 1,
1011 .size[0] = 4,
1012 .associativity = 4,
1013 .entries = 128,
1014 },
1015// 0xB4
1016 {.descriptor = 0xB4,
1017 .level = 1,
1019 .size[0] = 4,
1020 .associativity = 4,
1021 .entries = 256,
1022 },
1023// 0xBA
1024 {.descriptor = 0xBA,
1025 .level = 1,
1027 .size[0] = 4,
1028 .associativity = 4,
1029 .entries = 64,
1030 },
1031// 0xC0
1032 {.descriptor = 0xBA,
1033 .level = 1,
1035 .size = {4,4096},
1036 .associativity = 4,
1037 .entries = 8,
1038 },
1039// 0xCA
1040 {.descriptor = 0xCA,
1041 .level = 2,
1043 .size[0] = 4,
1044 .associativity = 4,
1045 .entries = 512,
1046 },
1047// 0xD0
1048 {.descriptor = 0xD0,
1049 .level = 3,
1050 .type = PAPI_MH_TYPE_UNIFIED,
1051 .size[0] = 512,
1052 .associativity = 4,
1053 .line_size = 64,
1054 },
1055// 0xD1
1056 {.descriptor = 0xD1,
1057 .level = 3,
1058 .type = PAPI_MH_TYPE_UNIFIED,
1059 .size[0] = 1024,
1060 .associativity = 4,
1061 .line_size = 64,
1062 },
1063// 0xD2
1064 {.descriptor = 0xD2,
1065 .level = 3,
1066 .type = PAPI_MH_TYPE_UNIFIED,
1067 .size[0] = 2048,
1068 .associativity = 4,
1069 .line_size = 64,
1070 },
1071// 0xD6
1072 {.descriptor = 0xD6,
1073 .level = 3,
1074 .type = PAPI_MH_TYPE_UNIFIED,
1075 .size[0] = 1024,
1076 .associativity = 8,
1077 .line_size = 64,
1078 },
1079// 0xD7
1080 {.descriptor = 0xD7,
1081 .level = 3,
1082 .type = PAPI_MH_TYPE_UNIFIED,
1083 .size[0] = 2048,
1084 .associativity = 8,
1085 .line_size = 64,
1086 },
1087// 0xD8
1088 {.descriptor = 0xD8,
1089 .level = 3,
1090 .type = PAPI_MH_TYPE_UNIFIED,
1091 .size[0] = 4096,
1092 .associativity = 8,
1093 .line_size = 64,
1094 },
1095// 0xDC
1096 {.descriptor = 0xDC,
1097 .level = 3,
1098 .type = PAPI_MH_TYPE_UNIFIED,
1099 .size[0] = 1536,
1100 .associativity = 12,
1101 .line_size = 64,
1102 },
1103// 0xDD
1104 {.descriptor = 0xDD,
1105 .level = 3,
1106 .type = PAPI_MH_TYPE_UNIFIED,
1107 .size[0] = 3072,
1108 .associativity = 12,
1109 .line_size = 64,
1110 },
1111// 0xDE
1112 {.descriptor = 0xDE,
1113 .level = 3,
1114 .type = PAPI_MH_TYPE_UNIFIED,
1115 .size[0] = 6144,
1116 .associativity = 12,
1117 .line_size = 64,
1118 },
1119// 0xE2
1120 {.descriptor = 0xE2,
1121 .level = 3,
1122 .type = PAPI_MH_TYPE_UNIFIED,
1123 .size[0] = 2048,
1124 .associativity = 16,
1125 .line_size = 64,
1126 },
1127// 0xE3
1128 {.descriptor = 0xE3,
1129 .level = 3,
1130 .type = PAPI_MH_TYPE_UNIFIED,
1131 .size[0] = 4096,
1132 .associativity = 16,
1133 .line_size = 64,
1134 },
1135// 0xE4
1136 {.descriptor = 0xE4,
1137 .level = 3,
1138 .type = PAPI_MH_TYPE_UNIFIED,
1139 .size[0] = 8192,
1140 .associativity = 16,
1141 .line_size = 64,
1142 },
1143// 0xEA
1144 {.descriptor = 0xEA,
1145 .level = 3,
1146 .type = PAPI_MH_TYPE_UNIFIED,
1147 .size[0] = 12288,
1148 .associativity = 24,
1149 .line_size = 64,
1150 },
1151// 0xEB
1152 {.descriptor = 0xEB,
1153 .level = 3,
1154 .type = PAPI_MH_TYPE_UNIFIED,
1155 .size[0] = 18432,
1156 .associativity = 24,
1157 .line_size = 64,
1158 },
1159// 0xEC
1160 {.descriptor = 0xEC,
1161 .level = 3,
1162 .type = PAPI_MH_TYPE_UNIFIED,
1163 .size[0] = 24576,
1164 .associativity = 24,
1165 .line_size = 64,
1166 },
1167// 0xF0
1168 {.descriptor = 0xF0,
1169 .level = 1,
1170 .type = PAPI_MH_TYPE_PREF,
1171 .size[0] = 64,
1172 },
1173// 0xF1
1174 {.descriptor = 0xF1,
1175 .level = 1,
1176 .type = PAPI_MH_TYPE_PREF,
1177 .size[0] = 128,
1178 },
1179};
1180
1181#ifdef DEBUG
1182static void
1184{
1185 int i, j, k =
1186 ( int ) ( sizeof ( intel_cache ) /
1187 sizeof ( struct _intel_cache_info ) );
1188 for ( i = 0; i < k; i++ ) {
1189 printf( "%d.\tDescriptor: %#x\n", i, intel_cache[i].descriptor );
1190 printf( "\t Level: %d\n", intel_cache[i].level );
1191 printf( "\t Type: %d\n", intel_cache[i].type );
1192 printf( "\t Size(s): " );
1193 for ( j = 0; j < TLB_SIZES; j++ )
1194 printf( "%d, ", intel_cache[i].size[j] );
1195 printf( "\n" );
1196 printf( "\t Assoc: %d\n", intel_cache[i].associativity );
1197 printf( "\t Sector: %d\n", intel_cache[i].sector );
1198 printf( "\t Line Size: %d\n", intel_cache[i].line_size );
1199 printf( "\t Entries: %d\n", intel_cache[i].entries );
1200 printf( "\n" );
1201 }
1202}
1203#endif
1204
1205/* Given a specific cache descriptor, this routine decodes the information from a table
1206 * of such descriptors and fills out one or more records in a PAPI data structure.
1207 * Called only by init_intel()
1208 */
1209static void
1211{
1212 int i, next;
1213 int level = d->level - 1;
1216
1217 if ( d->descriptor == 0x49 ) { /* special case */
1218 unsigned int r_eax, r_ebx, r_ecx, r_edx;
1219 r_eax = 0x1; /* function code 1: family & model */
1220 cpuid( &r_eax, &r_ebx, &r_ecx, &r_edx );
1221 /* override table for Family F, model 6 only */
1222 if ( ( r_eax & 0x0FFF3FF0 ) == 0xF60 )
1223 level = 3;
1224 }
1225 if ( d->type & PAPI_MH_TYPE_TLB ) {
1226 for ( next = 0; next < PAPI_MH_MAX_LEVELS - 1; next++ ) {
1227 if ( L[level].tlb[next].type == PAPI_MH_TYPE_EMPTY )
1228 break;
1229 }
1230 /* expand TLB entries for multiple possible page sizes */
1231 for ( i = 0; i < TLB_SIZES && next < PAPI_MH_MAX_LEVELS && d->size[i];
1232 i++, next++ ) {
1233// printf("Level %d Descriptor: %#x TLB type %#x next: %d, i: %d\n", level, d->descriptor, d->type, next, i);
1234 t = &L[level].tlb[next];
1235 t->type = PAPI_MH_CACHE_TYPE( d->type );
1236 t->num_entries = d->entries;
1237 t->page_size = d->size[i] << 10; /* minimum page size in KB */
1239 /* another special case */
1240 if ( d->descriptor == 0xB1 && d->size[i] == 4096 )
1241 t->num_entries = d->entries / 2;
1242 }
1243 } else {
1244 for ( next = 0; next < PAPI_MH_MAX_LEVELS - 1; next++ ) {
1245 if ( L[level].cache[next].type == PAPI_MH_TYPE_EMPTY )
1246 break;
1247 }
1248// printf("Level %d Descriptor: %#x Cache type %#x next: %d\n", level, d->descriptor, d->type, next);
1249 c = &L[level].cache[next];
1250 c->type = PAPI_MH_CACHE_TYPE( d->type );
1251 c->size = d->size[0] << 10; /* convert from KB to bytes */
1252 c->associativity = d->associativity;
1253 if ( d->line_size ) {
1254 c->line_size = d->line_size;
1255 c->num_lines = c->size / c->line_size;
1256 }
1257 }
1258}
1259
#if defined(__amd64__) || defined(__x86_64__)
/*
 * Execute CPUID with an explicit sub-leaf (64-bit build).
 *
 * index is loaded into EAX and ecx_in into ECX before the instruction;
 * the resulting EAX, EBX, ECX and EDX are stored through eax, ebx, ecx
 * and edx respectively.
 */
static inline void
cpuid2( unsigned int*eax, unsigned int* ebx,
		unsigned int*ecx, unsigned int *edx,
		unsigned int index, unsigned int ecx_in )
{
	unsigned int out_a, out_b, out_c, out_d;

	__asm__ __volatile__ ("cpuid;"
		: "=a" (out_a), "=b" (out_b), "=c" (out_c), "=d" (out_d)
		: "0" (index), "2"(ecx_in) );

	*eax = out_a;
	*ebx = out_b;
	*ecx = out_c;
	*edx = out_d;
}
#else
/*
 * Execute CPUID with an explicit sub-leaf (32-bit build).
 *
 * Same contract as the 64-bit variant.  EBX is pushed and popped with
 * raw opcode bytes around the instruction, and the EBX result is
 * returned via ESI.
 */
static inline void
cpuid2 ( unsigned int* eax, unsigned int* ebx,
		 unsigned int* ecx, unsigned int* edx,
		 unsigned int index, unsigned int ecx_in )
{
	unsigned int out_a, out_b, out_c, out_d;

	__asm__ __volatile__ (
		".byte 0x53\n\t"	/* push ebx */
		"cpuid\n\t"
		"movl %%ebx, %%esi\n\t"
		".byte 0x5b"		/* pop ebx */
		: "=a" (out_a), "=S" (out_b), "=c" (out_c), "=d" (out_d)
		: "0" (index), "2"(ecx_in) );

	*eax = out_a;
	*ebx = out_b;
	*ecx = out_c;
	*edx = out_d;
}
#endif
1283
1284
1285static int
1286init_intel_leaf4( PAPI_mh_info_t * mh_info, int *num_levels )
1287{
1288
1289 unsigned int eax, ebx, ecx, edx;
1290 unsigned int maxidx, ecx_in;
1291 int next;
1292
1293 int cache_type,cache_level,cache_selfinit,cache_fullyassoc;
1294 int cache_linesize,cache_partitions,cache_ways,cache_sets;
1295
1297
1298 *num_levels=0;
1299
1300 cpuid2(&eax,&ebx,&ecx,&edx, 0, 0);
1301 maxidx = eax;
1302
1303 if (maxidx<4) {
1304 MEMDBG("Warning! CPUID Index 4 not supported!\n");
1305 return PAPI_ENOSUPP;
1306 }
1307
1308 ecx_in=0;
1309 while(1) {
1310 cpuid2(&eax,&ebx,&ecx,&edx, 4, ecx_in);
1311
1312
1313
1314 /* decoded as per table 3-12 in Intel Software Developer's Manual Volume 2A */
1315
1316 cache_type=eax&0x1f;
1317 if (cache_type==0) break;
1318
1319 cache_level=(eax>>5)&0x3;
1320 cache_selfinit=(eax>>8)&0x1;
1321 cache_fullyassoc=(eax>>9)&0x1;
1322
1323 cache_linesize=(ebx&0xfff)+1;
1324 cache_partitions=((ebx>>12)&0x3ff)+1;
1325 cache_ways=((ebx>>22)&0x3ff)+1;
1326
1327 cache_sets=(ecx)+1;
1328
1329 /* should we export this info?
1330
1331 cache_maxshare=((eax>>14)&0xfff)+1;
1332 cache_maxpackage=((eax>>26)&0x3f)+1;
1333
1334 cache_wb=(edx)&1;
1335 cache_inclusive=(edx>>1)&1;
1336 cache_indexing=(edx>>2)&1;
1337 */
1338
1339 if (cache_level>*num_levels) *num_levels=cache_level;
1340
1341 /* find next slot available to hold cache info */
1342 for ( next = 0; next < PAPI_MH_MAX_LEVELS - 1; next++ ) {
1343 if ( mh_info->level[cache_level-1].cache[next].type == PAPI_MH_TYPE_EMPTY ) break;
1344 }
1345
1346 c=&(mh_info->level[cache_level-1].cache[next]);
1347
1348 switch(cache_type) {
1349 case 1: MEMDBG("L%d Data Cache\n",cache_level);
1350 c->type=PAPI_MH_TYPE_DATA;
1351 break;
1352 case 2: MEMDBG("L%d Instruction Cache\n",cache_level);
1353 c->type=PAPI_MH_TYPE_INST;
1354 break;
1355 case 3: MEMDBG("L%d Unified Cache\n",cache_level);
1356 c->type=PAPI_MH_TYPE_UNIFIED;
1357 break;
1358 }
1359
1360 if (cache_selfinit) { MEMDBG("\tSelf-init\n"); }
1361 if (cache_fullyassoc) { MEMDBG("\tFully Associtative\n"); }
1362
1363 //MEMDBG("\tMax logical processors sharing cache: %d\n",cache_maxshare);
1364 //MEMDBG("\tMax logical processors sharing package: %d\n",cache_maxpackage);
1365
1366 MEMDBG("\tCache linesize: %d\n",cache_linesize);
1367
1368 MEMDBG("\tCache partitions: %d\n",cache_partitions);
1369 MEMDBG("\tCache associaticity: %d\n",cache_ways);
1370
1371 MEMDBG("\tCache sets: %d\n",cache_sets);
1372 MEMDBG("\tCache size = %dkB\n",
1373 (cache_ways*cache_partitions*cache_linesize*cache_sets)/1024);
1374
1375 //MEMDBG("\tWBINVD/INVD acts on lower caches: %d\n",cache_wb);
1376 //MEMDBG("\tCache is not inclusive: %d\n",cache_inclusive);
1377 //MEMDBG("\tComplex cache indexing: %d\n",cache_indexing);
1378
1379 c->line_size=cache_linesize;
1380 if (cache_fullyassoc) {
1381 c->associativity=SHRT_MAX;
1382 }
1383 else {
1384 c->associativity=cache_ways;
1385 }
1386 c->size=(cache_ways*cache_partitions*cache_linesize*cache_sets);
1387 c->num_lines=cache_ways*cache_partitions*cache_sets;
1388
1389 ecx_in++;
1390 }
1391 return PAPI_OK;
1392}
1393
1394static int
1395init_intel_leaf2( PAPI_mh_info_t * mh_info , int *num_levels)
1396{
1397 /* cpuid() returns memory copies of 4 32-bit registers
1398 * this union allows them to be accessed as either registers
1399 * or individual bytes. Remember that Intel is little-endian.
1400 */
1401 union
1402 {
1403 struct
1404 {
1405 unsigned int ax, bx, cx, dx;
1406 } e;
1407 unsigned char descrip[16];
1408 } reg;
1409
1410 int r; /* register boundary index */
1411 int b; /* byte index into a register */
1412 int i; /* byte index into the descrip array */
1413 int t; /* table index into the static descriptor table */
1414 int count; /* how many times to call cpuid; from eax:lsb */
1415 int size; /* size of the descriptor table */
1416 int last_level = 0; /* how many levels in the cache hierarchy */
1417
1418 /* All of Intel's cache info is in 1 call to cpuid
1419 * however it is a table lookup :(
1420 */
1421 MEMDBG( "Initializing Intel Cache and TLB descriptors\n" );
1422
1423#ifdef DEBUG
1424 if ( ISLEVEL( DEBUG_MEMORY ) )
1426#endif
1427
1428 reg.e.ax = 0x2; /* function code 2: cache descriptors */
1429 cpuid( &reg.e.ax, &reg.e.bx, &reg.e.cx, &reg.e.dx );
1430
1431 MEMDBG( "e.ax=%#8.8x e.bx=%#8.8x e.cx=%#8.8x e.dx=%#8.8x\n",
1432 reg.e.ax, reg.e.bx, reg.e.cx, reg.e.dx );
1433 MEMDBG
1434 ( ":\nd0: %#x %#x %#x %#x\nd1: %#x %#x %#x %#x\nd2: %#x %#x %#x %#x\nd3: %#x %#x %#x %#x\n",
1435 reg.descrip[0], reg.descrip[1], reg.descrip[2], reg.descrip[3],
1436 reg.descrip[4], reg.descrip[5], reg.descrip[6], reg.descrip[7],
1437 reg.descrip[8], reg.descrip[9], reg.descrip[10], reg.descrip[11],
1438 reg.descrip[12], reg.descrip[13], reg.descrip[14], reg.descrip[15] );
1439
1440 count = reg.descrip[0]; /* # times to repeat CPUID call. Not implemented. */
1441
1442 /* Knights Corner at least returns 0 here */
1443 if (count==0) goto early_exit;
1444
1445 size = ( sizeof ( intel_cache ) / sizeof ( struct _intel_cache_info ) ); /* # descriptors */
1446 MEMDBG( "Repeat cpuid(2,...) %d times. If not 1, code is broken.\n",
1447 count );
1448 if (count!=1) {
1449 fprintf(stderr,"Warning: Unhandled cpuid count of %d\n",count);
1450 }
1451
1452 for ( r = 0; r < 4; r++ ) { /* walk the registers */
1453 if ( ( reg.descrip[r * 4 + 3] & 0x80 ) == 0 ) { /* only process if high order bit is 0 */
1454 for ( b = 3; b >= 0; b-- ) { /* walk the descriptor bytes from high to low */
1455 i = r * 4 + b; /* calculate an index into the array of descriptors */
1456 if ( i ) { /* skip the low order byte in eax [0]; it's the count (see above) */
1457 if ( reg.descrip[i] == 0xff ) {
1458 MEMDBG("Warning! PAPI x86_cache: must implement cpuid leaf 4\n");
1459 return PAPI_ENOSUPP;
1460 /* we might continue instead */
1461 /* in order to get TLB info */
1462 /* continue; */
1463 }
1464 for ( t = 0; t < size; t++ ) { /* walk the descriptor table */
1465 if ( reg.descrip[i] == intel_cache[t].descriptor ) { /* find match */
1466 if ( intel_cache[t].level > last_level )
1467 last_level = intel_cache[t].level;
1469 mh_info->level );
1470 }
1471 }
1472 }
1473 }
1474 }
1475 }
1476early_exit:
1477 MEMDBG( "# of Levels: %d\n", last_level );
1478 *num_levels=last_level;
1479 return PAPI_OK;
1480}
1481
1482
1483static int
1484init_intel( PAPI_mh_info_t * mh_info, int *levels )
1485{
1486
1487 int result;
1488 int num_levels;
1489
1490 /* try using the oldest leaf2 method first */
1491 result=init_intel_leaf2(mh_info, &num_levels);
1492
1493 if (result!=PAPI_OK) {
1494 /* All Core2 and newer also support leaf4 detection */
1495 /* Starting with Westmere *only* leaf4 is supported */
1496 result=init_intel_leaf4(mh_info, &num_levels);
1497 }
1498
1499 *levels=num_levels;
1500 return PAPI_OK;
1501}
1502
1503
1504/* Returns 1 if hypervisor detected */
1505/* Returns 0 if none found. */
1506int
1507_x86_detect_hypervisor(char *vendor_name)
1508{
1509 unsigned int eax, ebx, ecx, edx;
1510 char hyper_vendor_id[13];
1511
1512 cpuid2(&eax, &ebx, &ecx, &edx,0x1,0);
1513 /* This is the hypervisor bit, ecx bit 31 */
1514 if (ecx&0x80000000) {
1515 /* There are various values in the 0x4000000X range */
1516 /* It is questionable how standard they are */
1517 /* For now we just return the name. */
1518 cpuid2(&eax, &ebx, &ecx, &edx, 0x40000000,0);
1519 memcpy(hyper_vendor_id + 0, &ebx, 4);
1520 memcpy(hyper_vendor_id + 4, &ecx, 4);
1521 memcpy(hyper_vendor_id + 8, &edx, 4);
1522 hyper_vendor_id[12] = '\0';
1523 strncpy(vendor_name,hyper_vendor_id,PAPI_MAX_STR_LEN);
1524 return 1;
1525 }
1526 else {
1527 strncpy(vendor_name,"none",PAPI_MAX_STR_LEN);
1528 }
1529 return 0;
1530}
volatile int result
int i
static long count
#define PAPI_OK
Definition: f90papi.h:73
#define PAPI_ENOSUPP
Definition: f90papi.h:244
#define PAPI_MAX_STR_LEN
Definition: f90papi.h:77
#define PAPI_MAX_MEM_HIERARCHY_LEVELS
Definition: f90papi.h:103
#define PAPI_ENOIMPL
Definition: f90papi.h:219
static double a[MATRIX_SIZE][MATRIX_SIZE]
Definition: libmsr_basic.c:38
static double b[MATRIX_SIZE][MATRIX_SIZE]
Definition: libmsr_basic.c:39
static double c[MATRIX_SIZE][MATRIX_SIZE]
Definition: libmsr_basic.c:40
uint16_t type
Return codes and api definitions.
#define PAPI_MH_TYPE_DATA
Definition: papi.h:720
#define PAPI_MH_TYPE_PREF
Definition: papi.h:734
#define PAPI_MH_TYPE_TLB
Definition: papi.h:733
#define PAPI_MH_MAX_LEVELS
Definition: papi.h:739
#define PAPI_MH_TYPE_PSEUDO_LRU
Definition: papi.h:730
#define PAPI_MH_CACHE_TYPE(a)
Definition: papi.h:724
#define PAPI_MH_TYPE_WB
Definition: papi.h:726
#define PAPI_MH_TYPE_WT
Definition: papi.h:725
#define PAPI_MH_TYPE_INST
Definition: papi.h:719
#define PAPI_MH_TYPE_EMPTY
Definition: papi.h:718
#define PAPI_MH_TYPE_TRACE
Definition: papi.h:722
#define PAPI_MH_TYPE_UNIFIED
Definition: papi.h:723
#define MEMDBG(format, args...)
Definition: papi_debug.h:71
#define ISLEVEL(a)
Definition: papi_debug.h:55
#define DEBUG_MEMORY
Definition: papi_debug.h:34
FILE * stderr
int
Definition: sde_internal.h:89
mh for mem hierarchy maybe?
Definition: papi.h:767
int levels
Definition: papi.h:768
PAPI_mh_level_t level[PAPI_MAX_MEM_HIERARCHY_LEVELS]
Definition: papi.h:769
PAPI_mh_tlb_info_t tlb[PAPI_MH_MAX_LEVELS]
Definition: papi.h:761
PAPI_mh_cache_info_t cache[PAPI_MH_MAX_LEVELS]
Definition: papi.h:762
int associativity
Definition: papi.h:747
int size[TLB_SIZES]
static int init_amd(PAPI_mh_info_t *mh_info, int *levels)
static void cpuid(unsigned int *a, unsigned int *b, unsigned int *c, unsigned int *d)
int _x86_detect_hypervisor(char *vendor_name)
static int init_intel(PAPI_mh_info_t *mh_info, int *levels)
static void intel_decode_descriptor(struct _intel_cache_info *d, PAPI_mh_level_t *L)
static int init_intel_leaf2(PAPI_mh_info_t *mh_info, int *num_levels)
static void print_intel_cache_table()
int _x86_cache_info(PAPI_mh_info_t *mh_info)
static void cpuid2(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx, unsigned int index, unsigned int ecx_in)
static struct _intel_cache_info intel_cache[]
static void init_mem_hierarchy(PAPI_mh_info_t *mh_info)
#define TLB_SIZES
static int init_intel_leaf4(PAPI_mh_info_t *mh_info, int *num_levels)
static short int _amd_L2_L3_assoc(unsigned short int pattern)
int retval
Definition: zero_fork.c:53