PAPI 7.1.0.0
Loading...
Searching...
No Matches
perfctr-x86.c
Go to the documentation of this file.
1/*
2* File: perfctr-x86.c
3* Author: Brian Sheely
4* bsheely@eecs.utk.edu
5* Mods: <your name here>
6* <your email address>
7*/
8
9#include <string.h>
10#include <linux/unistd.h>
11
12#include "papi.h"
13#include "papi_memory.h"
14#include "papi_internal.h"
15#include "perfctr-x86.h"
16#include "perfmon/pfmlib.h"
17#include "extras.h"
18#include "papi_vector.h"
19#include "papi_libpfm_events.h"
20
21#include "papi_preset.h"
22#include "linux-memory.h"
23
24/* Contains source for the Modified Bipartite Allocation scheme */
25#include "papi_bipartite.h"
26
27/* Prototypes for entry points found in perfctr.c */
28extern int _perfctr_init_component( int );
29extern int _perfctr_ctl( hwd_context_t * ctx, int code,
30 _papi_int_option_t * option );
32 void *context );
33
34extern int _perfctr_init_thread( hwd_context_t * ctx );
35extern int _perfctr_shutdown_thread( hwd_context_t * ctx );
36
37#include "linux-common.h"
38#include "linux-timer.h"
39
41
43
44#if defined(PERFCTR26)
45#define evntsel_aux p4.escr
46#endif
47
48#if defined(PAPI_PENTIUM4_VEC_MMX)
49#define P4_VEC "MMX"
50#else
51#define P4_VEC "SSE"
52#endif
53
54#if defined(PAPI_PENTIUM4_FP_X87)
55#define P4_FPU " X87"
56#elif defined(PAPI_PENTIUM4_FP_X87_SSE_SP)
57#define P4_FPU " X87 SSE_SP"
58#elif defined(PAPI_PENTIUM4_FP_SSE_SP_DP)
59#define P4_FPU " SSE_SP SSE_DP"
60#else
61#define P4_FPU " X87 SSE_DP"
62#endif
63
64/* CODE TO SUPPORT CUSTOMIZABLE FP COUNTS ON OPTERON */
65#if defined(PAPI_OPTERON_FP_RETIRED)
66#define AMD_FPU "RETIRED"
67#elif defined(PAPI_OPTERON_FP_SSE_SP)
68#define AMD_FPU "SSE_SP"
69#elif defined(PAPI_OPTERON_FP_SSE_DP)
70#define AMD_FPU "SSE_DP"
71#else
72#define AMD_FPU "SPECULATIVE"
73#endif
74
75static inline int is_pentium4(void) {
78 return 1;
79 }
80
81 return 0;
82
83}
84
85#ifdef DEBUG
86static void
88{
89 SUBDBG( "X86_reg_alloc:\n" );
90 SUBDBG( " selector: %#x\n", a->ra_selector );
91 SUBDBG( " rank: %#x\n", a->ra_rank );
92 SUBDBG( " escr: %#x %#x\n", a->ra_escr[0], a->ra_escr[1] );
93}
94
95void
96print_control( const struct perfctr_cpu_control *control )
97{
98 unsigned int i;
99 SUBDBG( "Control used:\n" );
100 SUBDBG( "tsc_on\t\t\t%u\n", control->tsc_on );
101 SUBDBG( "nractrs\t\t\t%u\n", control->nractrs );
102 SUBDBG( "nrictrs\t\t\t%u\n", control->nrictrs );
103
104 for ( i = 0; i < ( control->nractrs + control->nrictrs ); ++i ) {
105 if ( control->pmc_map[i] >= 18 ) {
106 SUBDBG( "pmc_map[%u]\t\t0x%08X\n", i, control->pmc_map[i] );
107 } else {
108 SUBDBG( "pmc_map[%u]\t\t%u\n", i, control->pmc_map[i] );
109 }
110 SUBDBG( "evntsel[%u]\t\t0x%08X\n", i, control->evntsel[i] );
111 if ( control->ireset[i] ) {
112 SUBDBG( "ireset[%u]\t%d\n", i, control->ireset[i] );
113 }
114 }
115}
116#endif
117
118static int
120{
121 int i, def_mode = 0;
122
123 if ( is_pentium4() ) {
125 def_mode |= ESCR_T0_USR;
127 def_mode |= ESCR_T0_OS;
128
129 for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
130 ptr->control.cpu_control.evntsel_aux[i] |= def_mode;
131 }
132 ptr->control.cpu_control.tsc_on = 1;
133 ptr->control.cpu_control.nractrs = 0;
134 ptr->control.cpu_control.nrictrs = 0;
135
136#ifdef VPERFCTR_CONTROL_CLOEXEC
137 ptr->control.flags = VPERFCTR_CONTROL_CLOEXEC;
138 SUBDBG( "close on exec\t\t\t%u\n", ptr->control.flags );
139#endif
140 } else {
141
143 def_mode |= PERF_USR;
145 def_mode |= PERF_OS;
146
147 ptr->allocated_registers.selector = 0;
149 case PERFCTR_X86_GENERIC:
150 case PERFCTR_X86_WINCHIP_C6:
151 case PERFCTR_X86_WINCHIP_2:
152 case PERFCTR_X86_VIA_C3:
153 case PERFCTR_X86_INTEL_P5:
154 case PERFCTR_X86_INTEL_P5MMX:
155 case PERFCTR_X86_INTEL_PII:
156 case PERFCTR_X86_INTEL_P6:
157 case PERFCTR_X86_INTEL_PIII:
158#ifdef PERFCTR_X86_INTEL_CORE
159 case PERFCTR_X86_INTEL_CORE:
160#endif
161#ifdef PERFCTR_X86_INTEL_PENTM
162 case PERFCTR_X86_INTEL_PENTM:
163#endif
164 ptr->control.cpu_control.evntsel[0] |= PERF_ENABLE;
165 for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
166 ptr->control.cpu_control.evntsel[i] |= def_mode;
167 ptr->control.cpu_control.pmc_map[i] = ( unsigned int ) i;
168 }
169 break;
170#ifdef PERFCTR_X86_INTEL_CORE2
171 case PERFCTR_X86_INTEL_CORE2:
172#endif
173#ifdef PERFCTR_X86_INTEL_ATOM
174 case PERFCTR_X86_INTEL_ATOM:
175#endif
176#ifdef PERFCTR_X86_INTEL_NHLM
177 case PERFCTR_X86_INTEL_NHLM:
178#endif
179#ifdef PERFCTR_X86_INTEL_WSTMR
180 case PERFCTR_X86_INTEL_WSTMR:
181#endif
182#ifdef PERFCTR_X86_AMD_K8
183 case PERFCTR_X86_AMD_K8:
184#endif
185#ifdef PERFCTR_X86_AMD_K8C
186 case PERFCTR_X86_AMD_K8C:
187#endif
188#ifdef PERFCTR_X86_AMD_FAM10H /* this is defined in perfctr 2.6.29 */
189 case PERFCTR_X86_AMD_FAM10H:
190#endif
191 case PERFCTR_X86_AMD_K7:
192 for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
193 ptr->control.cpu_control.evntsel[i] |= PERF_ENABLE | def_mode;
194 ptr->control.cpu_control.pmc_map[i] = ( unsigned int ) i;
195 }
196 break;
197 }
198#ifdef VPERFCTR_CONTROL_CLOEXEC
199 ptr->control.flags = VPERFCTR_CONTROL_CLOEXEC;
200 SUBDBG( "close on exec\t\t\t%u\n", ptr->control.flags );
201#endif
202
203 /* Make sure the TSC is always on */
204 ptr->control.cpu_control.tsc_on = 1;
205 }
206 return ( PAPI_OK );
207}
208
209int
211{
212 int i, did = 0;
213 int num_cntrs = _perfctr_vector.cmp_info.num_cntrs;
214
215 /* Clear the current domain set for this event set */
216 /* We don't touch the Enable bit in this code */
217 if ( is_pentium4() ) {
218 for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
219 cntrl->control.cpu_control.evntsel_aux[i] &=
220 ~( ESCR_T0_OS | ESCR_T0_USR );
221 }
222
223 if ( domain & PAPI_DOM_USER ) {
224 did = 1;
225 for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
226 cntrl->control.cpu_control.evntsel_aux[i] |= ESCR_T0_USR;
227 }
228 }
229
230 if ( domain & PAPI_DOM_KERNEL ) {
231 did = 1;
232 for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
233 cntrl->control.cpu_control.evntsel_aux[i] |= ESCR_T0_OS;
234 }
235 }
236 } else {
237 for ( i = 0; i < num_cntrs; i++ ) {
238 cntrl->control.cpu_control.evntsel[i] &= ~( PERF_OS | PERF_USR );
239 }
240
241 if ( domain & PAPI_DOM_USER ) {
242 did = 1;
243 for ( i = 0; i < num_cntrs; i++ ) {
244 cntrl->control.cpu_control.evntsel[i] |= PERF_USR;
245 }
246 }
247
248 if ( domain & PAPI_DOM_KERNEL ) {
249 did = 1;
250 for ( i = 0; i < num_cntrs; i++ ) {
251 cntrl->control.cpu_control.evntsel[i] |= PERF_OS;
252 }
253 }
254 }
255
256 if ( !did )
257 return ( PAPI_EINVAL );
258 else
259 return ( PAPI_OK );
260}
261
262/* This function examines the event to determine
263 if it can be mapped to counter ctr.
264 Returns true if it can, false if it can't. */
265static int
267{
268 return ( int ) ( dst->ra_selector & ( 1 << ctr ) );
269}
270
271/* This function forces the event to
272 be mapped to only counter ctr.
273 Returns nothing. */
274static void
276{
277 dst->ra_selector = ( unsigned int ) ( 1 << ctr );
278 dst->ra_rank = 1;
279
280 if ( is_pentium4() ) {
281 /* Pentium 4 requires that both an escr and a counter are selected.
282 Find which counter mask contains this counter.
283 Set the opposite escr to empty (-1) */
284 if ( dst->ra_bits.counter[0] & dst->ra_selector )
285 dst->ra_escr[1] = -1;
286 else
287 dst->ra_escr[0] = -1;
288 }
289}
290
291/* This function examines the event to determine
292 if it has a single exclusive mapping.
293 Returns true if exlusive, false if non-exclusive. */
294static int
296{
297 return ( dst->ra_rank == 1 );
298}
299
300/* This function compares the dst and src events
301 to determine if any resources are shared. Typically the src event
302 is exclusive, so this detects a conflict if true.
303 Returns true if conflict, false if no conflict. */
304static int
306{
307 if ( is_pentium4() ) {
308 int retval1, retval2;
309 /* Pentium 4 needs to check for conflict of both counters and esc registers */
310 /* selectors must share bits */
311 retval1 = ( ( dst->ra_selector & src->ra_selector ) ||
312 /* or escrs must equal each other and not be set to -1 */
313 ( ( dst->ra_escr[0] == src->ra_escr[0] ) &&
314 ( ( int ) dst->ra_escr[0] != -1 ) ) ||
315 ( ( dst->ra_escr[1] == src->ra_escr[1] ) &&
316 ( ( int ) dst->ra_escr[1] != -1 ) ) );
317 /* Pentium 4 also needs to check for conflict on pebs registers */
318 /* pebs enables must both be non-zero */
319 retval2 =
320 ( ( ( dst->ra_bits.pebs_enable && src->ra_bits.pebs_enable ) &&
321 /* and not equal to each other */
322 ( dst->ra_bits.pebs_enable != src->ra_bits.pebs_enable ) ) ||
323 /* same for pebs_matrix_vert */
324 ( ( dst->ra_bits.pebs_matrix_vert &&
325 src->ra_bits.pebs_matrix_vert ) &&
326 ( dst->ra_bits.pebs_matrix_vert !=
327 src->ra_bits.pebs_matrix_vert ) ) );
328 if ( retval2 ) {
329 SUBDBG( "pebs conflict!\n" );
330 }
331 return ( retval1 | retval2 );
332 }
333
334 return ( int ) ( dst->ra_selector & src->ra_selector );
335}
336
337/* This function removes shared resources available to the src event
338 from the resources available to the dst event,
339 and reduces the rank of the dst event accordingly. Typically,
340 the src event will be exclusive, but the code shouldn't assume it.
341 Returns nothing. */
342static void
344{
345 int i;
346 unsigned shared;
347
348 if ( is_pentium4() ) {
349#ifdef DEBUG
350 SUBDBG( "src, dst\n" );
351 print_alloc( src );
352 print_alloc( dst );
353#endif
354
355 /* check for a pebs conflict */
356 /* pebs enables must both be non-zero */
357 i = ( ( ( dst->ra_bits.pebs_enable && src->ra_bits.pebs_enable ) &&
358 /* and not equal to each other */
359 ( dst->ra_bits.pebs_enable != src->ra_bits.pebs_enable ) ) ||
360 /* same for pebs_matrix_vert */
361 ( ( dst->ra_bits.pebs_matrix_vert &&
362 src->ra_bits.pebs_matrix_vert )
363 && ( dst->ra_bits.pebs_matrix_vert !=
364 src->ra_bits.pebs_matrix_vert ) ) );
365 if ( i ) {
366 SUBDBG( "pebs conflict! clearing selector\n" );
367 dst->ra_selector = 0;
368 return;
369 } else {
370 /* remove counters referenced by any shared escrs */
371 if ( ( dst->ra_escr[0] == src->ra_escr[0] ) &&
372 ( ( int ) dst->ra_escr[0] != -1 ) ) {
373 dst->ra_selector &= ~dst->ra_bits.counter[0];
374 dst->ra_escr[0] = -1;
375 }
376 if ( ( dst->ra_escr[1] == src->ra_escr[1] ) &&
377 ( ( int ) dst->ra_escr[1] != -1 ) ) {
378 dst->ra_selector &= ~dst->ra_bits.counter[1];
379 dst->ra_escr[1] = -1;
380 }
381
382 /* remove any remaining shared counters */
383 shared = ( dst->ra_selector & src->ra_selector );
384 if ( shared )
385 dst->ra_selector ^= shared;
386 }
387 /* recompute rank */
388 for ( i = 0, dst->ra_rank = 0; i < MAX_COUNTERS; i++ )
389 if ( dst->ra_selector & ( 1 << i ) )
390 dst->ra_rank++;
391#ifdef DEBUG
392 SUBDBG( "new dst\n" );
393 print_alloc( dst );
394#endif
395 } else {
396 shared = dst->ra_selector & src->ra_selector;
397 if ( shared )
398 dst->ra_selector ^= shared;
399 for ( i = 0, dst->ra_rank = 0; i < MAX_COUNTERS; i++ )
400 if ( dst->ra_selector & ( 1 << i ) )
401 dst->ra_rank++;
402 }
403}
404
405static void
407{
408 dst->ra_selector = src->ra_selector;
409
410 if ( is_pentium4() ) {
411 dst->ra_escr[0] = src->ra_escr[0];
412 dst->ra_escr[1] = src->ra_escr[1];
413 }
414}
415
416/* Register allocation */
417static int
419{
420 int i, j, natNum;
421 hwd_reg_alloc_t event_list[MAX_COUNTERS];
422 hwd_register_t *ptr;
423
424 /* Initialize the local structure needed
425 for counter allocation and optimization. */
426 natNum = ESI->NativeCount;
427
428 if ( is_pentium4() ) {
429 SUBDBG( "native event count: %d\n", natNum );
430 }
431
432 for ( i = 0; i < natNum; i++ ) {
433 /* retrieve the mapping information about this native event */
435 ni_event, &event_list[i].ra_bits );
436
437 if ( is_pentium4() ) {
438 /* combine counter bit masks for both esc registers into selector */
439 event_list[i].ra_selector =
440 event_list[i].ra_bits.counter[0] | event_list[i].ra_bits.
441 counter[1];
442 } else {
443 /* make sure register allocator only looks at legal registers */
444 event_list[i].ra_selector =
445 event_list[i].ra_bits.selector & ALLCNTRS;
446#ifdef PERFCTR_X86_INTEL_CORE2
448 PERFCTR_X86_INTEL_CORE2 )
449 event_list[i].ra_selector |=
450 ( ( event_list[i].ra_bits.
451 selector >> 16 ) << 2 ) & ALLCNTRS;
452#endif
453 }
454 /* calculate native event rank, which is no. of counters it can live on */
455 event_list[i].ra_rank = 0;
456 for ( j = 0; j < MAX_COUNTERS; j++ ) {
457 if ( event_list[i].ra_selector & ( 1 << j ) ) {
458 event_list[i].ra_rank++;
459 }
460 }
461
462 if ( is_pentium4() ) {
463 event_list[i].ra_escr[0] = event_list[i].ra_bits.escr[0];
464 event_list[i].ra_escr[1] = event_list[i].ra_bits.escr[1];
465#ifdef DEBUG
466 SUBDBG( "i: %d\n", i );
467 print_alloc( &event_list[i] );
468#endif
469 }
470 }
471 if ( _papi_bipartite_alloc( event_list, natNum, ESI->CmpIdx ) ) { /* successfully mapped */
472 for ( i = 0; i < natNum; i++ ) {
473#ifdef PERFCTR_X86_INTEL_CORE2
475 PERFCTR_X86_INTEL_CORE2 )
476 event_list[i].ra_bits.selector = event_list[i].ra_selector;
477#endif
478#ifdef DEBUG
479 if ( is_pentium4() ) {
480 SUBDBG( "i: %d\n", i );
481 print_alloc( &event_list[i] );
482 }
483#endif
484 /* Copy all info about this native event to the NativeInfo struct */
485 ptr = ESI->NativeInfoArray[i].ni_bits;
486 *ptr = event_list[i].ra_bits;
487
488 if ( is_pentium4() ) {
489 /* The selector contains the counter bit position. Turn it into a number
490 and store it in the first counter value, zeroing the second. */
491 ptr->counter[0] = ffs( event_list[i].ra_selector ) - 1;
492 ptr->counter[1] = 0;
493 }
494
495 /* Array order on perfctr is event ADD order, not counter #... */
497 }
498 return PAPI_OK;
499 } else
500 return PAPI_ECNFLCT;
501}
502
503static void
505{
506 unsigned int i, j;
507
508 /* total counters is sum of accumulating (nractrs) and interrupting (nrictrs) */
509 j = this_state->control.cpu_control.nractrs +
510 this_state->control.cpu_control.nrictrs;
511
512 /* Remove all counter control command values from eventset. */
513 for ( i = 0; i < j; i++ ) {
514 SUBDBG( "Clearing pmc event entry %d\n", i );
515 if ( is_pentium4() ) {
516 this_state->control.cpu_control.pmc_map[i] = 0;
517 this_state->control.cpu_control.evntsel[i] = 0;
518 this_state->control.cpu_control.evntsel_aux[i] =
519 this_state->control.cpu_control.
520 evntsel_aux[i] & ( ESCR_T0_OS | ESCR_T0_USR );
521 } else {
522 this_state->control.cpu_control.pmc_map[i] = i;
523 this_state->control.cpu_control.evntsel[i]
524 = this_state->control.cpu_control.
525 evntsel[i] & ( PERF_ENABLE | PERF_OS | PERF_USR );
526 }
527 this_state->control.cpu_control.ireset[i] = 0;
528 }
529
530 if ( is_pentium4() ) {
531 /* Clear pebs stuff */
532 this_state->control.cpu_control.p4.pebs_enable = 0;
533 this_state->control.cpu_control.p4.pebs_matrix_vert = 0;
534 }
535
536 /* clear both a and i counter counts */
537 this_state->control.cpu_control.nractrs = 0;
538 this_state->control.cpu_control.nrictrs = 0;
539
540#ifdef DEBUG
541 if ( is_pentium4() )
542 print_control( &this_state->control.cpu_control );
543#endif
544}
545
546/* This function clears the current contents of the control structure and
547 updates it with whatever resources are allocated for all the native events
548 in the native info structure array. */
549static int
552 hwd_context_t * ctx )
553{
554 ( void ) ctx; /*unused */
555 unsigned int i, k, retval = PAPI_OK;
556 hwd_register_t *bits,*bits2;
557 struct perfctr_cpu_control *cpu_control = &this_state->control.cpu_control;
558
559 /* clear out the events from the control state */
560 clear_cs_events( this_state );
561
562 if ( is_pentium4() ) {
563 /* fill the counters we're using */
564 for ( i = 0; i < ( unsigned int ) count; i++ ) {
565 /* dereference the mapping information about this native event */
566 bits = native[i].ni_bits;
567
568 /* Add counter control command values to eventset */
569 cpu_control->pmc_map[i] = bits->counter[0];
570 cpu_control->evntsel[i] = bits->cccr;
571 cpu_control->ireset[i] = bits->ireset;
572 cpu_control->pmc_map[i] |= FAST_RDPMC;
573 cpu_control->evntsel_aux[i] |= bits->event;
574
575 /* pebs_enable and pebs_matrix_vert are shared registers used for replay_events.
576 Replay_events count L1 and L2 cache events. There is only one of each for
577 the entire eventset. Therefore, there can be only one unique replay_event
578 per eventset. This means L1 and L2 can't be counted together. Which stinks.
579 This conflict should be trapped in the allocation scheme, but we'll test for it
580 here too, just in case. */
581 if ( bits->pebs_enable ) {
582 /* if pebs_enable isn't set, just copy */
583 if ( cpu_control->p4.pebs_enable == 0 ) {
584 cpu_control->p4.pebs_enable = bits->pebs_enable;
585 /* if pebs_enable conflicts, flag an error */
586 } else if ( cpu_control->p4.pebs_enable != bits->pebs_enable ) {
587 SUBDBG
588 ( "WARNING: P4_update_control_state -- pebs_enable conflict!" );
590 }
591 /* if pebs_enable == bits->pebs_enable, do nothing */
592 }
593 if ( bits->pebs_matrix_vert ) {
594 /* if pebs_matrix_vert isn't set, just copy */
595 if ( cpu_control->p4.pebs_matrix_vert == 0 ) {
596 cpu_control->p4.pebs_matrix_vert = bits->pebs_matrix_vert;
597 /* if pebs_matrix_vert conflicts, flag an error */
598 } else if ( cpu_control->p4.pebs_matrix_vert !=
599 bits->pebs_matrix_vert ) {
600 SUBDBG
601 ( "WARNING: P4_update_control_state -- pebs_matrix_vert conflict!" );
603 }
604 /* if pebs_matrix_vert == bits->pebs_matrix_vert, do nothing */
605 }
606 }
607 this_state->control.cpu_control.nractrs = count;
608
609 /* Make sure the TSC is always on */
610 this_state->control.cpu_control.tsc_on = 1;
611
612#ifdef DEBUG
613 print_control( &this_state->control.cpu_control );
614#endif
615 } else {
617#ifdef PERFCTR_X86_INTEL_CORE2
618 case PERFCTR_X86_INTEL_CORE2:
619 /* fill the counters we're using */
620 for ( i = 0; i < ( unsigned int ) count; i++ ) {
621 bits2 = native[i].ni_bits;
622 for ( k = 0; k < MAX_COUNTERS; k++ )
623 if ( bits2->selector & ( 1 << k ) ) {
624 break;
625 }
626 if ( k > 1 )
627 this_state->control.cpu_control.pmc_map[i] =
628 ( k - 2 ) | 0x40000000;
629 else
630 this_state->control.cpu_control.pmc_map[i] = k;
631
632 /* Add counter control command values to eventset */
633 this_state->control.cpu_control.evntsel[i] |=
634 bits2->counter_cmd;
635 }
636 break;
637#endif
638 default:
639 /* fill the counters we're using */
640 for ( i = 0; i < ( unsigned int ) count; i++ ) {
641 /* Add counter control command values to eventset */
642 bits2 = native[i].ni_bits;
643 this_state->control.cpu_control.evntsel[i] |=
644 bits2->counter_cmd;
645 }
646 }
647 this_state->control.cpu_control.nractrs = ( unsigned int ) count;
648 }
649 return retval;
650}
651
652static int
654{
655 int error;
656#ifdef DEBUG
657 print_control( &state->control.cpu_control );
658#endif
659
660 if ( state->rvperfctr != NULL ) {
661 if ( ( error =
662 rvperfctr_control( state->rvperfctr, &state->control ) ) < 0 ) {
663 SUBDBG( "rvperfctr_control returns: %d\n", error );
665 return ( PAPI_ESYS );
666 }
667 return ( PAPI_OK );
668 }
669
670 if ( ( error = vperfctr_control( ctx->perfctr, &state->control ) ) < 0 ) {
671 SUBDBG( "vperfctr_control returns: %d\n", error );
673 return ( PAPI_ESYS );
674 }
675 return ( PAPI_OK );
676}
677
678static int
680{
681 int error;
682
683 if ( state->rvperfctr != NULL ) {
684 if ( rvperfctr_stop( ( struct rvperfctr * ) ctx->perfctr ) < 0 ) {
686 return ( PAPI_ESYS );
687 }
688 return ( PAPI_OK );
689 }
690
691 error = vperfctr_stop( ctx->perfctr );
692 if ( error < 0 ) {
693 SUBDBG( "vperfctr_stop returns: %d\n", error );
695 return ( PAPI_ESYS );
696 }
697 return ( PAPI_OK );
698}
699
700static int
701_x86_read( hwd_context_t * ctx, hwd_control_state_t * spc, long long **dp,
702 int flags )
703{
704 if ( flags & PAPI_PAUSED ) {
705 vperfctr_read_state( ctx->perfctr, &spc->state, NULL );
706 if ( !is_pentium4() ) {
707 unsigned int i = 0;
708 for ( i = 0;
709 i <
710 spc->control.cpu_control.nractrs +
711 spc->control.cpu_control.nrictrs; i++ ) {
712 SUBDBG( "vperfctr_read_state: counter %d = %lld\n", i,
713 spc->state.pmc[i] );
714 }
715 }
716 } else {
717 SUBDBG( "vperfctr_read_ctrs\n" );
718 if ( spc->rvperfctr != NULL ) {
719 rvperfctr_read_ctrs( spc->rvperfctr, &spc->state );
720 } else {
721 vperfctr_read_ctrs( ctx->perfctr, &spc->state );
722 }
723 }
724 *dp = ( long long * ) spc->state.pmc;
725#ifdef DEBUG
726 {
727 if ( ISLEVEL( DEBUG_SUBSTRATE ) ) {
728 unsigned int i;
729 if ( is_pentium4() ) {
730 for ( i = 0; i < spc->control.cpu_control.nractrs; i++ ) {
731 SUBDBG( "raw val hardware index %d is %lld\n", i,
732 ( long long ) spc->state.pmc[i] );
733 }
734 } else {
735 for ( i = 0;
736 i <
737 spc->control.cpu_control.nractrs +
738 spc->control.cpu_control.nrictrs; i++ ) {
739 SUBDBG( "raw val hardware index %d is %lld\n", i,
740 ( long long ) spc->state.pmc[i] );
741 }
742 }
743 }
744 }
745#endif
746 return ( PAPI_OK );
747}
748
749static int
751{
752 return ( _x86_start( ctx, cntrl ) );
753}
754
755/* Perfctr requires that interrupting counters appear at the end of the pmc list
756 In the case a user wants to interrupt on a counter in an evntset that is not
757 among the last events, we need to move the perfctr virtual events around to
758 make it last. This function swaps two perfctr events, and then adjust the
759 position entries in both the NativeInfoArray and the EventInfoArray to keep
760 everything consistent. */
761static void
762swap_events( EventSetInfo_t * ESI, struct hwd_pmc_control *contr, int cntr1,
763 int cntr2 )
764{
765 unsigned int ui;
766 int si, i, j;
767
768 for ( i = 0; i < ESI->NativeCount; i++ ) {
769 if ( ESI->NativeInfoArray[i].ni_position == cntr1 )
770 ESI->NativeInfoArray[i].ni_position = cntr2;
771 else if ( ESI->NativeInfoArray[i].ni_position == cntr2 )
772 ESI->NativeInfoArray[i].ni_position = cntr1;
773 }
774
775 for ( i = 0; i < ESI->NumberOfEvents; i++ ) {
776 for ( j = 0; ESI->EventInfoArray[i].pos[j] >= 0; j++ ) {
777 if ( ESI->EventInfoArray[i].pos[j] == cntr1 )
778 ESI->EventInfoArray[i].pos[j] = cntr2;
779 else if ( ESI->EventInfoArray[i].pos[j] == cntr2 )
780 ESI->EventInfoArray[i].pos[j] = cntr1;
781 }
782 }
783
784 ui = contr->cpu_control.pmc_map[cntr1];
785 contr->cpu_control.pmc_map[cntr1] = contr->cpu_control.pmc_map[cntr2];
786 contr->cpu_control.pmc_map[cntr2] = ui;
787
788 ui = contr->cpu_control.evntsel[cntr1];
789 contr->cpu_control.evntsel[cntr1] = contr->cpu_control.evntsel[cntr2];
790 contr->cpu_control.evntsel[cntr2] = ui;
791
792 if ( is_pentium4() ) {
793 ui = contr->cpu_control.evntsel_aux[cntr1];
794 contr->cpu_control.evntsel_aux[cntr1] =
795 contr->cpu_control.evntsel_aux[cntr2];
796 contr->cpu_control.evntsel_aux[cntr2] = ui;
797 }
798
799 si = contr->cpu_control.ireset[cntr1];
800 contr->cpu_control.ireset[cntr1] = contr->cpu_control.ireset[cntr2];
801 contr->cpu_control.ireset[cntr2] = si;
802}
803
804static int
805_x86_set_overflow( EventSetInfo_t *ESI, int EventIndex, int threshold )
806{
808 struct hwd_pmc_control *contr = &(ctl->control);
809 int i, ncntrs, nricntrs = 0, nracntrs = 0, retval = 0;
810 OVFDBG( "EventIndex=%d\n", EventIndex );
811
812#ifdef DEBUG
813 if ( is_pentium4() )
814 print_control( &(contr->cpu_control) );
815#endif
816
817 /* The correct event to overflow is EventIndex */
819 i = ESI->EventInfoArray[EventIndex].pos[0];
820
821 if ( i >= ncntrs ) {
822 PAPIERROR( "Selector id %d is larger than ncntrs %d", i, ncntrs );
823 return PAPI_EINVAL;
824 }
825
826 if ( threshold != 0 ) { /* Set an overflow threshold */
830 if ( retval != PAPI_OK )
831 return ( retval );
832
833 /* overflow interrupt occurs on the NEXT event after overflow occurs
834 thus we subtract 1 from the threshold. */
835 contr->cpu_control.ireset[i] = ( -threshold + 1 );
836
837 if ( is_pentium4() )
838 contr->cpu_control.evntsel[i] |= CCCR_OVF_PMI_T0;
839 else
840 contr->cpu_control.evntsel[i] |= PERF_INT_ENABLE;
841
842 contr->cpu_control.nrictrs++;
843 contr->cpu_control.nractrs--;
844 nricntrs = ( int ) contr->cpu_control.nrictrs;
845 nracntrs = ( int ) contr->cpu_control.nractrs;
847
848 /* move this event to the bottom part of the list if needed */
849 if ( i < nracntrs )
850 swap_events( ESI, contr, i, nracntrs );
851 OVFDBG( "Modified event set\n" );
852 } else {
853 if ( is_pentium4() && contr->cpu_control.evntsel[i] & CCCR_OVF_PMI_T0 ) {
854 contr->cpu_control.ireset[i] = 0;
855 contr->cpu_control.evntsel[i] &= ( ~CCCR_OVF_PMI_T0 );
856 contr->cpu_control.nrictrs--;
857 contr->cpu_control.nractrs++;
858 } else if ( !is_pentium4() &&
859 contr->cpu_control.evntsel[i] & PERF_INT_ENABLE ) {
860 contr->cpu_control.ireset[i] = 0;
861 contr->cpu_control.evntsel[i] &= ( ~PERF_INT_ENABLE );
862 contr->cpu_control.nrictrs--;
863 contr->cpu_control.nractrs++;
864 }
865
866 nricntrs = ( int ) contr->cpu_control.nrictrs;
867 nracntrs = ( int ) contr->cpu_control.nractrs;
868
869 /* move this event to the top part of the list if needed */
870 if ( i >= nracntrs )
871 swap_events( ESI, contr, i, nracntrs - 1 );
872
873 if ( !nricntrs )
874 contr->si_signo = 0;
875
876 OVFDBG( "Modified event set\n" );
877
879 }
880
881#ifdef DEBUG
882 if ( is_pentium4() )
883 print_control( &(contr->cpu_control) );
884#endif
885 OVFDBG( "End of call. Exit code: %d\n", retval );
886 return ( retval );
887}
888
889static int
891{
892 ( void ) master; /*unused */
893 ( void ) ESI; /*unused */
894 return ( PAPI_OK );
895}
896
897
898
899/* these define cccr and escr register bits, and the p4 event structure */
901#include "../lib/pfmlib_pentium4_priv.h"
902
903#define P4_REPLAY_REAL_MASK 0x00000003
904
908
909
911 /* 0 */ {.enb = 0,
912 /* dummy */
913 .mat_vert = 0,
914 },
915 /* 1 */ {.enb = 0,
916 /* dummy */
917 .mat_vert = 0,
918 },
919 /* 2 */ {.enb = 0x01000001,
920 /* 1stL_cache_load_miss_retired */
921 .mat_vert = 0x00000001,
922 },
923 /* 3 */ {.enb = 0x01000002,
924 /* 2ndL_cache_load_miss_retired */
925 .mat_vert = 0x00000001,
926 },
927 /* 4 */ {.enb = 0x01000004,
928 /* DTLB_load_miss_retired */
929 .mat_vert = 0x00000001,
930 },
931 /* 5 */ {.enb = 0x01000004,
932 /* DTLB_store_miss_retired */
933 .mat_vert = 0x00000002,
934 },
935 /* 6 */ {.enb = 0x01000004,
936 /* DTLB_all_miss_retired */
937 .mat_vert = 0x00000003,
938 },
939 /* 7 */ {.enb = 0x01018001,
940 /* Tagged_mispred_branch */
941 .mat_vert = 0x00000010,
942 },
943 /* 8 */ {.enb = 0x01000200,
944 /* MOB_load_replay_retired */
945 .mat_vert = 0x00000001,
946 },
947 /* 9 */ {.enb = 0x01000400,
948 /* split_load_retired */
949 .mat_vert = 0x00000001,
950 },
951 /* 10 */ {.enb = 0x01000400,
952 /* split_store_retired */
953 .mat_vert = 0x00000002,
954 },
955};
956
/* this maps the arbitrary pmd index in libpfm/pentium4_events.h to the intel documentation */
/* index = libpfm pmd number, value = Intel counter number (used via pfm2intel[pmd] below) */
static int pfm2intel[] =
	{ 0, 1, 4, 5, 8, 9, 12, 13, 16, 2, 3, 6, 7, 10, 11, 14, 15, 17 };
960
961
962
963
964/* This call is broken. Selector can be much bigger than 32 bits. It should be a pfmlib_regmask_t - pjm */
965/* Also, libpfm assumes events can live on different counters with different codes. This call only returns
966 the first occurrence found. */
967/* Right now its only called by ntv_code_to_bits in perfctr-p3, so we're ok. But for it to be
968 generally useful it should be fixed. - dkt */
969static int
970_pfm_get_counter_info( unsigned int event, unsigned int *selector, int *code )
971{
972 pfmlib_regmask_t cnt, impl;
973 unsigned int num;
974 unsigned int i, first = 1;
975 int ret;
976
977 if ( ( ret = pfm_get_event_counters( event, &cnt ) ) != PFMLIB_SUCCESS ) {
978 PAPIERROR( "pfm_get_event_counters(%d,%p): %s", event, &cnt,
979 pfm_strerror( ret ) );
980 return PAPI_ESYS;
981 }
982 if ( ( ret = pfm_get_num_counters( &num ) ) != PFMLIB_SUCCESS ) {
983 PAPIERROR( "pfm_get_num_counters(%p): %s", num, pfm_strerror( ret ) );
984 return PAPI_ESYS;
985 }
986 if ( ( ret = pfm_get_impl_counters( &impl ) ) != PFMLIB_SUCCESS ) {
987 PAPIERROR( "pfm_get_impl_counters(%p): %s", &impl,
988 pfm_strerror( ret ) );
989 return PAPI_ESYS;
990 }
991
992 *selector = 0;
993 for ( i = 0; num; i++ ) {
994 if ( pfm_regmask_isset( &impl, i ) )
995 num--;
996 if ( pfm_regmask_isset( &cnt, i ) ) {
997 if ( first ) {
998 if ( ( ret =
1000 code ) ) !=
1001 PFMLIB_SUCCESS ) {
1002 PAPIERROR( "pfm_get_event_code_counter(%d, %d, %p): %s",
1003 event, i, code, pfm_strerror( ret ) );
1004 return PAPI_ESYS;
1005 }
1006 first = 0;
1007 }
1008 *selector |= 1 << i;
1009 }
1010 }
1011 return PAPI_OK;
1012}
1013
1014int
1016 hwd_register_t *newbits )
1017{
1018 unsigned int event, umask;
1019
1020 X86_register_t *bits = (X86_register_t *)newbits;
1021
1022 if ( is_pentium4() ) {
1023 pentium4_escr_value_t escr_value;
1024 pentium4_cccr_value_t cccr_value;
1025 unsigned int num_masks, replay_mask, unit_masks[12];
1026 unsigned int event_mask;
1027 unsigned int tag_value, tag_enable;
1028 unsigned int i;
1029 int j, escr, cccr, pmd;
1030
1031 if ( _pfm_decode_native_event( EventCode, &event, &umask ) != PAPI_OK )
1032 return PAPI_ENOEVNT;
1033
1034 /* for each allowed escr (1 or 2) find the allowed cccrs.
1035 for each allowed cccr find the pmd index
1036 convert to an intel counter number; or it into bits->counter */
1037 for ( i = 0; i < MAX_ESCRS_PER_EVENT; i++ ) {
1038 bits->counter[i] = 0;
1039 escr = pentium4_events[event].allowed_escrs[i];
1040 if ( escr < 0 ) {
1041 continue;
1042 }
1043
1044 bits->escr[i] = escr;
1045
1046 for ( j = 0; j < MAX_CCCRS_PER_ESCR; j++ ) {
1047 cccr = pentium4_escrs[escr].allowed_cccrs[j];
1048 if ( cccr < 0 ) {
1049 continue;
1050 }
1051
1052 pmd = pentium4_cccrs[cccr].pmd;
1053 bits->counter[i] |= ( 1 << pfm2intel[pmd] );
1054 }
1055 }
1056
1057 /* if there's only one valid escr, copy the values */
1058 if ( escr < 0 ) {
1059 bits->escr[1] = bits->escr[0];
1060 bits->counter[1] = bits->counter[0];
1061 }
1062
1063 /* Calculate the event-mask value. Invalid masks
1064 * specified by the caller are ignored. */
1065 tag_value = 0;
1066 tag_enable = 0;
1067 event_mask = _pfm_convert_umask( event, umask );
1068
1069 if ( event_mask & 0xF0000 ) {
1070 tag_enable = 1;
1071 tag_value = ( ( event_mask & 0xF0000 ) >> EVENT_MASK_BITS );
1072 }
1073
1074 event_mask &= 0x0FFFF; /* mask off possible tag bits */
1075
1076 /* Set up the ESCR and CCCR register values. */
1077 escr_value.val = 0;
1078 escr_value.bits.t1_usr = 0; /* controlled by kernel */
1079 escr_value.bits.t1_os = 0; /* controlled by kernel */
1080// escr_value.bits.t0_usr = (plm & PFM_PLM3) ? 1 : 0;
1081// escr_value.bits.t0_os = (plm & PFM_PLM0) ? 1 : 0;
1082 escr_value.bits.tag_enable = tag_enable;
1083 escr_value.bits.tag_value = tag_value;
1084 escr_value.bits.event_mask = event_mask;
1085 escr_value.bits.event_select = pentium4_events[event].event_select;
1086 escr_value.bits.reserved = 0;
1087
1088 /* initialize the proper bits in the cccr register */
1089 cccr_value.val = 0;
1090 cccr_value.bits.reserved1 = 0;
1091 cccr_value.bits.enable = 1;
1092 cccr_value.bits.escr_select = pentium4_events[event].escr_select;
1093 cccr_value.bits.active_thread = 3;
1094 /* FIXME: This is set to count when either logical
1095 * CPU is active. Need a way to distinguish
1096 * between logical CPUs when HT is enabled.
1097 * the docs say these bits should always
1098 * be set. */
1099 cccr_value.bits.compare = 0;
1100 /* FIXME: What do we do with "threshold" settings? */
1101 cccr_value.bits.complement = 0;
1102 /* FIXME: What do we do with "threshold" settings? */
1103 cccr_value.bits.threshold = 0;
1104 /* FIXME: What do we do with "threshold" settings? */
1105 cccr_value.bits.force_ovf = 0;
1106 /* FIXME: Do we want to allow "forcing" overflow
1107 * interrupts on all counter increments? */
1108 cccr_value.bits.ovf_pmi_t0 = 0;
1109 cccr_value.bits.ovf_pmi_t1 = 0;
1110 /* PMI taken care of by kernel typically */
1111 cccr_value.bits.reserved2 = 0;
1112 cccr_value.bits.cascade = 0;
1113 /* FIXME: How do we handle "cascading" counters? */
1114 cccr_value.bits.overflow = 0;
1115
1116 /* these flags are always zero, from what I can tell... */
1117 bits->pebs_enable = 0; /* flag for PEBS counting */
1118 bits->pebs_matrix_vert = 0;
1119 /* flag for PEBS_MATRIX_VERT, whatever that is */
1120
1121 /* ...unless the event is replay_event */
1122 if ( !strcmp( pentium4_events[event].name, "replay_event" ) ) {
1123 escr_value.bits.event_mask = event_mask & P4_REPLAY_REAL_MASK;
1124 num_masks = prepare_umask( umask, unit_masks );
1125 for ( i = 0; i < num_masks; i++ ) {
1126 replay_mask = unit_masks[i];
1127 if ( replay_mask > 1 && replay_mask < 11 ) {
1128 /* process each valid mask we find */
1129 bits->pebs_enable |= p4_replay_regs[replay_mask].enb;
1130 bits->pebs_matrix_vert |= p4_replay_regs[replay_mask].mat_vert;
1131 }
1132 }
1133 }
1134
1135 /* store the escr and cccr values */
1136 bits->event = escr_value.val;
1137 bits->cccr = cccr_value.val;
1138 bits->ireset = 0; /* I don't really know what this does */
1139 SUBDBG( "escr: 0x%lx; cccr: 0x%lx\n", escr_value.val, cccr_value.val );
1140 } else {
1141
1142 int ret, code;
1143
1144 if ( _pfm_decode_native_event( EventCode, &event, &umask ) != PAPI_OK )
1145 return PAPI_ENOEVNT;
1146
1147 if ( ( ret = _pfm_get_counter_info( event, &bits->selector,
1148 &code ) ) != PAPI_OK )
1149 return ret;
1150
1151 bits->counter_cmd=(int) (code | ((_pfm_convert_umask(event,umask))<< 8) );
1152
1153 SUBDBG( "selector: %#x\n", bits->selector );
1154 SUBDBG( "event: %#x; umask: %#x; code: %#x; cmd: %#x\n", event,
1155 umask, code, ( ( hwd_register_t * ) bits )->counter_cmd );
1156 }
1157
1158 return PAPI_OK;
1159}
1160
1161
1162
1164 .cmp_info = {
1165 /* default component information (unspecified values are initialized to 0) */
1166 .name = "perfctr",
1167 .description = "Linux perfctr CPU counters",
1168 .default_domain = PAPI_DOM_USER,
1169 .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL,
1170 .default_granularity = PAPI_GRN_THR,
1171 .available_granularities = PAPI_GRN_THR,
1172 .hardware_intr_sig = PAPI_INT_SIGNAL,
1173
1174 /* component specific cmp_info initializations */
1175 .fast_real_timer = 1,
1176 .fast_virtual_timer = 1,
1177 .attach = 1,
1178 .attach_must_ptrace = 1,
1179 .cntr_umasks = 1,
1180 }
1181 ,
1182
1183 /* sizes of framework-opaque component-private structures */
1184 .size = {
1185 .context = sizeof ( X86_perfctr_context_t ),
1186 .control_state = sizeof ( X86_perfctr_control_t ),
1187 .reg_value = sizeof ( X86_register_t ),
1188 .reg_alloc = sizeof ( X86_reg_alloc_t ),
1189 }
1190 ,
1191
1192 /* function pointers in this component */
1193 .init_control_state = _x86_init_control_state,
1194 .start = _x86_start,
1195 .stop = _x86_stop,
1196 .read = _x86_read,
1197 .allocate_registers = _x86_allocate_registers,
1198 .update_control_state = _x86_update_control_state,
1199 .set_domain = _x86_set_domain,
1200 .reset = _x86_reset,
1201 .set_overflow = _x86_set_overflow,
1202 .stop_profiling = _x86_stop_profiling,
1203
1204 .init_component = _perfctr_init_component,
1205 .ctl = _perfctr_ctl,
1206 .dispatch_timer = _perfctr_dispatch_timer,
1207 .init_thread = _perfctr_init_thread,
1208 .shutdown_thread = _perfctr_shutdown_thread,
1209
1210 /* from libpfm */
1211 .ntv_enum_events = _papi_libpfm_ntv_enum_events,
1212 .ntv_name_to_code = _papi_libpfm_ntv_name_to_code,
1213 .ntv_code_to_name = _papi_libpfm_ntv_code_to_name,
1214 .ntv_code_to_descr = _papi_libpfm_ntv_code_to_descr,
1215 .ntv_code_to_bits = _papi_libpfm_ntv_code_to_bits_perfctr,
1216
1217};
1218
1219
int i
static long count
int _papi_hwi_start_signal(int signal, int need_context, int cidx)
Definition: extras.c:403
int _papi_hwi_stop_signal(int signal)
Definition: extras.c:443
#define PAPI_DOM_USER
Definition: f90papi.h:174
#define PAPI_OK
Definition: f90papi.h:73
#define PAPI_ECNFLCT
Definition: f90papi.h:234
#define PAPI_DOM_KERNEL
Definition: f90papi.h:254
#define PAPI_ENOEVNT
Definition: f90papi.h:139
#define PAPI_PAUSED
Definition: f90papi.h:25
#define PAPI_VENDOR_INTEL
Definition: f90papi.h:275
#define PAPI_EINVAL
Definition: f90papi.h:115
#define PAPI_ESYS
Definition: f90papi.h:136
#define PAPI_GRN_THR
Definition: f90papi.h:265
static int threshold
static double a[MATRIX_SIZE][MATRIX_SIZE]
Definition: libmsr_basic.c:38
Return codes and api definitions.
static int _papi_bipartite_alloc(hwd_reg_alloc_t *event_list, int count, int cidx)
#define DEBUG_SUBSTRATE
Definition: papi_debug.h:27
#define OVFDBG(format, args...)
Definition: papi_debug.h:69
#define SUBDBG(format, args...)
Definition: papi_debug.h:64
#define ISLEVEL(a)
Definition: papi_debug.h:55
__sighandler_t signal(int __sig, __sighandler_t __handler) __attribute__((__nothrow__
bool state
Definition: papi_hl.c:155
void PAPIERROR(char *format,...)
#define PAPI_INT_SIGNAL
Definition: papi_internal.h:52
#define NEED_CONTEXT
Definition: papi_internal.h:97
int _papi_libpfm_ntv_code_to_name(unsigned int EventCode, char *ntv_name, int len)
int _papi_libpfm_ntv_enum_events(unsigned int *EventCode, int modifier)
int _papi_libpfm_ntv_name_to_code(const char *name, unsigned int *event_code)
int _papi_libpfm_ntv_code_to_descr(unsigned int EventCode, char *ntv_descr, int len)
unsigned int _pfm_convert_umask(unsigned int event, unsigned int umask)
static int native
static int _bpt_map_shared(hwd_reg_alloc_t *dst, hwd_reg_alloc_t *src)
Definition: perfctr-x86.c:305
papi_vector_t _perfctr_vector
Definition: perfctr-x86.c:1163
static void _bpt_map_preempt(hwd_reg_alloc_t *dst, hwd_reg_alloc_t *src)
Definition: perfctr-x86.c:343
papi_mdi_t _papi_hwi_system_info
Definition: papi_internal.c:56
void print_control(const struct perfctr_cpu_control *control)
Definition: perfctr-x86.c:96
static int _pfm_get_counter_info(unsigned int event, unsigned int *selector, int *code)
Definition: perfctr-x86.c:970
static int _x86_stop(hwd_context_t *ctx, hwd_control_state_t *state)
Definition: perfctr-x86.c:679
static int _bpt_map_exclusive(hwd_reg_alloc_t *dst)
Definition: perfctr-x86.c:295
#define P4_REPLAY_REAL_MASK
Definition: perfctr-x86.c:903
pentium4_escr_reg_t pentium4_escrs[]
int _perfctr_ctl(hwd_context_t *ctx, int code, _papi_int_option_t *option)
Definition: perfctr.c:295
int _perfctr_init_thread(hwd_context_t *ctx)
Definition: perfctr.c:386
static int is_pentium4(void)
Definition: perfctr-x86.c:75
pentium4_cccr_reg_t pentium4_cccrs[]
static void _bpt_map_update(hwd_reg_alloc_t *dst, hwd_reg_alloc_t *src)
Definition: perfctr-x86.c:406
int _perfctr_init_component(int)
Definition: perfctr.c:107
void _perfctr_dispatch_timer(int signal, hwd_siginfo_t *si, void *context)
static void _bpt_map_set(hwd_reg_alloc_t *dst, int ctr)
Definition: perfctr-x86.c:275
static int _x86_set_overflow(EventSetInfo_t *ESI, int EventIndex, int threshold)
Definition: perfctr-x86.c:805
static void clear_cs_events(hwd_control_state_t *this_state)
Definition: perfctr-x86.c:504
static void print_alloc(X86_reg_alloc_t *a)
Definition: perfctr-x86.c:87
static int pfm2intel[]
Definition: perfctr-x86.c:958
pentium4_event_t pentium4_events[]
static void swap_events(EventSetInfo_t *ESI, struct hwd_pmc_control *contr, int cntr1, int cntr2)
Definition: perfctr-x86.c:762
static int _x86_update_control_state(hwd_control_state_t *this_state, NativeInfo_t *native, int count, hwd_context_t *ctx)
Definition: perfctr-x86.c:550
static int _x86_read(hwd_context_t *ctx, hwd_control_state_t *spc, long long **dp, int flags)
Definition: perfctr-x86.c:701
int _x86_set_domain(hwd_control_state_t *cntrl, int domain)
Definition: perfctr-x86.c:210
static int _x86_allocate_registers(EventSetInfo_t *ESI)
Definition: perfctr-x86.c:418
static int _x86_start(hwd_context_t *ctx, hwd_control_state_t *state)
Definition: perfctr-x86.c:653
static int _x86_stop_profiling(ThreadInfo_t *master, EventSetInfo_t *ESI)
Definition: perfctr-x86.c:890
static int _bpt_map_avail(hwd_reg_alloc_t *dst, int ctr)
Definition: perfctr-x86.c:266
static int _x86_reset(hwd_context_t *ctx, hwd_control_state_t *cntrl)
Definition: perfctr-x86.c:750
int _papi_libpfm_ntv_code_to_bits_perfctr(unsigned int EventCode, hwd_register_t *newbits)
Definition: perfctr-x86.c:1015
static pentium4_replay_regs_t p4_replay_regs[]
Definition: perfctr-x86.c:910
static int _x86_init_control_state(hwd_control_state_t *ptr)
Definition: perfctr-x86.c:119
int _perfctr_shutdown_thread(hwd_context_t *ctx)
Definition: perfctr.c:434
#define ESCR_T0_USR
Definition: perfctr-x86.h:17
#define FAST_RDPMC
Definition: perfctr-x86.h:19
#define VCNTRL_ERROR
Definition: perfctr-x86.h:65
#define PERF_OS
Definition: perfctr-x86.h:56
#define PERF_ENABLE
Definition: perfctr-x86.h:52
#define RCNTRL_ERROR
Definition: perfctr-x86.h:66
#define PERF_USR
Definition: perfctr-x86.h:57
#define PERF_INT_ENABLE
Definition: perfctr-x86.h:53
#define hwd_pmc_control
Definition: perfctr-x86.h:11
#define CCCR_OVF_PMI_T0
Definition: perfctr-x86.h:18
#define MAX_COUNTERS
Definition: perfctr-x86.h:8
#define ALLCNTRS
Definition: perfctr-x86.h:35
#define ESCR_T0_OS
Definition: perfctr-x86.h:16
static int _pfm_decode_native_event(unsigned int EventCode, unsigned int *event, unsigned int *umask)
static int prepare_umask(unsigned int foo, unsigned int *values)
pfm_err_t pfm_get_event_counters(unsigned int idx, pfmlib_regmask_t *counters)
char * pfm_strerror(int code)
pfm_err_t pfm_get_event_code_counter(unsigned int idx, unsigned int cnt, int *code)
#define PFMLIB_SUCCESS
Definition: pfmlib.h:283
pfm_err_t pfm_get_impl_counters(pfmlib_regmask_t *impl_counters)
static int pfm_regmask_isset(pfmlib_regmask_t *h, unsigned int b)
Definition: pfmlib.h:313
pfm_err_t pfm_get_num_counters(unsigned int *num)
#define EVENT_MASK_BITS
#define MAX_CCCRS_PER_ESCR
#define MAX_ESCRS_PER_EVENT
if(file==NULL) goto out
const char * name
Definition: rocs.c:225
int
Definition: sde_internal.h:89
int pos[PAPI_EVENTS_IN_DERIVED_EVENT]
EventInfo_t * EventInfoArray
hwd_control_state_t * ctl_state
NativeInfo_t * NativeInfoArray
hwd_register_t * ni_bits
char name[PAPI_MAX_STR_LEN]
Definition: papi.h:627
int cpuid_family
Definition: papi.h:786
int model
Definition: papi.h:783
int vendor
Definition: papi.h:781
unsigned int selector
Definition: perfctr-x86.h:74
unsigned pebs_enable
Definition: perfctr-x86.h:81
unsigned ireset
Definition: perfctr-x86.h:83
unsigned counter[2]
Definition: perfctr-x86.h:77
unsigned event
Definition: perfctr-x86.h:80
unsigned pebs_matrix_vert
Definition: perfctr-x86.h:82
unsigned escr[2]
Definition: perfctr-x86.h:78
unsigned cccr
Definition: perfctr-x86.h:79
int event[MAX_COUNTERS]
Definition: solaris-ultra.h:47
PAPI_hw_info_t hw_info
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
int allowed_cccrs[MAX_CCCRS_PER_ESCR]
int allowed_escrs[MAX_ESCRS_PER_EVENT]
unsigned long escr_select
unsigned long complement
unsigned long ovf_pmi_t0
unsigned long ovf_pmi_t1
unsigned long active_thread
struct pentium4_cccr_value_t::@86 bits
unsigned long tag_enable
unsigned long reserved
unsigned long event_select
struct pentium4_escr_value_t::@85 bits
unsigned long tag_value
unsigned long event_mask
int retval
Definition: zero_fork.c:53