10#include <linux/unistd.h>
45#define evntsel_aux p4.escr
48#if defined(PAPI_PENTIUM4_VEC_MMX)
54#if defined(PAPI_PENTIUM4_FP_X87)
56#elif defined(PAPI_PENTIUM4_FP_X87_SSE_SP)
57#define P4_FPU " X87 SSE_SP"
58#elif defined(PAPI_PENTIUM4_FP_SSE_SP_DP)
59#define P4_FPU " SSE_SP SSE_DP"
61#define P4_FPU " X87 SSE_DP"
65#if defined(PAPI_OPTERON_FP_RETIRED)
66#define AMD_FPU "RETIRED"
67#elif defined(PAPI_OPTERON_FP_SSE_SP)
68#define AMD_FPU "SSE_SP"
69#elif defined(PAPI_OPTERON_FP_SSE_DP)
70#define AMD_FPU "SSE_DP"
72#define AMD_FPU "SPECULATIVE"
89 SUBDBG(
"X86_reg_alloc:\n" );
90 SUBDBG(
" selector: %#x\n",
a->ra_selector );
91 SUBDBG(
" rank: %#x\n",
a->ra_rank );
92 SUBDBG(
" escr: %#x %#x\n",
a->ra_escr[0],
a->ra_escr[1] );
99 SUBDBG(
"Control used:\n" );
100 SUBDBG(
"tsc_on\t\t\t%u\n", control->tsc_on );
101 SUBDBG(
"nractrs\t\t\t%u\n", control->nractrs );
102 SUBDBG(
"nrictrs\t\t\t%u\n", control->nrictrs );
104 for (
i = 0;
i < ( control->nractrs + control->nrictrs ); ++
i ) {
105 if ( control->pmc_map[
i] >= 18 ) {
106 SUBDBG(
"pmc_map[%u]\t\t0x%08X\n",
i, control->pmc_map[
i] );
108 SUBDBG(
"pmc_map[%u]\t\t%u\n",
i, control->pmc_map[
i] );
110 SUBDBG(
"evntsel[%u]\t\t0x%08X\n",
i, control->evntsel[
i] );
111 if ( control->ireset[
i] ) {
112 SUBDBG(
"ireset[%u]\t%d\n",
i, control->ireset[
i] );
130 ptr->control.cpu_control.evntsel_aux[
i] |= def_mode;
132 ptr->control.cpu_control.tsc_on = 1;
133 ptr->control.cpu_control.nractrs = 0;
134 ptr->control.cpu_control.nrictrs = 0;
136#ifdef VPERFCTR_CONTROL_CLOEXEC
137 ptr->control.flags = VPERFCTR_CONTROL_CLOEXEC;
138 SUBDBG(
"close on exec\t\t\t%u\n", ptr->control.flags );
147 ptr->allocated_registers.selector = 0;
149 case PERFCTR_X86_GENERIC:
150 case PERFCTR_X86_WINCHIP_C6:
151 case PERFCTR_X86_WINCHIP_2:
152 case PERFCTR_X86_VIA_C3:
153 case PERFCTR_X86_INTEL_P5:
154 case PERFCTR_X86_INTEL_P5MMX:
155 case PERFCTR_X86_INTEL_PII:
156 case PERFCTR_X86_INTEL_P6:
157 case PERFCTR_X86_INTEL_PIII:
158#ifdef PERFCTR_X86_INTEL_CORE
159 case PERFCTR_X86_INTEL_CORE:
161#ifdef PERFCTR_X86_INTEL_PENTM
162 case PERFCTR_X86_INTEL_PENTM:
164 ptr->control.cpu_control.evntsel[0] |=
PERF_ENABLE;
166 ptr->control.cpu_control.evntsel[
i] |= def_mode;
167 ptr->control.cpu_control.pmc_map[
i] = (
unsigned int )
i;
170#ifdef PERFCTR_X86_INTEL_CORE2
171 case PERFCTR_X86_INTEL_CORE2:
173#ifdef PERFCTR_X86_INTEL_ATOM
174 case PERFCTR_X86_INTEL_ATOM:
176#ifdef PERFCTR_X86_INTEL_NHLM
177 case PERFCTR_X86_INTEL_NHLM:
179#ifdef PERFCTR_X86_INTEL_WSTMR
180 case PERFCTR_X86_INTEL_WSTMR:
182#ifdef PERFCTR_X86_AMD_K8
183 case PERFCTR_X86_AMD_K8:
185#ifdef PERFCTR_X86_AMD_K8C
186 case PERFCTR_X86_AMD_K8C:
188#ifdef PERFCTR_X86_AMD_FAM10H
189 case PERFCTR_X86_AMD_FAM10H:
191 case PERFCTR_X86_AMD_K7:
193 ptr->control.cpu_control.evntsel[
i] |=
PERF_ENABLE | def_mode;
194 ptr->control.cpu_control.pmc_map[
i] = (
unsigned int )
i;
198#ifdef VPERFCTR_CONTROL_CLOEXEC
199 ptr->control.flags = VPERFCTR_CONTROL_CLOEXEC;
200 SUBDBG(
"close on exec\t\t\t%u\n", ptr->control.flags );
204 ptr->control.cpu_control.tsc_on = 1;
219 cntrl->control.cpu_control.evntsel_aux[
i] &=
226 cntrl->control.cpu_control.evntsel_aux[
i] |=
ESCR_T0_USR;
233 cntrl->control.cpu_control.evntsel_aux[
i] |=
ESCR_T0_OS;
237 for (
i = 0;
i < num_cntrs;
i++ ) {
243 for (
i = 0;
i < num_cntrs;
i++ ) {
244 cntrl->control.cpu_control.evntsel[
i] |=
PERF_USR;
250 for (
i = 0;
i < num_cntrs;
i++ ) {
251 cntrl->control.cpu_control.evntsel[
i] |=
PERF_OS;
268 return (
int ) ( dst->ra_selector & ( 1 << ctr ) );
277 dst->ra_selector = (
unsigned int ) ( 1 << ctr );
284 if ( dst->ra_bits.counter[0] & dst->ra_selector )
285 dst->ra_escr[1] = -1;
287 dst->ra_escr[0] = -1;
297 return ( dst->ra_rank == 1 );
308 int retval1, retval2;
311 retval1 = ( ( dst->ra_selector & src->ra_selector ) ||
313 ( ( dst->ra_escr[0] == src->ra_escr[0] ) &&
314 ( (
int ) dst->ra_escr[0] != -1 ) ) ||
315 ( ( dst->ra_escr[1] == src->ra_escr[1] ) &&
316 ( (
int ) dst->ra_escr[1] != -1 ) ) );
320 ( ( ( dst->ra_bits.pebs_enable && src->ra_bits.pebs_enable ) &&
322 ( dst->ra_bits.pebs_enable != src->ra_bits.pebs_enable ) ) ||
324 ( ( dst->ra_bits.pebs_matrix_vert &&
325 src->ra_bits.pebs_matrix_vert ) &&
326 ( dst->ra_bits.pebs_matrix_vert !=
327 src->ra_bits.pebs_matrix_vert ) ) );
329 SUBDBG(
"pebs conflict!\n" );
331 return ( retval1 | retval2 );
334 return (
int ) ( dst->ra_selector & src->ra_selector );
357 i = ( ( ( dst->ra_bits.pebs_enable && src->ra_bits.pebs_enable ) &&
359 ( dst->ra_bits.pebs_enable != src->ra_bits.pebs_enable ) ) ||
361 ( ( dst->ra_bits.pebs_matrix_vert &&
362 src->ra_bits.pebs_matrix_vert )
363 && ( dst->ra_bits.pebs_matrix_vert !=
364 src->ra_bits.pebs_matrix_vert ) ) );
366 SUBDBG(
"pebs conflict! clearing selector\n" );
367 dst->ra_selector = 0;
371 if ( ( dst->ra_escr[0] == src->ra_escr[0] ) &&
372 ( (
int ) dst->ra_escr[0] != -1 ) ) {
373 dst->ra_selector &= ~dst->ra_bits.counter[0];
374 dst->ra_escr[0] = -1;
376 if ( ( dst->ra_escr[1] == src->ra_escr[1] ) &&
377 ( (
int ) dst->ra_escr[1] != -1 ) ) {
378 dst->ra_selector &= ~dst->ra_bits.counter[1];
379 dst->ra_escr[1] = -1;
383 shared = ( dst->ra_selector & src->ra_selector );
385 dst->ra_selector ^= shared;
389 if ( dst->ra_selector & ( 1 <<
i ) )
396 shared = dst->ra_selector & src->ra_selector;
398 dst->ra_selector ^= shared;
400 if ( dst->ra_selector & ( 1 <<
i ) )
408 dst->ra_selector = src->ra_selector;
411 dst->ra_escr[0] = src->ra_escr[0];
412 dst->ra_escr[1] = src->ra_escr[1];
429 SUBDBG(
"native event count: %d\n", natNum );
432 for (
i = 0;
i < natNum;
i++ ) {
435 ni_event, &event_list[
i].ra_bits );
439 event_list[
i].ra_selector =
440 event_list[
i].ra_bits.counter[0] | event_list[
i].ra_bits.
444 event_list[
i].ra_selector =
445 event_list[
i].ra_bits.selector &
ALLCNTRS;
446#ifdef PERFCTR_X86_INTEL_CORE2
448 PERFCTR_X86_INTEL_CORE2 )
449 event_list[
i].ra_selector |=
450 ( ( event_list[
i].ra_bits.
455 event_list[
i].ra_rank = 0;
457 if ( event_list[
i].ra_selector & ( 1 << j ) ) {
458 event_list[
i].ra_rank++;
463 event_list[
i].ra_escr[0] = event_list[
i].ra_bits.escr[0];
464 event_list[
i].ra_escr[1] = event_list[
i].ra_bits.escr[1];
472 for (
i = 0;
i < natNum;
i++ ) {
473#ifdef PERFCTR_X86_INTEL_CORE2
475 PERFCTR_X86_INTEL_CORE2 )
476 event_list[
i].ra_bits.selector = event_list[
i].ra_selector;
486 *ptr = event_list[
i].ra_bits;
491 ptr->counter[0] = ffs( event_list[
i].ra_selector ) - 1;
509 j = this_state->control.cpu_control.nractrs +
510 this_state->control.cpu_control.nrictrs;
513 for (
i = 0;
i < j;
i++ ) {
514 SUBDBG(
"Clearing pmc event entry %d\n",
i );
516 this_state->control.cpu_control.pmc_map[
i] = 0;
517 this_state->control.cpu_control.evntsel[
i] = 0;
518 this_state->control.cpu_control.evntsel_aux[
i] =
519 this_state->control.cpu_control.
522 this_state->control.cpu_control.pmc_map[
i] =
i;
523 this_state->control.cpu_control.evntsel[
i]
524 = this_state->control.cpu_control.
527 this_state->control.cpu_control.ireset[
i] = 0;
532 this_state->control.cpu_control.p4.pebs_enable = 0;
533 this_state->control.cpu_control.p4.pebs_matrix_vert = 0;
537 this_state->control.cpu_control.nractrs = 0;
538 this_state->control.cpu_control.nrictrs = 0;
557 struct perfctr_cpu_control *cpu_control = &this_state->control.cpu_control;
569 cpu_control->pmc_map[
i] = bits->counter[0];
570 cpu_control->evntsel[
i] = bits->cccr;
571 cpu_control->ireset[
i] = bits->ireset;
573 cpu_control->evntsel_aux[
i] |= bits->
event;
581 if ( bits->pebs_enable ) {
583 if ( cpu_control->p4.pebs_enable == 0 ) {
584 cpu_control->p4.pebs_enable = bits->pebs_enable;
586 }
else if ( cpu_control->p4.pebs_enable != bits->pebs_enable ) {
588 (
"WARNING: P4_update_control_state -- pebs_enable conflict!" );
593 if ( bits->pebs_matrix_vert ) {
595 if ( cpu_control->p4.pebs_matrix_vert == 0 ) {
596 cpu_control->p4.pebs_matrix_vert = bits->pebs_matrix_vert;
598 }
else if ( cpu_control->p4.pebs_matrix_vert !=
599 bits->pebs_matrix_vert ) {
601 (
"WARNING: P4_update_control_state -- pebs_matrix_vert conflict!" );
607 this_state->control.cpu_control.nractrs =
count;
610 this_state->control.cpu_control.tsc_on = 1;
617#ifdef PERFCTR_X86_INTEL_CORE2
618 case PERFCTR_X86_INTEL_CORE2:
623 if ( bits2->selector & ( 1 << k ) ) {
627 this_state->control.cpu_control.pmc_map[
i] =
628 ( k - 2 ) | 0x40000000;
630 this_state->control.cpu_control.pmc_map[
i] = k;
633 this_state->control.cpu_control.evntsel[
i] |=
643 this_state->control.cpu_control.evntsel[
i] |=
647 this_state->control.cpu_control.nractrs = (
unsigned int )
count;
660 if (
state->rvperfctr != NULL ) {
662 rvperfctr_control(
state->rvperfctr, &
state->control ) ) < 0 ) {
663 SUBDBG(
"rvperfctr_control returns: %d\n", error );
670 if ( ( error = vperfctr_control( ctx->perfctr, &
state->control ) ) < 0 ) {
671 SUBDBG(
"vperfctr_control returns: %d\n", error );
683 if (
state->rvperfctr != NULL ) {
684 if ( rvperfctr_stop( (
struct rvperfctr * ) ctx->perfctr ) < 0 ) {
691 error = vperfctr_stop( ctx->perfctr );
693 SUBDBG(
"vperfctr_stop returns: %d\n", error );
705 vperfctr_read_state( ctx->perfctr, &spc->state, NULL );
710 spc->control.cpu_control.nractrs +
711 spc->control.cpu_control.nrictrs;
i++ ) {
712 SUBDBG(
"vperfctr_read_state: counter %d = %lld\n",
i,
717 SUBDBG(
"vperfctr_read_ctrs\n" );
718 if ( spc->rvperfctr != NULL ) {
719 rvperfctr_read_ctrs( spc->rvperfctr, &spc->state );
721 vperfctr_read_ctrs( ctx->perfctr, &spc->state );
724 *dp = (
long long * ) spc->state.pmc;
730 for (
i = 0;
i < spc->control.cpu_control.nractrs;
i++ ) {
731 SUBDBG(
"raw val hardware index %d is %lld\n",
i,
732 (
long long ) spc->state.pmc[
i] );
737 spc->control.cpu_control.nractrs +
738 spc->control.cpu_control.nrictrs;
i++ ) {
739 SUBDBG(
"raw val hardware index %d is %lld\n",
i,
740 (
long long ) spc->state.pmc[
i] );
784 ui = contr->cpu_control.pmc_map[cntr1];
785 contr->cpu_control.pmc_map[cntr1] = contr->cpu_control.pmc_map[cntr2];
786 contr->cpu_control.pmc_map[cntr2] = ui;
788 ui = contr->cpu_control.evntsel[cntr1];
789 contr->cpu_control.evntsel[cntr1] = contr->cpu_control.evntsel[cntr2];
790 contr->cpu_control.evntsel[cntr2] = ui;
793 ui = contr->cpu_control.evntsel_aux[cntr1];
794 contr->cpu_control.evntsel_aux[cntr1] =
795 contr->cpu_control.evntsel_aux[cntr2];
796 contr->cpu_control.evntsel_aux[cntr2] = ui;
799 si = contr->cpu_control.ireset[cntr1];
800 contr->cpu_control.ireset[cntr1] = contr->cpu_control.ireset[cntr2];
801 contr->cpu_control.ireset[cntr2] = si;
809 int i, ncntrs, nricntrs = 0, nracntrs = 0,
retval = 0;
810 OVFDBG(
"EventIndex=%d\n", EventIndex );
822 PAPIERROR(
"Selector id %d is larger than ncntrs %d",
i, ncntrs );
835 contr->cpu_control.ireset[
i] = ( -
threshold + 1 );
842 contr->cpu_control.nrictrs++;
843 contr->cpu_control.nractrs--;
844 nricntrs = (
int ) contr->cpu_control.nrictrs;
845 nracntrs = (
int ) contr->cpu_control.nractrs;
851 OVFDBG(
"Modified event set\n" );
854 contr->cpu_control.ireset[
i] = 0;
855 contr->cpu_control.evntsel[
i] &= ( ~CCCR_OVF_PMI_T0 );
856 contr->cpu_control.nrictrs--;
857 contr->cpu_control.nractrs++;
860 contr->cpu_control.ireset[
i] = 0;
861 contr->cpu_control.evntsel[
i] &= ( ~PERF_INT_ENABLE );
862 contr->cpu_control.nrictrs--;
863 contr->cpu_control.nractrs++;
866 nricntrs = (
int ) contr->cpu_control.nrictrs;
867 nracntrs = (
int ) contr->cpu_control.nractrs;
876 OVFDBG(
"Modified event set\n" );
901#include "../lib/pfmlib_pentium4_priv.h"
903#define P4_REPLAY_REAL_MASK 0x00000003
921 .mat_vert = 0x00000001,
925 .mat_vert = 0x00000001,
929 .mat_vert = 0x00000001,
933 .mat_vert = 0x00000002,
937 .mat_vert = 0x00000003,
941 .mat_vert = 0x00000010,
945 .mat_vert = 0x00000001,
949 .mat_vert = 0x00000001,
953 .mat_vert = 0x00000002,
959 { 0, 1, 4, 5, 8, 9, 12, 13, 16, 2, 3, 6, 7, 10, 11, 14, 15, 17 };
974 unsigned int i, first = 1;
978 PAPIERROR(
"pfm_get_event_counters(%d,%p): %s", event, &cnt,
987 PAPIERROR(
"pfm_get_impl_counters(%p): %s", &impl,
993 for (
i = 0; num;
i++ ) {
1002 PAPIERROR(
"pfm_get_event_code_counter(%d, %d, %p): %s",
1008 *selector |= 1 <<
i;
1018 unsigned int event, umask;
1025 unsigned int num_masks, replay_mask, unit_masks[12];
1026 unsigned int event_mask;
1027 unsigned int tag_value, tag_enable;
1029 int j, escr, cccr, pmd;
1044 bits->
escr[
i] = escr;
1069 if ( event_mask & 0xF0000 ) {
1074 event_mask &= 0x0FFFF;
1125 for (
i = 0;
i < num_masks;
i++ ) {
1126 replay_mask = unit_masks[
i];
1127 if ( replay_mask > 1 && replay_mask < 11 ) {
1139 SUBDBG(
"escr: 0x%lx; cccr: 0x%lx\n", escr_value.
val, cccr_value.
val );
1154 SUBDBG(
"event: %#x; umask: %#x; code: %#x; cmd: %#x\n", event,
1167 .description =
"Linux perfctr CPU counters",
1175 .fast_real_timer = 1,
1176 .fast_virtual_timer = 1,
1178 .attach_must_ptrace = 1,
#define PAPI_VENDOR_INTEL
static double a[MATRIX_SIZE][MATRIX_SIZE]
Return codes and api definitions.
static int _papi_bipartite_alloc(hwd_reg_alloc_t *event_list, int count, int cidx)
#define OVFDBG(format, args...)
#define SUBDBG(format, args...)
__sighandler_t signal(int __sig, __sighandler_t __handler) __attribute__((__nothrow__
void PAPIERROR(char *format,...)
int _papi_libpfm_ntv_code_to_name(unsigned int EventCode, char *ntv_name, int len)
int _papi_libpfm_ntv_enum_events(unsigned int *EventCode, int modifier)
int _papi_libpfm_ntv_name_to_code(const char *name, unsigned int *event_code)
int _papi_libpfm_ntv_code_to_descr(unsigned int EventCode, char *ntv_descr, int len)
unsigned int _pfm_convert_umask(unsigned int event, unsigned int umask)
static int _bpt_map_shared(hwd_reg_alloc_t *dst, hwd_reg_alloc_t *src)
papi_vector_t _perfctr_vector
static void _bpt_map_preempt(hwd_reg_alloc_t *dst, hwd_reg_alloc_t *src)
papi_mdi_t _papi_hwi_system_info
void print_control(const struct perfctr_cpu_control *control)
static int _pfm_get_counter_info(unsigned int event, unsigned int *selector, int *code)
static int _x86_stop(hwd_context_t *ctx, hwd_control_state_t *state)
static int _bpt_map_exclusive(hwd_reg_alloc_t *dst)
#define P4_REPLAY_REAL_MASK
pentium4_escr_reg_t pentium4_escrs[]
int _perfctr_ctl(hwd_context_t *ctx, int code, _papi_int_option_t *option)
int _perfctr_init_thread(hwd_context_t *ctx)
static int is_pentium4(void)
pentium4_cccr_reg_t pentium4_cccrs[]
static void _bpt_map_update(hwd_reg_alloc_t *dst, hwd_reg_alloc_t *src)
int _perfctr_init_component(int)
void _perfctr_dispatch_timer(int signal, hwd_siginfo_t *si, void *context)
static void _bpt_map_set(hwd_reg_alloc_t *dst, int ctr)
static int _x86_set_overflow(EventSetInfo_t *ESI, int EventIndex, int threshold)
static void clear_cs_events(hwd_control_state_t *this_state)
static void print_alloc(X86_reg_alloc_t *a)
pentium4_event_t pentium4_events[]
static void swap_events(EventSetInfo_t *ESI, struct hwd_pmc_control *contr, int cntr1, int cntr2)
static int _x86_update_control_state(hwd_control_state_t *this_state, NativeInfo_t *native, int count, hwd_context_t *ctx)
static int _x86_read(hwd_context_t *ctx, hwd_control_state_t *spc, long long **dp, int flags)
int _x86_set_domain(hwd_control_state_t *cntrl, int domain)
static int _x86_allocate_registers(EventSetInfo_t *ESI)
static int _x86_start(hwd_context_t *ctx, hwd_control_state_t *state)
static int _x86_stop_profiling(ThreadInfo_t *master, EventSetInfo_t *ESI)
static int _bpt_map_avail(hwd_reg_alloc_t *dst, int ctr)
static int _x86_reset(hwd_context_t *ctx, hwd_control_state_t *cntrl)
int _papi_libpfm_ntv_code_to_bits_perfctr(unsigned int EventCode, hwd_register_t *newbits)
static pentium4_replay_regs_t p4_replay_regs[]
static int _x86_init_control_state(hwd_control_state_t *ptr)
int _perfctr_shutdown_thread(hwd_context_t *ctx)
static int _pfm_decode_native_event(unsigned int EventCode, unsigned int *event, unsigned int *umask)
static int prepare_umask(unsigned int foo, unsigned int *values)
pfm_err_t pfm_get_event_counters(unsigned int idx, pfmlib_regmask_t *counters)
char * pfm_strerror(int code)
pfm_err_t pfm_get_event_code_counter(unsigned int idx, unsigned int cnt, int *code)
pfm_err_t pfm_get_impl_counters(pfmlib_regmask_t *impl_counters)
static int pfm_regmask_isset(pfmlib_regmask_t *h, unsigned int b)
pfm_err_t pfm_get_num_counters(unsigned int *num)
#define MAX_CCCRS_PER_ESCR
#define MAX_ESCRS_PER_EVENT
int pos[PAPI_EVENTS_IN_DERIVED_EVENT]
EventInfo_t * EventInfoArray
hwd_control_state_t * ctl_state
NativeInfo_t * NativeInfoArray
char name[PAPI_MAX_STR_LEN]
unsigned pebs_matrix_vert
PAPI_component_info_t cmp_info
int allowed_cccrs[MAX_CCCRS_PER_ESCR]
int allowed_escrs[MAX_ESCRS_PER_EVENT]
unsigned int event_select
unsigned long escr_select
unsigned long active_thread
struct pentium4_cccr_value_t::@86 bits
unsigned long event_select
struct pentium4_escr_value_t::@85 bits