/*
 * Shorthand names for the uncore event-select bit-fields.
 *
 * Each alias expands to the member of the same name inside the
 * `unc_perfevtsel` sub-structure, so `reg.usel_en` is read by the
 * compiler as `reg.unc_perfevtsel.usel_en`.
 *
 * NOTE(review): the enclosing register type is presumably
 * pfm_nhm_sel_reg_t; confirm against the header that declares it.
 * The aliases must be defined only after that type has been declared,
 * otherwise the member names in its declaration would be expanded too.
 */
#define usel_event	unc_perfevtsel.usel_event
#define usel_umask	unc_perfevtsel.usel_umask
#define usel_occ	unc_perfevtsel.usel_occ
#define usel_edge	unc_perfevtsel.usel_edge
#define usel_int	unc_perfevtsel.usel_int
#define usel_en		unc_perfevtsel.usel_en
#define usel_inv	unc_perfevtsel.usel_inv
#define usel_cnt_mask	unc_perfevtsel.usel_cnt_mask
/*
 * Shorthand names for the core event-select bit-fields.
 *
 * Each alias expands to the member of the same name inside the
 * `perfevtsel` sub-structure, so `reg.sel_event` is read by the
 * compiler as `reg.perfevtsel.sel_event`.
 *
 * NOTE(review): the enclosing register type is presumably
 * pfm_nhm_sel_reg_t; confirm against the header that declares it.
 * The aliases must be defined only after that type has been declared,
 * otherwise the member names in its declaration would be expanded too.
 */
#define sel_event	perfevtsel.sel_event
#define sel_umask	perfevtsel.sel_umask
#define sel_usr		perfevtsel.sel_usr
#define sel_os		perfevtsel.sel_os
#define sel_edge	perfevtsel.sel_edge
#define sel_pc		perfevtsel.sel_pc
#define sel_int		perfevtsel.sel_int
#define sel_en		perfevtsel.sel_en
#define sel_inv		perfevtsel.sel_inv
#define sel_anythr	perfevtsel.sel_anythr
#define sel_cnt_mask	perfevtsel.sel_cnt_mask
/*
 * Base register addresses for the core PMU.
 *
 * NOTE(review): the values coincide with the architectural MSR addresses
 * of the first event-select register (0x186), the first generic counter
 * (0xc1) and the first fixed counter (0x309); verify against the Intel SDM
 * MSR listing before relying on that reading.
 */
#define NHM_SEL_BASE		0x186
#define NHM_CTR_BASE		0xc1
#define NHM_FIXED_CTR_BASE	0x309

/*
 * Base register addresses for the uncore PMU, in the same order:
 * first event-select, first generic counter, fixed counter.
 * NOTE(review): confirm against the Nehalem uncore MSR listing.
 */
#define UNC_NHM_SEL_BASE	0x3c0
#define UNC_NHM_CTR_BASE	0x3b0
#define UNC_NHM_FIXED_CTR_BASE	0x394
/*
 * Upper bound on the number of counters handled by this module.
 * NOTE(review): presumably sizes the per-event assignment arrays used
 * during dispatch; confirm at the point of use.
 */
#define MAX_COUNTERS 28

/* Union of the per-event option flags this module recognizes. */
#define PFMLIB_NHM_ALL_FLAGS \
	(PFM_NHM_SEL_INV|PFM_NHM_SEL_EDGE|PFM_NHM_SEL_ANYTHR)

/* Number of generic and fixed core counters, respectively. */
#define NHM_NUM_GEN_COUNTERS	4
#define NHM_NUM_FIXED_COUNTERS	3
143static inline void cpuid(
unsigned int op,
unsigned int *eax,
unsigned int *ebx,
144 unsigned int *ecx,
unsigned int *edx)
152 __asm__(
"pushl %%ebx;cpuid; popl %%ebx"
157 __asm__(
"pushl %%ebx;cpuid; movl %%ebx, %%eax;popl %%ebx"
163static inline void cpuid(
unsigned int op,
unsigned int *eax,
unsigned int *ebx,
164 unsigned int *ecx,
unsigned int *edx)
198 DPRINT(
"cannot find umask %d for event %s\n", midx, ne->
pme_name);
213 if (strcmp(buffer,
"GenuineIntel"))
/*
 * Bit 6 mask.
 * NOTE(review): presumably tested against an output register of CPUID
 * leaf 0xa to find out whether the mispredicted-branch-retired
 * architectural event is usable; confirm at the point of use.
 */
#define INTEL_ARCH_MISP_BR_RETIRED	(1 << 6)
238 unsigned int eax, ebx, ecx, edx;
257 cpuid(0xa, &eax, &ebx, &ecx, &edx);
358 for(
i=31;
i < 64;
i++)
428 unsigned int fl, flc,
i;
429 unsigned int mask = 0;
465 return flc > 0 && flc == e->
num_masks ? 1 : 0;
475 unsigned int ref, ucode;
/*
 * Helper predicates used while assigning events to registers.
 *
 * Every macro argument is parenthesized so that an expression argument
 * (for example a conditional or a bitwise expression) is evaluated as a
 * unit rather than being re-associated with the operators in the macro
 * body. The arguments may still be evaluated more than once, so do not
 * pass expressions with side effects.
 */

/*
 * True when event slot `x` carries per-event options, i.e. `cntrs` is
 * non-NULL and either its flags or its cnt_mask is non-zero. Relies on a
 * variable named `cntrs` being in scope at the point of use.
 */
#define HAS_OPTIONS(x)	(cntrs && (cntrs[(x)].flags || cntrs[(x)].cnt_mask))

/* True when register index `a` is 16, 17 or 18. */
#define is_fixed_pmc(a)	((a) == 16 || (a) == 17 || (a) == 18)

/* True when register index `a` is greater than 19. */
#define is_uncore(a)	((a) > 19)
514 uint64_t val, unc_global_ctrl;
515 uint64_t pebs_mask, ld_mask;
516 unsigned long long fixed_ctr;
518 unsigned int npc, npmc0, npmc01, nf2, nuf;
519 unsigned int i, n, k, j, umask, use_pebs = 0;
521 unsigned int next_gen, last_gen, u_flags;
522 unsigned int next_unc_gen, last_unc_gen, lat;
523 unsigned int offcore_rsp0_value = 0;
524 unsigned int offcore_rsp1_value = 0;
526 npc = npmc01 = npmc0 = nf2 = nuf = 0;
535 pebs_mask = ld_mask = 0;
545 for(
i=0;
i < n;
i++) {
556 DPRINT(
"MISPREDICTED_BRANCH_RETIRED broken on this Nehalem processor, see eeratum AAJ80\n");
568 DPRINT(
"events does not support unit mask combination\n");
576 DPRINT(
"two events compete for a UNCORE_FIXED_CTR0\n");
580 DPRINT(
"uncore fixed counter does not support options\n");
586 DPRINT(
"two events compete for a PMC0\n");
596 DPRINT(
"two events compete for a PMC0\n");
606 DPRINT(
"two events compete for FIXED_CTR2\n");
610 DPRINT(
"UNHALTED_REFERENCE_CYCLES only accepts anythr filter\n");
625 if (offcore_rsp0_value && offcore_rsp0_value != umask) {
626 DPRINT(
"all OFFCORE_RSP0 events must have the same unit mask\n");
630 DPRINT(
"OFFCORE_RSP0 register not available\n");
633 if (!((umask & 0xff) && (umask & 0xff00))) {
634 DPRINT(
"OFFCORE_RSP0 must have at least one request and response unit mask set\n");
638 offcore_rsp0_value = umask;
641 if (offcore_rsp1_value && offcore_rsp1_value != umask) {
642 DPRINT(
"all OFFCORE_RSP1 events must have the same unit mask\n");
646 DPRINT(
"OFFCORE_RSP1 register not available\n");
649 if (!((umask & 0xff) && (umask & 0xff00))) {
650 DPRINT(
"OFFCORE_RSP1 must have at least one request and response unit mask set\n");
654 offcore_rsp1_value = umask;
665 DPRINT(
"uncore events must have PLM0|PLM3\n");
685 for(
i=0;
i < n;
i++) {
711 for(
i=0;
i < n;
i++) {
718 assign_pc[
i] = next_gen++;
735 for(
i=0;
i < n;
i++) {
766 for(
i=0;
i < n;
i++) {
769 for(; next_unc_gen <= last_unc_gen; next_unc_gen++) {
773 if (next_unc_gen <= last_unc_gen)
774 assign_pc[
i] = next_unc_gen++;
776 DPRINT(
"cannot assign generic uncore event\n");
785 for(
i=0;
i < n;
i++) {
786 if (assign_pc[
i] == -1) {
787 for(; next_gen <= last_gen; next_gen++) {
792 if (next_gen <= last_gen) {
793 assign_pc[
i] = next_gen++;
795 DPRINT(
"cannot assign generic event\n");
805 for (
i=0;
i < n ;
i++ ) {
819 reg.
val |= val << ((assign_pc[
i]-16)<<2);
828 __pfm_vbprintf(
"[FIXED_CTRL(pmc%u)=0x%"PRIx64
" pmi0=1 en0=0x%"PRIx64
" any0=%d pmi1=1 en1=0x%"PRIx64
" any1=%d pmi2=1 en2=0x%"PRIx64
" any2=%d] ",
832 !!(reg.
val & 0x4ULL),
833 (reg.
val>>4) & 0x3ULL,
834 !!((reg.
val>>4) & 0x4ULL),
835 (reg.
val>>8) & 0x3ULL,
836 !!((reg.
val>>8) & 0x4ULL));
838 if ((fixed_ctr & 0x1) == 0)
840 if ((fixed_ctr & 0x2) == 0)
842 if ((fixed_ctr & 0x4) == 0)
848 if ((fixed_ctr & 0x1) == 0)
850 if ((fixed_ctr & 0x2) == 0)
852 if ((fixed_ctr & 0x4) == 0)
859 for (
i=0;
i < n ;
i++ ) {
874 umask = (val >> 8) & 0xff;
914 if (cntrs[
i].cnt_mask > 255)
929 pebs_mask |= 1ULL << assign_pc[
i];
936 if (reg.
sel_event == 0xb && (umask & 0x10))
937 ld_mask |= 1ULL << assign_pc[
i];
944 __pfm_vbprintf(
"[PERFEVTSEL%u(pmc%u)=0x%"PRIx64
" event_sel=0x%x umask=0x%x os=%d usr=%d anythr=%d en=%d int=%d inv=%d edge=%d cnt_mask=%d] %s\n",
974 __pfm_vbprintf(
"[UNC_FIXED_CTRL(pmc20)=0x%"PRIx64
" pmi=1 ena=1] UNC_CLK_UNHALTED\n", pc[npc].reg_value);
976 unc_global_ctrl |= 1ULL<< 32;
982 for (
i=0;
i < n ;
i++ ) {
985 if (!
is_uncore(assign_pc[
i]) || assign_pc[
i] == 20)
995 umask = (val >> 8) & 0xff;
1028 if (cntrs[
i].cnt_mask > 255)
1043 unc_global_ctrl |= 1ULL<< (assign_pc[
i] - 21);
1049 __pfm_vbprintf(
"[UNC_PERFEVTSEL%u(pmc%u)=0x%"PRIx64
" event=0x%x umask=0x%x en=%d int=%d inv=%d edge=%d occ=%d cnt_msk=%d] %s\n",
1050 pc[npc].reg_num - 21,
1064 pc[npc].reg_num - 21,
1072 for (
i=0;
i < n ;
i++) {
1073 switch (assign_pc[
i]) {
1103 if (use_pebs && pebs_mask) {
1113 pc[npc].
reg_value = pebs_mask | (ld_mask <<32);
1117 __pfm_vbprintf(
"[PEBS_ENABLE(pmc%u)=0x%"PRIx64
" ena0=%d ena1=%d ena2=%d ena3=%d ll0=%d ll1=%d ll2=%d ll3=%d]\n",
1120 pc[npc].reg_value & 0x1,
1121 (pc[npc].reg_value >> 1) & 0x1,
1122 (pc[npc].reg_value >> 2) & 0x1,
1123 (pc[npc].reg_value >> 3) & 0x1,
1124 (pc[npc].reg_value >> 32) & 0x1,
1125 (pc[npc].reg_value >> 33) & 0x1,
1126 (pc[npc].reg_value >> 34) & 0x1,
1127 (pc[npc].reg_value >> 35) & 0x1);
1132 if (lat < 3 || lat > 0xffff) {
1133 DPRINT(
"invalid load latency threshold %u (must be in [3:0xffff])\n", lat);
1155 if (offcore_rsp0_value) {
1168 if (offcore_rsp1_value) {
1187 static int lbr_plm_map[4]={
1194 unsigned int filter,
i,
c;
1216 if (lbr_plm_map[
i] & 0x1)
1219 if (lbr_plm_map[
i] & 0x2)
1245 __pfm_vbprintf(
"[LBR_SELECT(PMC30)=0x%"PRIx64
" eq0=%d neq0=%d jcc=%d rel=%d ind=%d ret=%d ind_jmp=%d rel_jmp=%d far=%d ]\n",
1278 for(
i=0;
i < 32;
i++,
c++) {
1331 memset(counters, 0,
sizeof(*counters));
/*
 * Counter width, in bits.
 * NOTE(review): presumably the value reported through
 * pfm_nhm_get_hw_counter_width(); confirm at the point of use.
 */
#define PMU_NHM_COUNTER_WIDTH	32
1537 unsigned int i, n=0;
1581 "unknown L3 cache miss",
1582 "minimal latency core cache hit. Request was satisfied by L1 data cache",
1583 "pending core cache HIT. Outstanding core cache miss to same cacheline address already underway",
1584 "data request satisfied by the L2",
1585 "L3 HIT. Local or remote home request that hit L3 in the uncore with no coherency actions required (snooping)",
1586 "L3 HIT. Local or remote home request that hit L3 and was serviced by another core with a cross core snoop where no modified copy was found (clean)",
1587 "L3 HIT. Local or remote home request that hit L3 and was serviced by another core with a cross core snoop where modified copies were found (HITM)",
1589 "L3 MISS. Local homed request that missed L3 and was serviced by forwarded data following a cross package snoop where no modified copy was found (remote home requests are not counted)",
1591 "L3 MISS. Local homed request that missed L3 and was serviced by local DRAM (go to shared state)",
1592 "L3 MISS. Remote homed request that missed L3 and was serviced by remote DRAM (go to shared state)",
1593 "L3 MISS. Local homed request that missed L3 and was serviced by local DRAM (go to exclusive state)",
1594 "L3 MISS. Remote homed request that missed L3 and was serviced by remote DRAM (go to exclusive state)",
1596 "request to uncacheable memory"
1606 if (val > 15 || !desc)
#define PME_COREI7_INSTRUCTIONS_RETIRED
#define PME_COREI7_EVENT_COUNT
static pme_nhm_entry_t corei7_pe[]
#define PME_COREI7_UNHALTED_CORE_CYCLES
#define PME_COREI7_UNC_EVENT_COUNT
static pme_nhm_entry_t corei7_unc_pe[]
#define PME_WSM_EVENT_COUNT
static pme_nhm_entry_t wsm_pe[]
#define PME_WSM_INSTRUCTIONS_RETIRED
#define PME_WSM_UNHALTED_CORE_CYCLES
static pme_nhm_entry_t intel_wsm_unc_pe[]
#define PME_WSM_UNC_EVENT_COUNT
static double c[MATRIX_SIZE][MATRIX_SIZE]
#define PFMLIB_INTEL_WSM_PMU
static int pfm_regmask_set(pfmlib_regmask_t *h, unsigned int b)
static int pfm_regmask_weight(pfmlib_regmask_t *h, unsigned int *w)
#define PFMLIB_INTEL_NHM_PMU
#define PFMLIB_ERR_TOOMANY
pfm_err_t pfm_get_impl_counters(pfmlib_regmask_t *impl_counters)
static int pfm_regmask_isset(pfmlib_regmask_t *h, unsigned int b)
#define PFMLIB_ERR_NOASSIGN
static int pfm_regmask_or(pfmlib_regmask_t *dst, pfmlib_regmask_t *h1, pfmlib_regmask_t *h2)
#define PFMLIB_ERR_NOTSUPP
int pfm_nhm_is_pebs(pfmlib_event_t *e)
static int pfm_wsm_detect(void)
pfm_pmu_support_t intel_nhm_support
static const char * data_src_encodings[]
static int pfm_nhm_get_event_description(unsigned int ev, char **str)
static int pfm_nhm_get_cycle_event(pfmlib_event_t *e)
static int pfm_nhm_dispatch_events(pfmlib_input_param_t *inp, void *model_in, pfmlib_output_param_t *outp, void *model_out)
static unsigned int pfm_nhm_get_num_event_masks(unsigned int ev)
static pme_nhm_entry_t * get_nhm_entry(unsigned int i)
static char * pfm_nhm_get_event_mask_name(unsigned int ev, unsigned int midx)
static int pfm_nhm_get_event_mask_desc(unsigned int ev, unsigned int midx, char **str)
#define PMU_NHM_COUNTER_WIDTH
static void fixup_mem_uncore_retired(void)
static void pfm_nhm_get_impl_pmcs(pfmlib_regmask_t *impl_pmcs)
static int pfm_nhm_check_cmask(pfmlib_event_t *e, pme_nhm_entry_t *ne, pfmlib_nhm_counter_t *cntr)
#define INTEL_ARCH_MISP_BR_RETIRED
static pfmlib_regmask_t nhm_impl_unc_pmds
static unsigned int num_pe
static int pfm_nhm_detect(void)
static pfmlib_regmask_t nhm_impl_pmds
static int pfm_nhm_detect_common(void)
static void pfm_nhm_get_hw_counter_width(unsigned int *width)
static int pfm_nhm_is_fixed(pfmlib_event_t *e, unsigned int f)
static void pfm_nhm_get_impl_pmds(pfmlib_regmask_t *impl_pmds)
static int pfm_nhm_init(void)
static char * pfm_nhm_get_event_name(unsigned int i)
int pfm_nhm_data_src_desc(unsigned int val, char **desc)
static void setup_nhm_impl_unc_regs(void)
static pme_nhm_entry_t * pe
static int pfm_nhm_get_inst_retired(pfmlib_event_t *e)
static void pfm_nhm_get_event_counters(unsigned int j, pfmlib_regmask_t *counters)
static pfmlib_regmask_t nhm_impl_pmcs
static void pfm_nhm_get_impl_counters(pfmlib_regmask_t *impl_counters)
static unsigned int num_unc_pe
#define UNC_NHM_FIXED_CTR_BASE
int pfm_nhm_is_uncore(pfmlib_event_t *e)
#define PFMLIB_NHM_ALL_FLAGS
static int pfm_nhm_get_event_code(unsigned int i, unsigned int cnt, int *code)
static int pfm_nhm_get_event_mask_code(unsigned int ev, unsigned int midx, unsigned int *code)
static pme_nhm_entry_t * unc_pe
static pfmlib_regmask_t nhm_impl_unc_pmcs
#define NHM_NUM_GEN_COUNTERS
pfm_pmu_support_t intel_wsm_support
static int pfm_nhm_dispatch_counters(pfmlib_input_param_t *inp, pfmlib_nhm_input_param_t *param, pfmlib_output_param_t *outp)
#define NHM_FIXED_CTR_BASE
static void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
#define NHM_NUM_FIXED_COUNTERS
static int pfm_nhm_dispatch_lbr(pfmlib_input_param_t *inp, pfmlib_nhm_input_param_t *param, pfmlib_output_param_t *outp)
static int pfm_nhm_midx2uidx(unsigned int ev, unsigned int midx)
#define PFM_NHM_SEL_ANYTHR
#define PFM_NHM_LBR_FAR_BRANCH
#define PFM_NHM_LBR_NEAR_REL_JMP
#define PFM_NHM_LBR_NEAR_RET
#define PFM_NHM_LBR_NEAR_IND_JMP
#define PFM_NHM_SEL_OCC_RST
#define PFM_NHM_LBR_NEAR_REL_CALL
#define PFM_NHM_LBR_NEAR_IND_CALL
#define PMU_NHM_NUM_COUNTERS
#define PFMLIB_NHM_FIXED2_ONLY
#define PFMLIB_NHM_UNC_FIXED
#define PFMLIB_NHM_FIXED0
#define PFMLIB_NHM_OFFCORE_RSP0
#define PFMLIB_NHM_OFFCORE_RSP1
#define PFMLIB_NHM_FIXED1
#define PFMLIB_NHM_UMASK_NCOMBO
int __pfm_getcpuinfo_attr(const char *attr, char *ret_buf, size_t maxlen)
void __pfm_vbprintf(const char *fmt,...)
#define PFMLIB_INITIALIZED()
#define DPRINT(fmt, a...)
unsigned int unit_masks[PFMLIB_MAX_MASKS_PER_EVENT]
unsigned int ld_lat_thres
pfmlib_reg_t pfp_pmds[PFMLIB_MAX_PMDS]
pfmlib_reg_t pfp_pmcs[PFMLIB_MAX_PMCS]
unsigned int pfp_pmc_count
unsigned int pfp_pmd_count
unsigned long long reg_value
unsigned long reg_alt_addr
unsigned long long reg_addr
pme_nhm_umask_t pme_umasks[PFMLIB_NHM_MAX_UMASK]
unsigned long near_ind_jmp
unsigned long sel_cnt_mask
unsigned long near_rel_jmp
unsigned long near_ind_call
unsigned long near_rel_call
unsigned long usel_cnt_mask
struct pfm_nhm_sel_reg_t::@29 lbr_select