PAPI 7.1.0.0
pfmlib_intel_nhm.c
/*
 * pfmlib_intel_nhm.c : Intel Nehalem PMU
 *
 * Copyright (c) 2008 Google, Inc
 * Contributed by Stephane Eranian <eranian@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
 * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
 * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Nehalem PMU = architectural perfmon v3 + OFFCORE + PEBS v2 + uncore PMU + LBR
 */
#include <sys/types.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>

/* public headers */
#include <perfmon/pfmlib_intel_nhm.h>

/* private headers */
#include "pfmlib_priv.h"
#include "pfmlib_intel_nhm_priv.h"

/* Intel Westmere event tables */
#include "intel_wsm_events.h"
#include "intel_wsm_unc_events.h"

/* Intel Core i7 event tables */
#include "intel_corei7_events.h"
#include "intel_corei7_unc_events.h"
/* let's define some handy shortcuts! */
#define usel_event      unc_perfevtsel.usel_event
#define usel_umask      unc_perfevtsel.usel_umask
#define usel_occ        unc_perfevtsel.usel_occ
#define usel_edge       unc_perfevtsel.usel_edge
#define usel_int        unc_perfevtsel.usel_int
#define usel_en         unc_perfevtsel.usel_en
#define usel_inv        unc_perfevtsel.usel_inv
#define usel_cnt_mask   unc_perfevtsel.usel_cnt_mask

#define sel_event       perfevtsel.sel_event
#define sel_umask       perfevtsel.sel_umask
#define sel_usr         perfevtsel.sel_usr
#define sel_os          perfevtsel.sel_os
#define sel_edge        perfevtsel.sel_edge
#define sel_pc          perfevtsel.sel_pc
#define sel_int         perfevtsel.sel_int
#define sel_en          perfevtsel.sel_en
#define sel_inv         perfevtsel.sel_inv
#define sel_anythr      perfevtsel.sel_anythr
#define sel_cnt_mask    perfevtsel.sel_cnt_mask

/*
 * Description of the PMC register mappings:
 *
 *  0 -> PMC0  -> PERFEVTSEL0
 *  1 -> PMC1  -> PERFEVTSEL1
 *  2 -> PMC2  -> PERFEVTSEL2
 *  3 -> PMC3  -> PERFEVTSEL3
 * 16 -> PMC16 -> FIXED_CTR_CTRL
 * 17 -> PMC17 -> PEBS_ENABLED
 * 18 -> PMC18 -> PEBS_LD_LATENCY_THRESHOLD
 * 19 -> PMC19 -> OFFCORE_RSP0
 * 20 -> PMC20 -> UNCORE_FIXED_CTRL
 * 21 -> PMC21 -> UNCORE_EVNTSEL0
 * 22 -> PMC22 -> UNCORE_EVNTSEL1
 * 23 -> PMC23 -> UNCORE_EVNTSEL2
 * 24 -> PMC24 -> UNCORE_EVNTSEL3
 * 25 -> PMC25 -> UNCORE_EVNTSEL4
 * 26 -> PMC26 -> UNCORE_EVNTSEL5
 * 27 -> PMC27 -> UNCORE_EVNTSEL6
 * 28 -> PMC28 -> UNCORE_EVNTSEL7
 * 29 -> PMC29 -> UNCORE_ADDROP_MATCH
 * 30 -> PMC30 -> LBR_SELECT
 * 31 -> PMC31 -> OFFCORE_RSP1 (Westmere)
 *
 * Description of the PMD register mappings:
 *
 *  0 -> PMD0 -> PMC0
 *  1 -> PMD1 -> PMC1
 *  2 -> PMD2 -> PMC2
 *  3 -> PMD3 -> PMC3
 * 16 -> PMD16 -> FIXED_CTR0
 * 17 -> PMD17 -> FIXED_CTR1
 * 18 -> PMD18 -> FIXED_CTR2
 * 19 not used
 * 20 -> PMD20 -> UNCORE_FIXED_CTR0
 * 21 -> PMD21 -> UNCORE_PMC0
 * 22 -> PMD22 -> UNCORE_PMC1
 * 23 -> PMD23 -> UNCORE_PMC2
 * 24 -> PMD24 -> UNCORE_PMC3
 * 25 -> PMD25 -> UNCORE_PMC4
 * 26 -> PMD26 -> UNCORE_PMC5
 * 27 -> PMD27 -> UNCORE_PMC6
 * 28 -> PMD28 -> UNCORE_PMC7
 *
 * 31 -> PMD31 -> LBR_TOS
 * 32-63 -> PMD32-PMD63 -> LBR_FROM_0/LBR_TO_0 ... LBR_FROM_15/LBR_TO_15
 */
#define NHM_SEL_BASE            0x186
#define NHM_CTR_BASE            0xc1
#define NHM_FIXED_CTR_BASE      0x309

#define UNC_NHM_SEL_BASE        0x3c0
#define UNC_NHM_CTR_BASE        0x3b0
#define UNC_NHM_FIXED_CTR_BASE  0x394
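/*
 * Illustrative sketch (not from the original source): how a logical PMC
 * index maps to an MSR address with the bases above. Core PMC2
 * (PERFEVTSEL2) maps to NHM_SEL_BASE + 2 = 0x188, while uncore PMC24
 * (UNCORE_EVNTSEL3) maps to UNC_NHM_SEL_BASE + (24 - 21) = 0x3c3:
 *
 *  unsigned long core_sel_msr = NHM_SEL_BASE + 2;           // 0x188
 *  unsigned long unc_sel_msr  = UNC_NHM_SEL_BASE + 24 - 21; // 0x3c3
 *
 * This is the same arithmetic the dispatch code below applies when it
 * fills pc[].reg_addr.
 */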

#define MAX_COUNTERS 28 /* highest implemented counter */

#define PFMLIB_NHM_ALL_FLAGS \
    (PFM_NHM_SEL_INV|PFM_NHM_SEL_EDGE|PFM_NHM_SEL_ANYTHR)

#define NHM_NUM_GEN_COUNTERS    4
#define NHM_NUM_FIXED_COUNTERS  3

static pme_nhm_entry_t *pe, *unc_pe;
static pfmlib_regmask_t nhm_impl_pmcs, nhm_impl_pmds;
static pfmlib_regmask_t nhm_impl_unc_pmcs, nhm_impl_unc_pmds;
static unsigned int num_pe, num_unc_pe;
static int cpu_model, aaj80;
static int pme_cycles, pme_instr;

#ifdef __i386__
static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx,
                         unsigned int *ecx, unsigned int *edx)
{
    /*
     * because ebx is used in PIC mode, we need to save/restore it given
     * that cpuid clobbers it. I could not figure out a way to get ebx out
     * in one cpuid instruction. To extract ebx, we need to move it to
     * another register (here eax)
     */
    __asm__("pushl %%ebx;cpuid; popl %%ebx"
            :"=a" (*eax)
            : "a" (op)
            : "ecx", "edx");

    __asm__("pushl %%ebx;cpuid; movl %%ebx, %%eax;popl %%ebx"
            :"=a" (*ebx)
            : "a" (op)
            : "ecx", "edx");
}
#else
static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx,
                         unsigned int *ecx, unsigned int *edx)
{
    __asm__("cpuid"
            : "=a" (*eax),
              "=b" (*ebx),
              "=c" (*ecx),
              "=d" (*edx)
            : "0" (op), "c"(0));
}
#endif
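/*
 * Illustrative sketch (not part of the original logic): CPUID leaf 0xa
 * describes architectural perfmon. EBX returns a bit vector in which a
 * set bit means the corresponding architected event is NOT available;
 * bit 6 covers MISPREDICTED_BRANCH_RETIRED. A caller could probe it as:
 *
 *  unsigned int eax, ebx, ecx, edx;
 *  cpuid(0xa, &eax, &ebx, &ecx, &edx);
 *  if (ebx & (1 << 6))
 *      ; // event unavailable; cf. the erratum AAJ80 handling below
 */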

static inline pme_nhm_entry_t *
get_nhm_entry(unsigned int i)
{
    return i < num_pe ? pe+i : unc_pe+(i-num_pe);
}

static int
pfm_nhm_midx2uidx(unsigned int ev, unsigned int midx)
{
    int i, num = 0;
    pme_nhm_entry_t *ne;
    int model;

    ne = get_nhm_entry(ev);

    for (i=0; i < ne->pme_numasks; i++) {
        model = ne->pme_umasks[i].pme_umodel;
        if (!model || model == cpu_model) {
            if (midx == num)
                return i;
            num++;
        }
    }
    DPRINT("cannot find umask %d for event %s\n", midx, ne->pme_name);
    return -1;
}

static int
pfm_nhm_detect_common(void)
{
    int ret;
    int family;
    char buffer[128];

    ret = __pfm_getcpuinfo_attr("vendor_id", buffer, sizeof(buffer));
    if (ret == -1)
        return PFMLIB_ERR_NOTSUPP;

    if (strcmp(buffer, "GenuineIntel"))
        return PFMLIB_ERR_NOTSUPP;

    ret = __pfm_getcpuinfo_attr("cpu family", buffer, sizeof(buffer));
    if (ret == -1)
        return PFMLIB_ERR_NOTSUPP;

    family = atoi(buffer);

    ret = __pfm_getcpuinfo_attr("model", buffer, sizeof(buffer));
    if (ret == -1)
        return PFMLIB_ERR_NOTSUPP;

    cpu_model = atoi(buffer);

    if (family != 6)
        return PFMLIB_ERR_NOTSUPP;

    return PFMLIB_SUCCESS;
}

static int
pfm_nhm_detect(void)
{
#define INTEL_ARCH_MISP_BR_RETIRED (1 << 6)
    unsigned int eax, ebx, ecx, edx;
    int ret;

    ret = pfm_nhm_detect_common();
    if (ret != PFMLIB_SUCCESS)
        return ret;

    switch(cpu_model) {
        case 26: /* Nehalem */
        case 30:
        case 31:
        case 46:
            /*
             * check for erratum AAJ80
             *
             * MISPREDICTED_BRANCH_RETIRED may be broken
             * in which case it appears in the list of
             * unavailable architected events
             */
            cpuid(0xa, &eax, &ebx, &ecx, &edx);
            if (ebx & INTEL_ARCH_MISP_BR_RETIRED)
                aaj80 = 1;
            break;
        default:
            return PFMLIB_ERR_NOTSUPP;
    }
    return PFMLIB_SUCCESS;
}

static int
pfm_wsm_detect(void)
{
    switch(cpu_model) {
        case 37: /* Westmere */
        case 44:
            break;
        default:
            return PFMLIB_ERR_NOTSUPP;
    }
    return PFMLIB_SUCCESS;
}

static inline void setup_nhm_impl_unc_regs(void)
{
    pfm_regmask_set(&nhm_impl_unc_pmcs, 20);
    pfm_regmask_set(&nhm_impl_unc_pmcs, 21);
    pfm_regmask_set(&nhm_impl_unc_pmcs, 22);
    pfm_regmask_set(&nhm_impl_unc_pmcs, 23);
    pfm_regmask_set(&nhm_impl_unc_pmcs, 24);
    pfm_regmask_set(&nhm_impl_unc_pmcs, 25);
    pfm_regmask_set(&nhm_impl_unc_pmcs, 26);
    pfm_regmask_set(&nhm_impl_unc_pmcs, 27);
    pfm_regmask_set(&nhm_impl_unc_pmcs, 28);

    /* uncore */
    pfm_regmask_set(&nhm_impl_unc_pmds, 20);
    pfm_regmask_set(&nhm_impl_unc_pmds, 21);
    pfm_regmask_set(&nhm_impl_unc_pmds, 22);
    pfm_regmask_set(&nhm_impl_unc_pmds, 23);
    pfm_regmask_set(&nhm_impl_unc_pmds, 24);
    pfm_regmask_set(&nhm_impl_unc_pmds, 25);
    pfm_regmask_set(&nhm_impl_unc_pmds, 26);
    pfm_regmask_set(&nhm_impl_unc_pmds, 27);
    pfm_regmask_set(&nhm_impl_unc_pmds, 28);
    /* uncore addrop match */
    pfm_regmask_set(&nhm_impl_unc_pmcs, 29);

}

static void
fixup_mem_uncore_retired(void)
{
    size_t i;

    for(i=0; i < PME_COREI7_EVENT_COUNT; i++) {
        if (corei7_pe[i].pme_code != 0xf)
            continue;

        /*
         * assume model46 umasks are at the end
         */
        /* ... */
        break;
    }
}

static int
pfm_nhm_init(void)
{
    pfm_pmu_support_t *supp;
    int i;
    int num_unc_cnt = 0;

    if (forced_pmu != PFMLIB_NO_PMU) {
        if (forced_pmu == PFMLIB_INTEL_NHM_PMU)
            cpu_model = 26;
        else
            cpu_model = 37;
    }

    /* core */
    pfm_regmask_set(&nhm_impl_pmcs, 0);
    pfm_regmask_set(&nhm_impl_pmcs, 1);
    pfm_regmask_set(&nhm_impl_pmcs, 2);
    pfm_regmask_set(&nhm_impl_pmcs, 3);
    pfm_regmask_set(&nhm_impl_pmcs, 16);
    pfm_regmask_set(&nhm_impl_pmcs, 17);
    pfm_regmask_set(&nhm_impl_pmcs, 18);
    pfm_regmask_set(&nhm_impl_pmcs, 19);

    pfm_regmask_set(&nhm_impl_pmds, 0);
    pfm_regmask_set(&nhm_impl_pmds, 1);
    pfm_regmask_set(&nhm_impl_pmds, 2);
    pfm_regmask_set(&nhm_impl_pmds, 3);
    pfm_regmask_set(&nhm_impl_pmds, 16);
    pfm_regmask_set(&nhm_impl_pmds, 17);
    pfm_regmask_set(&nhm_impl_pmds, 18);

    /* lbr */
    pfm_regmask_set(&nhm_impl_pmcs, 30);
    for(i=31; i < 64; i++)
        pfm_regmask_set(&nhm_impl_pmds, i);

    switch(cpu_model) {
        case 46:
            num_pe = PME_COREI7_EVENT_COUNT;
            num_unc_pe = 0;
            pe = corei7_pe;
            unc_pe = NULL;
            pme_cycles = PME_COREI7_UNHALTED_CORE_CYCLES;
            pme_instr = PME_COREI7_INSTRUCTIONS_RETIRED;
            num_unc_cnt = 0;
            fixup_mem_uncore_retired();
            supp = &intel_nhm_support;
            break;
        case 26: /* Nehalem */
        case 30: /* Lynnfield */
            num_pe = PME_COREI7_EVENT_COUNT;
            num_unc_pe = PME_COREI7_UNC_EVENT_COUNT;
            pe = corei7_pe;
            unc_pe = corei7_unc_pe;
            pme_cycles = PME_COREI7_UNHALTED_CORE_CYCLES;
            pme_instr = PME_COREI7_INSTRUCTIONS_RETIRED;
            setup_nhm_impl_unc_regs();
            num_unc_cnt = 9; /* one fixed + 8 generic */
            supp = &intel_nhm_support;
            break;
        case 37: /* Westmere */
        case 44:
            num_pe = PME_WSM_EVENT_COUNT;
            num_unc_pe = PME_WSM_UNC_EVENT_COUNT;
            pe = wsm_pe;
            unc_pe = intel_wsm_unc_pe;
            pme_cycles = PME_WSM_UNHALTED_CORE_CYCLES;
            pme_instr = PME_WSM_INSTRUCTIONS_RETIRED;
            setup_nhm_impl_unc_regs();
            num_unc_cnt = 9; /* one fixed + 8 generic */

            /* OFFCORE_RESPONSE_1 */
            pfm_regmask_set(&nhm_impl_pmcs, 31);
            supp = &intel_wsm_support;
            break;
        default:
            return PFMLIB_ERR_NOTSUPP;
    }

    supp->pme_count = num_pe + num_unc_pe;
    supp->num_cnt = NHM_NUM_GEN_COUNTERS
                  + NHM_NUM_FIXED_COUNTERS
                  + num_unc_cnt;
    /*
     * propagate uncore registers to impl bitmaps
     */
    pfm_regmask_or(&nhm_impl_pmcs, &nhm_impl_pmcs, &nhm_impl_unc_pmcs);
    pfm_regmask_or(&nhm_impl_pmds, &nhm_impl_pmds, &nhm_impl_unc_pmds);

    /*
     * compute number of registers available
     * not all CPUs may have uncore
     */
    pfm_regmask_weight(&nhm_impl_pmcs, &supp->pmc_count);
    pfm_regmask_weight(&nhm_impl_pmds, &supp->pmd_count);

    return PFMLIB_SUCCESS;
}

static int
pfm_nhm_is_fixed(pfmlib_event_t *e, unsigned int f)
{
    pme_nhm_entry_t *ne;
    unsigned int fl, flc, i;
    unsigned int mask = 0;

    ne = get_nhm_entry(e->event);
    fl = ne->pme_flags;

    /*
     * first pass: check if the event as a whole supports fixed counters
     */
    switch(f) {
        case 0:
            mask = PFMLIB_NHM_FIXED0;
            break;
        case 1:
            mask = PFMLIB_NHM_FIXED1;
            break;
        case 2:
            mask = PFMLIB_NHM_FIXED2_ONLY;
            break;
        default:
            return 0;
    }
    if (fl & mask)
        return 1;
    /*
     * second pass: check if the unit masks support a fixed counter
     *
     * reject if a mask is not found OR if not all unit masks have
     * the same fixed counter mask
     */
    flc = 0;
    for(i=0; i < e->num_masks; i++) {
        int midx = pfm_nhm_midx2uidx(e->event, e->unit_masks[i]);
        fl = ne->pme_umasks[midx].pme_uflags;
        if (fl & mask)
            flc++;
    }
    return flc > 0 && flc == e->num_masks ? 1 : 0;
}

/*
 * Allow combination of events when cnt_mask > 0 AND unit mask codes do
 * not overlap (otherwise, we do not know what is actually measured)
 */
static int
pfm_nhm_check_cmask(pfmlib_event_t *e, pme_nhm_entry_t *ne, pfmlib_nhm_counter_t *cntr)
{
    unsigned int ref, ucode;
    int i, j;

    if (!cntr)
        return -1;

    if (cntr->cnt_mask == 0)
        return -1;

    for(i=0; i < e->num_masks; i++) {
        int midx = pfm_nhm_midx2uidx(e->event, e->unit_masks[i]);
        ref = ne->pme_umasks[midx].pme_ucode;
        for(j=i+1; j < e->num_masks; j++) {
            midx = pfm_nhm_midx2uidx(e->event, e->unit_masks[j]);
            ucode = ne->pme_umasks[midx].pme_ucode;
            if (ref & ucode)
                return -1;
        }
    }
    return 0;
}
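/*
 * Illustrative sketch (not from the original source): with cnt_mask > 0,
 * unit masks 0x01 and 0x02 share no bits, so their combination still has
 * a well-defined meaning and the check above returns 0. Unit masks 0x03
 * and 0x01 overlap (0x03 & 0x01 != 0), so the combination is rejected
 * with -1, because the threshold would apply to an ambiguous mix.
 */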

/*
 * IMPORTANT: the interface guarantees that pfp_pmds[] elements are returned in the order the events
 * were submitted.
 */
static int
pfm_nhm_dispatch_counters(pfmlib_input_param_t *inp, pfmlib_nhm_input_param_t *param, pfmlib_output_param_t *outp)
{
#define HAS_OPTIONS(x)  (cntrs && (cntrs[x].flags || cntrs[x].cnt_mask))
#define is_fixed_pmc(a) (a == 16 || a == 17 || a == 18)
#define is_uncore(a)    (a > 19)

    pme_nhm_entry_t *ne;
    pfm_nhm_sel_reg_t reg;
    pfmlib_event_t *e;
    pfmlib_nhm_counter_t *cntrs;
    pfmlib_reg_t *pc, *pd;
    pfmlib_regmask_t *r_pmcs;
    uint64_t val, unc_global_ctrl;
    uint64_t pebs_mask, ld_mask;
    unsigned long long fixed_ctr;
    unsigned int plm;
    unsigned int npc, npmc0, npmc01, nf2, nuf;
    unsigned int i, n, k, j, umask, use_pebs = 0;
    unsigned int assign_pc[PMU_NHM_NUM_COUNTERS];
    unsigned int next_gen, last_gen, u_flags;
    unsigned int next_unc_gen, last_unc_gen, lat;
    unsigned int offcore_rsp0_value = 0;
    unsigned int offcore_rsp1_value = 0;

    npc = npmc01 = npmc0 = nf2 = nuf = 0;
    unc_global_ctrl = 0;

    e = inp->pfp_events;
    pc = outp->pfp_pmcs;
    pd = outp->pfp_pmds;
    n = inp->pfp_event_count;
    r_pmcs = &inp->pfp_unavail_pmcs;
    cntrs = param ? param->pfp_nhm_counters : NULL;
    pebs_mask = ld_mask = 0;
    use_pebs = param ? param->pfp_nhm_pebs.pebs_used : 0;
    lat = param ? param->pfp_nhm_pebs.ld_lat_thres : 0;

    if (n > PMU_NHM_NUM_COUNTERS)
        return PFMLIB_ERR_TOOMANY;

    /*
     * error checking
     */
    for(i=0; i < n; i++) {
        /*
         * only supports two priv levels for perf counters
         */
        if (e[i].plm & (PFM_PLM1|PFM_PLM2))
            return PFMLIB_ERR_INVAL;

        ne = get_nhm_entry(e[i].event);

        /* check for erratum AAJ80 */
        if (aaj80 && (ne->pme_code & 0xff) == 0xc5) {
            DPRINT("MISPREDICTED_BRANCH_RETIRED broken on this Nehalem processor, see erratum AAJ80\n");
            return PFMLIB_ERR_NOTSUPP;
        }

        /*
         * check for valid flags
         */
        if (e[i].flags & ~PFMLIB_NHM_ALL_FLAGS)
            return PFMLIB_ERR_INVAL;

        if ((ne->pme_flags & PFMLIB_NHM_UMASK_NCOMBO)
            && e[i].num_masks > 1 && pfm_nhm_check_cmask(e+i, ne, cntrs ? cntrs+i : NULL)) {
            DPRINT("event does not support unit mask combination\n");
            return PFMLIB_ERR_NOASSIGN;
        }
        /*
         * check event-level single register constraint for uncore fixed
         */
        if (ne->pme_flags & PFMLIB_NHM_UNC_FIXED) {
            if (++nuf > 1) {
                DPRINT("two events compete for UNCORE_FIXED_CTR0\n");
                return PFMLIB_ERR_NOASSIGN;
            }
            if (HAS_OPTIONS(i)) {
                DPRINT("uncore fixed counter does not support options\n");
                return PFMLIB_ERR_NOASSIGN;
            }
        }
        if (ne->pme_flags & PFMLIB_NHM_PMC0) {
            if (++npmc0 > 1) {
                DPRINT("two events compete for PMC0\n");
                return PFMLIB_ERR_NOASSIGN;
            }
        }
        /*
         * check event-level single register constraint (PMC0/1 only)
         * fail if more than two events requested for the same counter pair
         */
        if (ne->pme_flags & PFMLIB_NHM_PMC01) {
            if (++npmc01 > 2) {
                DPRINT("more than two events compete for PMC0/PMC1\n");
                return PFMLIB_ERR_NOASSIGN;
            }
        }
        /*
         * UNHALTED_REFERENCE_CYCLES (CPU_CLK_UNHALTED:BUS)
         * can only be measured on FIXED_CTR2
         */
        if (ne->pme_flags & PFMLIB_NHM_FIXED2_ONLY) {
            if (++nf2 > 1) {
                DPRINT("two events compete for FIXED_CTR2\n");
                return PFMLIB_ERR_NOASSIGN;
            }
            if (cntrs && ((cntrs[i].flags & (PFM_NHM_SEL_INV|PFM_NHM_SEL_EDGE)) || cntrs[i].cnt_mask)) {
                DPRINT("UNHALTED_REFERENCE_CYCLES only accepts the anythr filter\n");
                return PFMLIB_ERR_NOASSIGN;
            }
        }
        /*
         * OFFCORE_RSP0 is shared, unit masks for all offcore_response events
         * must be identical
         */
        umask = 0;
        for(j=0; j < e[i].num_masks; j++) {
            int midx = pfm_nhm_midx2uidx(e[i].event, e[i].unit_masks[j]);
            umask |= ne->pme_umasks[midx].pme_ucode;
        }

        if (ne->pme_flags & PFMLIB_NHM_OFFCORE_RSP0) {
            if (offcore_rsp0_value && offcore_rsp0_value != umask) {
                DPRINT("all OFFCORE_RSP0 events must have the same unit mask\n");
                return PFMLIB_ERR_NOASSIGN;
            }
            if (pfm_regmask_isset(r_pmcs, 19)) {
                DPRINT("OFFCORE_RSP0 register not available\n");
                return PFMLIB_ERR_NOASSIGN;
            }
            if (!((umask & 0xff) && (umask & 0xff00))) {
                DPRINT("OFFCORE_RSP0 must have at least one request and one response unit mask set\n");
                return PFMLIB_ERR_INVAL;
            }
            /* lock-in offcore_value */
            offcore_rsp0_value = umask;
        }
        if (ne->pme_flags & PFMLIB_NHM_OFFCORE_RSP1) {
            if (offcore_rsp1_value && offcore_rsp1_value != umask) {
                DPRINT("all OFFCORE_RSP1 events must have the same unit mask\n");
                return PFMLIB_ERR_NOASSIGN;
            }
            if (pfm_regmask_isset(r_pmcs, 31)) {
                DPRINT("OFFCORE_RSP1 register not available\n");
                return PFMLIB_ERR_NOASSIGN;
            }
            if (!((umask & 0xff) && (umask & 0xff00))) {
                DPRINT("OFFCORE_RSP1 must have at least one request and one response unit mask set\n");
                return PFMLIB_ERR_INVAL;
            }
            /* lock-in offcore_value */
            offcore_rsp1_value = umask;
        }

        /*
         * enforce PLM0|PLM3 for uncore events given they have no
         * priv level filter. This is to ensure users understand what
         * they are doing
         */
        if (ne->pme_flags & PFMLIB_NHM_UNC) {
            if (inp->pfp_dfl_plm != (PFM_PLM0|PFM_PLM3)
                && e[i].plm != (PFM_PLM0|PFM_PLM3)) {
                DPRINT("uncore events must have PLM0|PLM3\n");
                return PFMLIB_ERR_NOASSIGN;
            }
        }
    }

    /*
     * initialize to empty
     */
    for(i=0; i < PMU_NHM_NUM_COUNTERS; i++)
        assign_pc[i] = -1;


    next_gen = 0; /* first generic counter */
    last_gen = 3; /* last generic counter */

    /*
     * strongest constraint first: events tied to UNCORE_FIXED_CTR0 or PMC0 only
     */
    if (nuf || npmc0) {
        for(i=0; i < n; i++) {
            ne = get_nhm_entry(e[i].event);
            if (ne->pme_flags & PFMLIB_NHM_PMC0) {
                if (pfm_regmask_isset(r_pmcs, 0))
                    return PFMLIB_ERR_NOASSIGN;
                assign_pc[i] = 0;
                next_gen = 1;
            }
            if (ne->pme_flags & PFMLIB_NHM_UNC_FIXED) {
                if (pfm_regmask_isset(r_pmcs, 20))
                    return PFMLIB_ERR_NOASSIGN;
                assign_pc[i] = 20;
            }
        }
    }
    /*
     * 2nd strongest constraint next: works only on PMC0 or PMC1
     * On Nehalem, this constraint applies at the event level
     * (not unit mask level, fortunately)
     *
     * PEBS works on all 4 generic counters
     *
     * Because of the sanity check above, we know we can find
     * only up to 2 events with this constraint
     */
    if (npmc01) {
        for(i=0; i < n; i++) {
            ne = get_nhm_entry(e[i].event);
            if (ne->pme_flags & PFMLIB_NHM_PMC01) {
                while (next_gen < 2 && pfm_regmask_isset(r_pmcs, next_gen))
                    next_gen++;
                if (next_gen == 2)
                    return PFMLIB_ERR_NOASSIGN;
                assign_pc[i] = next_gen++;
            }
        }
    }

    /*
     * next constraint: fixed counters
     *
     * We abuse the mapping here for assign_pc to make it easier
     * to provide the correct values for pd[].
     * We use:
     * - 16 : fixed counter 0 (pmc16, pmd16)
     * - 17 : fixed counter 1 (pmc16, pmd17)
     * - 18 : fixed counter 2 (pmc16, pmd18)
     */
    fixed_ctr = pfm_regmask_isset(r_pmcs, 16) ? 0 : 0x7;
    if (fixed_ctr) {
        for(i=0; i < n; i++) {
            /*
             * Nehalem fixed counters (as with architected perfmon v3)
             * do support the anythr filter
             */
            if (HAS_OPTIONS(i)) {
                if (use_pebs && pfm_nhm_is_pebs(e+i))
                    continue;

                if (cntrs[i].flags != PFM_NHM_SEL_ANYTHR)
                    continue;
            }
            if ((fixed_ctr & 0x1) && pfm_nhm_is_fixed(e+i, 0)) {
                assign_pc[i] = 16;
                fixed_ctr &= ~1;
            }
            if ((fixed_ctr & 0x2) && pfm_nhm_is_fixed(e+i, 1)) {
                assign_pc[i] = 17;
                fixed_ctr &= ~2;
            }
            if ((fixed_ctr & 0x4) && pfm_nhm_is_fixed(e+i, 2)) {
                assign_pc[i] = 18;
                fixed_ctr &= ~4;
            }
        }
    }
    /*
     * uncore events on any of the 8 counters
     */
    next_unc_gen = 21; /* first generic uncore counter config */
    last_unc_gen = 28; /* last generic uncore counter config */
    for(i=0; i < n; i++) {
        ne = get_nhm_entry(e[i].event);
        if (ne->pme_flags & PFMLIB_NHM_UNC) {
            for(; next_unc_gen <= last_unc_gen; next_unc_gen++) {
                if (!pfm_regmask_isset(r_pmcs, next_unc_gen))
                    break;
            }
            if (next_unc_gen <= last_unc_gen)
                assign_pc[i] = next_unc_gen++;
            else {
                DPRINT("cannot assign generic uncore event\n");
                return PFMLIB_ERR_NOASSIGN;
            }
        }
    }

    /*
     * assign what is left of the generic events
     */
    for(i=0; i < n; i++) {
        if (assign_pc[i] == -1) {
            for(; next_gen <= last_gen; next_gen++) {
                DPRINT("i=%d next_gen=%d last=%d isset=%d\n", i, next_gen, last_gen, pfm_regmask_isset(r_pmcs, next_gen));
                if (!pfm_regmask_isset(r_pmcs, next_gen))
                    break;
            }
            if (next_gen <= last_gen) {
                assign_pc[i] = next_gen++;
            } else {
                DPRINT("cannot assign generic event\n");
                return PFMLIB_ERR_NOASSIGN;
            }
        }
    }

    /*
     * setup core fixed counters
     */
    reg.val = 0;
    for (i=0; i < n ; i++ ) {
        if (!is_fixed_pmc(assign_pc[i]))
            continue;
        val = 0;
        /* if plm is 0, then assume not specified per-event and use default */
        plm = e[i].plm ? e[i].plm : inp->pfp_dfl_plm;
        if (plm & PFM_PLM0)
            val |= 1ULL;
        if (plm & PFM_PLM3)
            val |= 2ULL;
        if (cntrs && cntrs[i].flags & PFM_NHM_SEL_ANYTHR)
            val |= 4ULL;
        val |= 1ULL << 3; /* force APIC int (kernel may force it anyway) */

        reg.val |= val << ((assign_pc[i]-16)<<2);
    }
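    /*
     * Illustrative sketch (not from the original source): FIXED_CTR_CTRL
     * packs one 4-bit field per fixed counter (bit 0 = OS, bit 1 = USR,
     * bit 2 = any-thread, bit 3 = PMI). A user-only event with PMI on
     * FIXED_CTR1 yields val = 2 (USR) | 8 (PMI) = 0xA, shifted by
     * (17-16)*4 = 4 bits:
     *
     *  uint64_t v = (2ULL | (1ULL << 3)) << ((17 - 16) << 2); // 0xA0
     *
     * so FIXED_CTR_CTRL (MSR 0x38D) reads 0xA0 if only FIXED_CTR1 is used.
     */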

    if (reg.val) {
        pc[npc].reg_num = 16;
        pc[npc].reg_value = reg.val;
        pc[npc].reg_addr = 0x38D;
        pc[npc].reg_alt_addr = 0x38D;

        __pfm_vbprintf("[FIXED_CTRL(pmc%u)=0x%"PRIx64" pmi0=1 en0=0x%"PRIx64" any0=%d pmi1=1 en1=0x%"PRIx64" any1=%d pmi2=1 en2=0x%"PRIx64" any2=%d] ",
                pc[npc].reg_num,
                reg.val,
                reg.val & 0x3ULL,
                !!(reg.val & 0x4ULL),
                (reg.val>>4) & 0x3ULL,
                !!((reg.val>>4) & 0x4ULL),
                (reg.val>>8) & 0x3ULL,
                !!((reg.val>>8) & 0x4ULL));

        if ((fixed_ctr & 0x1) == 0)
            __pfm_vbprintf("INSTRUCTIONS_RETIRED ");
        if ((fixed_ctr & 0x2) == 0)
            __pfm_vbprintf("UNHALTED_CORE_CYCLES ");
        if ((fixed_ctr & 0x4) == 0)
            __pfm_vbprintf("UNHALTED_REFERENCE_CYCLES ");
        __pfm_vbprintf("\n");

        npc++;

        if ((fixed_ctr & 0x1) == 0)
            __pfm_vbprintf("[FIXED_CTR0(pmd16)]\n");
        if ((fixed_ctr & 0x2) == 0)
            __pfm_vbprintf("[FIXED_CTR1(pmd17)]\n");
        if ((fixed_ctr & 0x4) == 0)
            __pfm_vbprintf("[FIXED_CTR2(pmd18)]\n");
    }

    /*
     * setup core counter config
     */
    for (i=0; i < n ; i++ ) {
        /* skip fixed counters */
        if (is_fixed_pmc(assign_pc[i]) || is_uncore(assign_pc[i]))
            continue;

        reg.val = 0; /* assume reserved bits are zeroed */

        /* if plm is 0, then assume not specified per-event and use default */
        plm = e[i].plm ? e[i].plm : inp->pfp_dfl_plm;

        ne = get_nhm_entry(e[i].event);
        val = ne->pme_code;

        reg.sel_event = val & 0xff;

        umask = (val >> 8) & 0xff;

        u_flags = 0;

        /*
         * for OFFCORE_RSP, the unit masks are all in the
         * dedicated OFFCORE_RSP MSRs and the event unit mask must be
         * 0x1 (extracted from pme_code)
         */
        if (!(ne->pme_flags & (PFMLIB_NHM_OFFCORE_RSP0|PFMLIB_NHM_OFFCORE_RSP1)))
            for(k=0; k < e[i].num_masks; k++) {
                int midx = pfm_nhm_midx2uidx(e[i].event, e[i].unit_masks[k]);
                umask |= ne->pme_umasks[midx].pme_ucode;
                u_flags |= ne->pme_umasks[midx].pme_uflags;
            }
        val |= umask << 8;

        reg.sel_umask = umask;
        reg.sel_usr = plm & PFM_PLM3 ? 1 : 0;
        reg.sel_os = plm & PFM_PLM0 ? 1 : 0;
        reg.sel_en = 1;  /* force enable bit to 1 */
        reg.sel_int = 1; /* force APIC int to 1 */

        reg.sel_cnt_mask = val >> 24;
        reg.sel_inv = val >> 23;
        reg.sel_anythr = val >> 21;
        reg.sel_edge = val >> 18;

        if (cntrs) {
            /*
             * occupancy reset flag is for uncore counters only
             */
            if (cntrs[i].flags & PFM_NHM_SEL_OCC_RST)
                return PFMLIB_ERR_INVAL;

            if (!reg.sel_cnt_mask) {
                /*
                 * counter mask is 8-bit wide, do not silently
                 * wrap around
                 */
                if (cntrs[i].cnt_mask > 255)
                    return PFMLIB_ERR_INVAL;

                reg.sel_cnt_mask = cntrs[i].cnt_mask;
            }

            if (!reg.sel_edge)
                reg.sel_edge = cntrs[i].flags & PFM_NHM_SEL_EDGE ? 1 : 0;
            if (!reg.sel_inv)
                reg.sel_inv = cntrs[i].flags & PFM_NHM_SEL_INV ? 1 : 0;
            if (!reg.sel_anythr)
                reg.sel_anythr = cntrs[i].flags & PFM_NHM_SEL_ANYTHR ? 1 : 0;
        }

        if (u_flags || (ne->pme_flags & PFMLIB_NHM_PEBS))
            pebs_mask |= 1ULL << assign_pc[i];

        /*
         * check for MEM_INST_RETIRED:LATENCY_ABOVE_THRESHOLD_0 to enable load latency filtering
         * when PEBS is used. There is only one threshold possible, yet multiple counters may be
         * programmed with this event/umask. That means they all share the same threshold.
         */
        if (reg.sel_event == 0xb && (umask & 0x10))
            ld_mask |= 1ULL << assign_pc[i];

        pc[npc].reg_num = assign_pc[i];
        pc[npc].reg_value = reg.val;
        pc[npc].reg_addr = NHM_SEL_BASE+assign_pc[i];
        pc[npc].reg_alt_addr = NHM_SEL_BASE+assign_pc[i];

        __pfm_vbprintf("[PERFEVTSEL%u(pmc%u)=0x%"PRIx64" event_sel=0x%x umask=0x%x os=%d usr=%d anythr=%d en=%d int=%d inv=%d edge=%d cnt_mask=%d] %s\n",
                pc[npc].reg_num,
                pc[npc].reg_num,
                reg.val,
                reg.sel_event,
                reg.sel_umask,
                reg.sel_os,
                reg.sel_usr,
                reg.sel_anythr,
                reg.sel_en,
                reg.sel_int,
                reg.sel_inv,
                reg.sel_edge,
                reg.sel_cnt_mask,
                ne->pme_name);

        __pfm_vbprintf("[PMC%u(pmd%u)]\n",
                pc[npc].reg_num,
                pc[npc].reg_num);

        npc++;
    }
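    /*
     * Illustrative sketch (not from the original source): a PERFEVTSEL
     * encoding for a user-only, edge-triggered event 0xc4 with umask 0x01.
     * Bits 0-7 hold the event (0xc4), 8-15 the umask (0x01), 16 USR,
     * 18 EDGE, 20 INT, 22 EN, so the register value would be:
     *
     *  uint64_t v = 0xc4 | (0x01 << 8) | (1 << 16) | (1 << 18)
     *             | (1 << 20) | (1 << 22); // 0x5501c4
     */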
    /*
     * setup uncore fixed counter config
     */
    if (nuf) {
        pc[npc].reg_num = 20;
        pc[npc].reg_value = 0x5ULL; /* ena=1, PMI=determined by kernel */
        pc[npc].reg_addr = 0x395;
        pc[npc].reg_alt_addr = 0x395;
        __pfm_vbprintf("[UNC_FIXED_CTRL(pmc20)=0x%"PRIx64" pmi=1 ena=1] UNC_CLK_UNHALTED\n", pc[npc].reg_value);
        __pfm_vbprintf("[UNC_FIXED_CTR0(pmd20)]\n");
        unc_global_ctrl |= 1ULL << 32;
        npc++;
    }
    /*
     * setup uncore counter config
     */
    for (i=0; i < n ; i++ ) {

        /* skip core counters, uncore fixed */
        if (!is_uncore(assign_pc[i]) || assign_pc[i] == 20)
            continue;

        reg.val = 0; /* assume reserved bits are zeroed */

        ne = get_nhm_entry(e[i].event);
        val = ne->pme_code;

        reg.usel_event = val & 0xff;

        umask = (val >> 8) & 0xff;

        for(k=0; k < e[i].num_masks; k++) {
            int midx = pfm_nhm_midx2uidx(e[i].event, e[i].unit_masks[k]);
            umask |= ne->pme_umasks[midx].pme_ucode;
        }

        val |= umask << 8;

        reg.usel_umask = umask;
        reg.usel_en = 1;  /* force enable bit to 1 */
        reg.usel_int = 1; /* force APIC int to 1 */

        /*
         * allow hardcoded filters in the event table
         */
        reg.usel_cnt_mask = val >> 24;
        reg.usel_inv = val >> 23;
        reg.usel_edge = val >> 18;
        reg.usel_occ = val >> 17;

        if (cntrs) {
            /*
             * anythread is for core counters only
             */
            if (cntrs[i].flags & PFM_NHM_SEL_ANYTHR)
                return PFMLIB_ERR_INVAL;

            if (!reg.usel_cnt_mask) {
                /*
                 * counter mask is 8-bit wide, do not silently
                 * wrap around
                 */
                if (cntrs[i].cnt_mask > 255)
                    return PFMLIB_ERR_INVAL;

                reg.usel_cnt_mask = cntrs[i].cnt_mask;
            }
            if (!reg.usel_edge)
                reg.usel_edge = cntrs[i].flags & PFM_NHM_SEL_EDGE ? 1 : 0;

            if (!reg.usel_inv)
                reg.usel_inv = cntrs[i].flags & PFM_NHM_SEL_INV ? 1 : 0;

            if (!reg.usel_occ)
                reg.usel_occ = cntrs[i].flags & PFM_NHM_SEL_OCC_RST ? 1 : 0;
        }

        unc_global_ctrl |= 1ULL << (assign_pc[i] - 21);
        pc[npc].reg_num = assign_pc[i];
        pc[npc].reg_value = reg.val;
        pc[npc].reg_addr = UNC_NHM_SEL_BASE+assign_pc[i] - 21;
        pc[npc].reg_alt_addr = UNC_NHM_SEL_BASE+assign_pc[i] - 21;

        __pfm_vbprintf("[UNC_PERFEVTSEL%u(pmc%u)=0x%"PRIx64" event=0x%x umask=0x%x en=%d int=%d inv=%d edge=%d occ=%d cnt_msk=%d] %s\n",
                pc[npc].reg_num - 21,
                pc[npc].reg_num,
                reg.val,
                reg.usel_event,
                reg.usel_umask,
                reg.usel_en,
                reg.usel_int,
                reg.usel_inv,
                reg.usel_edge,
                reg.usel_occ,
                reg.usel_cnt_mask,
                ne->pme_name);

        __pfm_vbprintf("[UNC_PMC%u(pmd%u)]\n",
                pc[npc].reg_num - 21,
                pc[npc].reg_num);
        npc++;
    }

    /*
     * setup pmds: must be in the same order as the events
     */
    for (i=0; i < n ; i++) {
        switch (assign_pc[i]) {
            case 0 ... 4:
                pd[i].reg_num = assign_pc[i];
                pd[i].reg_addr = NHM_CTR_BASE+assign_pc[i];
                /* index to use with RDPMC */
                pd[i].reg_alt_addr = assign_pc[i];
                break;
            case 16 ... 18:
                /* setup pd array */
                pd[i].reg_num = assign_pc[i];
                pd[i].reg_addr = NHM_FIXED_CTR_BASE+assign_pc[i]-16;
                pd[i].reg_alt_addr = 0x40000000+assign_pc[i]-16;
                break;
            case 20:
                pd[i].reg_num = 20;
                pd[i].reg_addr = UNC_NHM_FIXED_CTR_BASE;
                pd[i].reg_alt_addr = UNC_NHM_FIXED_CTR_BASE;
                break;
            case 21 ... 28:
                pd[i].reg_num = assign_pc[i];
                pd[i].reg_addr = UNC_NHM_CTR_BASE + assign_pc[i] - 21;
                pd[i].reg_alt_addr = UNC_NHM_CTR_BASE + assign_pc[i] - 21;
                break;
        }
    }
    outp->pfp_pmd_count = i;

    /*
     * setup PEBS_ENABLE
     */
    if (use_pebs && pebs_mask) {
        if (!lat)
            ld_mask = 0;
        /*
         * check that PEBS_ENABLE is available
         */
        if (pfm_regmask_isset(r_pmcs, 17))
            return PFMLIB_ERR_NOASSIGN;

        pc[npc].reg_num = 17;
        pc[npc].reg_value = pebs_mask | (ld_mask << 32);
        pc[npc].reg_addr = 0x3f1;     /* IA32_PEBS_ENABLE */
        pc[npc].reg_alt_addr = 0x3f1; /* IA32_PEBS_ENABLE */

        __pfm_vbprintf("[PEBS_ENABLE(pmc%u)=0x%"PRIx64" ena0=%d ena1=%d ena2=%d ena3=%d ll0=%d ll1=%d ll2=%d ll3=%d]\n",
                pc[npc].reg_num,
                pc[npc].reg_value,
                pc[npc].reg_value & 0x1,
                (pc[npc].reg_value >> 1) & 0x1,
                (pc[npc].reg_value >> 2) & 0x1,
                (pc[npc].reg_value >> 3) & 0x1,
                (pc[npc].reg_value >> 32) & 0x1,
                (pc[npc].reg_value >> 33) & 0x1,
                (pc[npc].reg_value >> 34) & 0x1,
                (pc[npc].reg_value >> 35) & 0x1);

        npc++;

        if (ld_mask) {
            if (lat < 3 || lat > 0xffff) {
                DPRINT("invalid load latency threshold %u (must be in [3:0xffff])\n", lat);
                return PFMLIB_ERR_INVAL;
            }

            if (pfm_regmask_isset(r_pmcs, 18))
                return PFMLIB_ERR_NOASSIGN;

            pc[npc].reg_num = 18;
            pc[npc].reg_value = lat;
            pc[npc].reg_addr = 0x3f6;     /* MSR_PEBS_LD_LAT_THRESHOLD */
            pc[npc].reg_alt_addr = 0x3f6; /* MSR_PEBS_LD_LAT_THRESHOLD */
            __pfm_vbprintf("[LOAD_LATENCY_THRESHOLD(pmc%u)=0x%"PRIx64"]\n",
                    pc[npc].reg_num,
                    pc[npc].reg_value);

            npc++;
        }
    }
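    /*
     * Illustrative sketch (not from the original source): if PEBS is armed
     * on PMC0 and PMC3 and PMC3 also carries the load latency event, then
     * pebs_mask = 0x9 and ld_mask = 0x8, so IA32_PEBS_ENABLE becomes
     *
     *  uint64_t v = 0x9ULL | (0x8ULL << 32); // 0x800000009
     *
     * i.e. enable bits in the low word, load latency bits from bit 32 up.
     */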

    /*
     * setup OFFCORE_RSP0
     */
    if (offcore_rsp0_value) {
        pc[npc].reg_num = 19;
        pc[npc].reg_value = offcore_rsp0_value;
        pc[npc].reg_addr = 0x1a6;
        pc[npc].reg_alt_addr = 0x1a6;
        __pfm_vbprintf("[OFFCORE_RSP0(pmc%u)=0x%"PRIx64"]\n",
                pc[npc].reg_num,
                pc[npc].reg_value);
        npc++;
    }
    /*
     * setup OFFCORE_RSP1
     */
    if (offcore_rsp1_value) {
        pc[npc].reg_num = 31;
        pc[npc].reg_value = offcore_rsp1_value;
        pc[npc].reg_addr = 0x1a7;
        pc[npc].reg_alt_addr = 0x1a7;
        __pfm_vbprintf("[OFFCORE_RSP1(pmc%u)=0x%"PRIx64"]\n",
                pc[npc].reg_num,
                pc[npc].reg_value);
        npc++;
    }

    outp->pfp_pmc_count = npc;

    return PFMLIB_SUCCESS;
}

static int
pfm_nhm_dispatch_lbr(pfmlib_input_param_t *inp, pfmlib_nhm_input_param_t *param, pfmlib_output_param_t *outp)
{
    static int lbr_plm_map[4]={
        0x3, /* PLM0=0 PLM3=0 neq0=1 eq0=1 */
        0x1, /* PLM0=0 PLM3=1 neq0=0 eq0=1 */
        0x2, /* PLM0=1 PLM3=0 neq0=1 eq0=0 */
        0x0  /* PLM0=1 PLM3=1 neq0=0 eq0=0 */
    };
    pfm_nhm_sel_reg_t reg;
    unsigned int filter, i, c;
    unsigned int plm;

    /*
     * check LBR_SELECT is available
     */
    if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 30))
        return PFMLIB_ERR_NOASSIGN;

    reg.val = 0; /* capture everything */

    plm = param->pfp_nhm_lbr.lbr_plm;
    if (!plm)
        plm = inp->pfp_dfl_plm;

    /*
     * LBR does not distinguish PLM1, PLM2 from PLM3
     */

    i = plm & PFM_PLM0 ? 0x2 : 0;
    i |= plm & PFM_PLM3 ? 0x1 : 0;

    if (lbr_plm_map[i] & 0x1)
        reg.lbr_select.cpl_eq0 = 1;

    if (lbr_plm_map[i] & 0x2)
        reg.lbr_select.cpl_neq0 = 1;

    filter = param->pfp_nhm_lbr.lbr_filter;

    if (filter & PFM_NHM_LBR_JCC)
        reg.lbr_select.jcc = 1;

    if (filter & PFM_NHM_LBR_NEAR_REL_CALL)
        reg.lbr_select.near_rel_call = 1;

    if (filter & PFM_NHM_LBR_NEAR_IND_CALL)
        reg.lbr_select.near_ind_call = 1;

    if (filter & PFM_NHM_LBR_NEAR_RET)
        reg.lbr_select.near_ret = 1;

    if (filter & PFM_NHM_LBR_NEAR_IND_JMP)
        reg.lbr_select.near_ind_jmp = 1;

    if (filter & PFM_NHM_LBR_NEAR_REL_JMP)
        reg.lbr_select.near_rel_jmp = 1;

    if (filter & PFM_NHM_LBR_FAR_BRANCH)
        reg.lbr_select.far_branch = 1;

    __pfm_vbprintf("[LBR_SELECT(PMC30)=0x%"PRIx64" eq0=%d neq0=%d jcc=%d rel=%d ind=%d ret=%d ind_jmp=%d rel_jmp=%d far=%d ]\n",
            reg.val,
            reg.lbr_select.cpl_eq0,
            reg.lbr_select.cpl_neq0,
            reg.lbr_select.jcc,
            reg.lbr_select.near_rel_call,
            reg.lbr_select.near_ind_call,
            reg.lbr_select.near_ret,
            reg.lbr_select.near_ind_jmp,
            reg.lbr_select.near_rel_jmp,
            reg.lbr_select.far_branch);

    __pfm_vbprintf("[LBR_TOS(PMD31)]\n");

    __pfm_vbprintf("[LBR_FROM-LBR_TO(PMD32..PMD63)]\n");

    c = outp->pfp_pmc_count;

    outp->pfp_pmcs[c].reg_num = 30;
    outp->pfp_pmcs[c].reg_value = reg.val;
    outp->pfp_pmcs[c].reg_addr = 0x1c8;
    outp->pfp_pmcs[c].reg_alt_addr = 0x1c8;
    c++;
    outp->pfp_pmc_count = c;

    c = outp->pfp_pmd_count;

    outp->pfp_pmds[c].reg_num = 31;
    outp->pfp_pmds[c].reg_value = 0;
    outp->pfp_pmds[c].reg_addr = 0x1c9;
    outp->pfp_pmds[c].reg_alt_addr = 0x1c9;
    c++;

    for(i=0; i < 32; i++, c++) {
        outp->pfp_pmds[c].reg_num = 32 + i;
        outp->pfp_pmds[c].reg_value = 0;
        outp->pfp_pmds[c].reg_addr = (i>>1) + ((i & 0x1) ? 0x6c0 : 0x680);
        outp->pfp_pmds[c].reg_alt_addr = (i>>1) + ((i & 0x1) ? 0x6c0 : 0x680);
    }
    outp->pfp_pmd_count = c;
    return PFMLIB_SUCCESS;
}
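/*
 * Illustrative sketch (not from the original source): the loop above
 * interleaves the 16 LBR_FROM/LBR_TO MSR pairs. Even i maps to
 * LBR_FROM_k at 0x680+k and odd i to LBR_TO_k at 0x6c0+k, with k = i/2.
 * For example, i = 4 gives PMD36 -> (4>>1) + 0x680 = 0x682 = LBR_FROM_2,
 * and i = 5 gives PMD37 -> (5>>1) + 0x6c0 = 0x6c2 = LBR_TO_2.
 */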

static int
pfm_nhm_dispatch_events(pfmlib_input_param_t *inp, void *model_in, pfmlib_output_param_t *outp, void *model_out)
{
    pfmlib_nhm_input_param_t *mod_in = (pfmlib_nhm_input_param_t *)model_in;
    int ret;

    if (inp->pfp_dfl_plm & (PFM_PLM1|PFM_PLM2)) {
        DPRINT("invalid plm=%x\n", inp->pfp_dfl_plm);
        return PFMLIB_ERR_INVAL;
    }
    ret = pfm_nhm_dispatch_counters(inp, mod_in, outp);
    if (ret != PFMLIB_SUCCESS)
        return ret;

    if (mod_in && mod_in->pfp_nhm_lbr.lbr_used)
        ret = pfm_nhm_dispatch_lbr(inp, mod_in, outp);

    return ret;
}

static int
pfm_nhm_get_event_code(unsigned int i, unsigned int cnt, int *code)
{
    pfmlib_regmask_t cnts;

    pfm_get_impl_counters(&cnts);

    if (cnt != PFMLIB_CNT_FIRST
        && (cnt > MAX_COUNTERS ||
            !pfm_regmask_isset(&cnts, cnt)))
        return PFMLIB_ERR_INVAL;

    *code = get_nhm_entry(i)->pme_code;

    return PFMLIB_SUCCESS;
}

static void
pfm_nhm_get_event_counters(unsigned int j, pfmlib_regmask_t *counters)
{
    pme_nhm_entry_t *ne;
    unsigned int i;

    memset(counters, 0, sizeof(*counters));

    ne = get_nhm_entry(j);

    if (ne->pme_flags & PFMLIB_NHM_UNC_FIXED) {
        pfm_regmask_set(counters, 20);
        return;
    }

    if (ne->pme_flags & PFMLIB_NHM_UNC) {
        pfm_regmask_set(counters, 20);
        pfm_regmask_set(counters, 21);
        pfm_regmask_set(counters, 22);
        pfm_regmask_set(counters, 23);
        pfm_regmask_set(counters, 24);
        pfm_regmask_set(counters, 25);
        pfm_regmask_set(counters, 26);
        pfm_regmask_set(counters, 27);
        return;
    }
    /*
     * fixed counter events have no unit mask
     */
    if (ne->pme_flags & PFMLIB_NHM_FIXED0)
        pfm_regmask_set(counters, 16);

    if (ne->pme_flags & PFMLIB_NHM_FIXED1)
        pfm_regmask_set(counters, 17);

    if (ne->pme_flags & PFMLIB_NHM_FIXED2_ONLY)
        pfm_regmask_set(counters, 18);

    /*
     * extract from unit mask level
     */
    for (i=0; i < ne->pme_numasks; i++) {
        if (ne->pme_umasks[i].pme_uflags & PFMLIB_NHM_FIXED0)
            pfm_regmask_set(counters, 16);
        if (ne->pme_umasks[i].pme_uflags & PFMLIB_NHM_FIXED1)
            pfm_regmask_set(counters, 17);
        if (ne->pme_umasks[i].pme_uflags & PFMLIB_NHM_FIXED2_ONLY)
            pfm_regmask_set(counters, 18);
    }

    /*
     * the event on FIXED_CTR2 is exclusively CPU_CLK_UNHALTED:REF
     * PMC0|PMC1-only events go on 0,1; the constraint is at the event level
     */
    if (!pfm_regmask_isset(counters, 18)) {
        pfm_regmask_set(counters, 0);
        if (!(ne->pme_flags & PFMLIB_NHM_PMC0))
            pfm_regmask_set(counters, 1);
        if (!(ne->pme_flags & (PFMLIB_NHM_PMC0|PFMLIB_NHM_PMC01))) {
            pfm_regmask_set(counters, 2);
            pfm_regmask_set(counters, 3);
        }
    }
}

static void
pfm_nhm_get_impl_pmcs(pfmlib_regmask_t *impl_pmcs)
{
    *impl_pmcs = nhm_impl_pmcs;
}

static void
pfm_nhm_get_impl_pmds(pfmlib_regmask_t *impl_pmds)
{
    *impl_pmds = nhm_impl_pmds;
}

static void
pfm_nhm_get_impl_counters(pfmlib_regmask_t *impl_counters)
{
    /* core generic */
    pfm_regmask_set(impl_counters, 0);
    pfm_regmask_set(impl_counters, 1);
    pfm_regmask_set(impl_counters, 2);
    pfm_regmask_set(impl_counters, 3);
    /* core fixed */
    pfm_regmask_set(impl_counters, 16);
    pfm_regmask_set(impl_counters, 17);
    pfm_regmask_set(impl_counters, 18);

    /* uncore pmd registers are all counters */
    pfm_regmask_or(impl_counters, impl_counters, &nhm_impl_unc_pmds);
}

/*
 * Even though CPUID 0xa returns in eax the actual counter
 * width, the architecture specifies that writes are limited
 * to the lower 32 bits. As such, only those bits have full
 * degree of freedom. That is the "usable" counter width.
 */
#define PMU_NHM_COUNTER_WIDTH 32

static void
pfm_nhm_get_hw_counter_width(unsigned int *width)
{
    *width = PMU_NHM_COUNTER_WIDTH;
}
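/*
 * Illustrative sketch (not from the original source): because counter
 * writes stick to the lower 32 bits and are sign-extended by the
 * hardware, a sampling period p is typically programmed as the 32-bit
 * two's complement -p, so the counter overflows after p increments:
 *
 *  uint64_t period = 100000;
 *  uint64_t ival = (uint64_t)(-(int64_t)period) & ((1ULL << 32) - 1);
 */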

static char *
pfm_nhm_get_event_name(unsigned int i)
{
    return get_nhm_entry(i)->pme_name;
}

static int
pfm_nhm_get_event_description(unsigned int ev, char **str)
{
    char *s;

    s = get_nhm_entry(ev)->pme_desc;
    if (s) {
        *str = strdup(s);
    } else {
        *str = NULL;
    }
    return PFMLIB_SUCCESS;
}

static char *
pfm_nhm_get_event_mask_name(unsigned int ev, unsigned int midx)
{
    midx = pfm_nhm_midx2uidx(ev, midx);
    return get_nhm_entry(ev)->pme_umasks[midx].pme_uname;
}

static int
pfm_nhm_get_event_mask_desc(unsigned int ev, unsigned int midx, char **str)
{
    char *s;

    midx = pfm_nhm_midx2uidx(ev, midx);
    s = get_nhm_entry(ev)->pme_umasks[midx].pme_udesc;
    if (s) {
        *str = strdup(s);
    } else {
        *str = NULL;
    }
    return PFMLIB_SUCCESS;
}

static unsigned int
pfm_nhm_get_num_event_masks(unsigned int ev)
{
    int i, num = 0;
    pme_nhm_entry_t *ne;
    int model;

    ne = get_nhm_entry(ev);

    for (i=0; i < ne->pme_numasks; i++) {
        model = ne->pme_umasks[i].pme_umodel;
        if (!model || model == cpu_model)
            num++;
    }
    DPRINT("event %s numasks=%d\n", ne->pme_name, num);
    return num;
}

static int
pfm_nhm_get_event_mask_code(unsigned int ev, unsigned int midx, unsigned int *code)
{
    midx = pfm_nhm_midx2uidx(ev, midx);
    *code = get_nhm_entry(ev)->pme_umasks[midx].pme_ucode;
    return PFMLIB_SUCCESS;
}

static int
pfm_nhm_get_cycle_event(pfmlib_event_t *e)
{
    e->event = pme_cycles;
    return PFMLIB_SUCCESS;
}

static int
pfm_nhm_get_inst_retired(pfmlib_event_t *e)
{
    e->event = pme_instr;
    return PFMLIB_SUCCESS;
}

/*
 * the following functions implement the model-specific
 * API directly available to users
 */

/*
 * Check if the event and all provided unit masks support PEBS
 *
 * return:
 *  PFMLIB_ERR_INVAL: invalid event e
 *  1 if the event supports PEBS
 *  0 if the event does not support PEBS
 */
int
pfm_nhm_is_pebs(pfmlib_event_t *e)
{
    pme_nhm_entry_t *ne;
    unsigned int i, n=0;

    if (e == NULL || e->event >= intel_nhm_support.pme_count)
        return PFMLIB_ERR_INVAL;

    ne = get_nhm_entry(e->event);
    if (ne->pme_flags & PFMLIB_NHM_PEBS)
        return 1;

    /*
     * ALL unit masks must support PEBS for this test to return true
     */
    for(i=0; i < e->num_masks; i++) {
        int midx;

        /* check for a valid unit mask */
        if (e->unit_masks[i] >= ne->pme_numasks)
            return PFMLIB_ERR_INVAL;
        midx = pfm_nhm_midx2uidx(e->event, e->unit_masks[i]);
        if (ne->pme_umasks[midx].pme_uflags & PFMLIB_NHM_PEBS)
            n++;
    }
    return n > 0 && n == e->num_masks;
}
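/*
 * Illustrative usage sketch (not from the original source): a caller that
 * has resolved an event by name (here via the libpfm3 pfm_find_full_event()
 * lookup; the event name is a placeholder) can probe PEBS support before
 * requesting PEBS sampling:
 *
 *  pfmlib_event_t ev;
 *  memset(&ev, 0, sizeof(ev));
 *  pfm_find_full_event("INST_RETIRED:ANY_P", &ev);
 *  if (pfm_nhm_is_pebs(&ev) == 1)
 *      ; // safe to program with pfp_nhm_pebs.pebs_used = 1
 */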

/*
 * Check if the event is uncore
 * return:
 *  PFMLIB_ERR_INVAL: invalid event e
 *  1 if the event is uncore
 *  0 if the event is not uncore
 */
int
pfm_nhm_is_uncore(pfmlib_event_t *e)
{
    if (PFMLIB_INITIALIZED() == 0)
        return 0;

    if (e == NULL || e->event >= num_pe)
        return PFMLIB_ERR_INVAL;

    return !!(get_nhm_entry(e->event)->pme_flags & (PFMLIB_NHM_UNC|PFMLIB_NHM_UNC_FIXED));
}

static const char *data_src_encodings[]={
/*  0 */ "unknown L3 cache miss",
/*  1 */ "minimal latency core cache hit. Request was satisfied by L1 data cache",
/*  2 */ "pending core cache HIT. Outstanding core cache miss to same cacheline address already underway",
/*  3 */ "data request satisfied by the L2",
/*  4 */ "L3 HIT. Local or remote home request that hit L3 in the uncore with no coherency actions required (snooping)",
/*  5 */ "L3 HIT. Local or remote home request that hit L3 and was serviced by another core with a cross core snoop where no modified copy was found (clean)",
/*  6 */ "L3 HIT. Local or remote home request that hit L3 and was serviced by another core with a cross core snoop where modified copies were found (HITM)",
/*  7 */ "reserved",
/*  8 */ "L3 MISS. Local homed request that missed L3 and was serviced by forwarded data following a cross package snoop where no modified copy was found (remote home requests are not counted)",
/*  9 */ "reserved",
/* 10 */ "L3 MISS. Local homed request that missed L3 and was serviced by local DRAM (go to shared state)",
/* 11 */ "L3 MISS. Remote homed request that missed L3 and was serviced by remote DRAM (go to shared state)",
/* 12 */ "L3 MISS. Local homed request that missed L3 and was serviced by local DRAM (go to exclusive state)",
/* 13 */ "L3 MISS. Remote homed request that missed L3 and was serviced by remote DRAM (go to exclusive state)",
/* 14 */ "reserved",
/* 15 */ "request to uncacheable memory"
};

/*
 * return the data source encoding for index val.
 * To be used with PEBS load latency filtering to decode the
 * source of the load miss
 */
int pfm_nhm_data_src_desc(unsigned int val, char **desc)
{
    if (val > 15 || !desc)
        return PFMLIB_ERR_INVAL;

    *desc = strdup(data_src_encodings[val]);
    if (!*desc)
        return PFMLIB_ERR_NOMEM;

    return PFMLIB_SUCCESS;
}
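/*
 * Illustrative usage sketch (not from the original source): decoding the
 * 4-bit data source field of a PEBS load latency record; the caller owns
 * the strdup'ed string:
 *
 *  char *desc;
 *  if (pfm_nhm_data_src_desc(10, &desc) == PFMLIB_SUCCESS) {
 *      printf("data source: %s\n", desc); // "L3 MISS. Local homed ..."
 *      free(desc);
 *  }
 */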

pfm_pmu_support_t intel_nhm_support = {
    .pmu_name = "Intel Nehalem",
    .pmu_type = PFMLIB_INTEL_NHM_PMU,
    .pme_count = 0, /* patched at runtime */
    .pmc_count = 0, /* patched at runtime */
    .pmd_count = 0, /* patched at runtime */
    .num_cnt = 0,   /* patched at runtime */
    .get_event_code = pfm_nhm_get_event_code,
    .get_event_name = pfm_nhm_get_event_name,
    .get_event_counters = pfm_nhm_get_event_counters,
    .dispatch_events = pfm_nhm_dispatch_events,
    .pmu_detect = pfm_nhm_detect,
    .pmu_init = pfm_nhm_init,
    .get_impl_pmcs = pfm_nhm_get_impl_pmcs,
    .get_impl_pmds = pfm_nhm_get_impl_pmds,
    .get_impl_counters = pfm_nhm_get_impl_counters,
    .get_hw_counter_width = pfm_nhm_get_hw_counter_width,
    .get_event_desc = pfm_nhm_get_event_description,
    .get_num_event_masks = pfm_nhm_get_num_event_masks,
    .get_event_mask_name = pfm_nhm_get_event_mask_name,
    .get_event_mask_code = pfm_nhm_get_event_mask_code,
    .get_event_mask_desc = pfm_nhm_get_event_mask_desc,
    .get_cycle_event = pfm_nhm_get_cycle_event,
    .get_inst_retired_event = pfm_nhm_get_inst_retired
};

pfm_pmu_support_t intel_wsm_support = {
    .pmu_name = "Intel Westmere",
    .pmu_type = PFMLIB_INTEL_WSM_PMU,
    .pme_count = 0, /* patched at runtime */
    .pmc_count = 0, /* patched at runtime */
    .pmd_count = 0, /* patched at runtime */
    .num_cnt = 0,   /* patched at runtime */
    .get_event_code = pfm_nhm_get_event_code,
    .get_event_name = pfm_nhm_get_event_name,
    .get_event_counters = pfm_nhm_get_event_counters,
    .dispatch_events = pfm_nhm_dispatch_events,
    .pmu_detect = pfm_wsm_detect,
    .pmu_init = pfm_nhm_init,
    .get_impl_pmcs = pfm_nhm_get_impl_pmcs,
    .get_impl_pmds = pfm_nhm_get_impl_pmds,
    .get_impl_counters = pfm_nhm_get_impl_counters,
    .get_hw_counter_width = pfm_nhm_get_hw_counter_width,
    .get_event_desc = pfm_nhm_get_event_description,
    .get_num_event_masks = pfm_nhm_get_num_event_masks,
    .get_event_mask_name = pfm_nhm_get_event_mask_name,
    .get_event_mask_code = pfm_nhm_get_event_mask_code,
    .get_event_mask_desc = pfm_nhm_get_event_mask_desc,
    .get_cycle_event = pfm_nhm_get_cycle_event,
    .get_inst_retired_event = pfm_nhm_get_inst_retired
};