4 .pme_desc =
"Accumulated cycles",
8 { .pme_name =
"Instr_cnt",
9 .pme_desc =
"Number of instructions completed",
16 .pme_name =
"Dispatch0_IC_miss",
17 .pme_desc =
"I-buffer is empty from I-Cache miss",
22 .pme_name =
"IU_stat_jmp_correct_pred",
23 .pme_desc =
"Retired non-annulled register indirect jumps predicted correctly",
28 .pme_name =
"Dispatch0_2nd_br",
29 .pme_desc =
"Stall cycles due to having two branch instructions line-up in one 4-instruction group causing the second branch in the group to be re-fetched, delaying its entrance into the I-buffer",
34 .pme_name =
"Rstall_storeQ",
35 .pme_desc =
"R-stage stall for a store instruction which is the next instruction to be executed, but it stalled due to the store queue being full",
40 .pme_name =
"Rstall_IU_use",
41 .pme_desc =
"R-stage stall for an event that the next instruction to be executed depends on the result of a preceding integer instruction in the pipeline that is not yet available",
46 .pme_name =
"IU_stat_ret_correct_pred",
47 .pme_desc =
"Retired non-annulled returns predicted correctly",
53 .pme_desc =
"I-cache references",
59 .pme_desc =
"D-cache read references (including accesses that subsequently trap)",
64 .pme_name =
"Rstall_FP_use",
65 .pme_desc =
"R-stage stall for an event that the next instruction to be executed depends on the result of a preceding floating-point instruction in the pipeline that is not yet available",
70 .pme_name =
"SW_pf_instr",
71 .pme_desc =
"Retired SW prefetch instructions",
77 .pme_desc =
"L2-cache references",
82 .pme_name =
"L2_write_hit_RTO",
83 .pme_desc =
"L2-cache exclusive requests that hit L2-cache in S, O, or Os state and thus, do a read-to-own bus transaction",
88 .pme_name =
"L2_snoop_inv_sh",
89 .pme_desc =
"L2 cache lines that were invalidated due to other processors doing RTO, RTOR, RTOU, or WS transactions",
94 .pme_name =
"L2_rd_miss",
95 .pme_desc =
"L2-cache miss events (including atomics) from D-cache events",
101 .pme_desc =
"P-cache cacheable loads",
106 .pme_name =
"SI_snoop_sh",
107 .pme_desc =
"Counts snoops from remote processor(s) including RTS, RTSR, RTO, RTOR, RS, RSR, RTSM, and WS",
112 .pme_name =
"SI_ciq_flow_sh",
113 .pme_desc =
"Counts system clock cycles when the flow control (PauseOut) signal is asserted",
118 .pme_name =
"Re_DC_miss",
119 .pme_desc =
"Stall due to loads that miss D-cache and get recirculated",
124 .pme_name =
"SW_count_NOP0",
125 .pme_desc =
"Retired, non-annulled special software NOP instructions (which is equivalent to 'sethi %hi(0xfc000), %g0' instruction)",
129 { .pme_name =
"IU_Stat_Br_miss_taken",
130 .pme_desc =
"Retired branches that were predicted to be taken, but in fact were not taken",
134 { .pme_name =
"IU_Stat_Br_Count_taken",
135 .pme_desc =
"Retired taken branches",
139 { .pme_name =
"HW_pf_exec",
140 .pme_desc =
"Hardware prefetches enqueued in the prefetch queue",
145 .pme_name =
"FA_pipe_completion",
146 .pme_desc =
"Instructions that complete execution on the FPG ALU pipelines",
151 .pme_name =
"SSM_L3_wb_remote",
152 .pme_desc =
"L3 cache line victimizations from this core which generate R_WB transactions to non-LPA (remote physical address) regions",
157 .pme_name =
"SSM_L3_miss_local",
158 .pme_desc =
"L3 cache misses to LPA (local physical address) from this core which generate an RTS, RTO, or RS transaction",
163 .pme_name =
"SSM_L3_miss_mtag_remote",
164 .pme_desc =
"L3 cache misses to LPA (local physical address) from this core which generate retry (R_*) transactions including R_RTS, R_RTO, and R_RS",
169 .pme_name =
"SW_pf_str_trapped",
170 .pme_desc =
"Strong software prefetch instructions trapping due to TLB miss",
175 .pme_name =
"SW_pf_PC_installed",
176 .pme_desc =
"Software prefetch instructions that installed lines in the P-cache",
181 .pme_name =
"IPB_to_IC_fill",
182 .pme_desc =
"I-cache fills from the instruction prefetch buffer",
187 .pme_name =
"L2_write_miss",
188 .pme_desc =
"L2-cache misses from this core by cacheable store requests",
193 .pme_name =
"MC_reads_0_sh",
194 .pme_desc =
"Read requests completed to memory bank 0",
199 .pme_name =
"MC_reads_1_sh",
200 .pme_desc =
"Read requests completed to memory bank 1",
205 .pme_name =
"MC_reads_2_sh",
206 .pme_desc =
"Read requests completed to memory bank 2",
211 .pme_name =
"MC_reads_3_sh",
212 .pme_desc =
"Read requests completed to memory bank 3",
217 .pme_name =
"MC_stalls_0_sh",
218 .pme_desc =
"Clock cycles that requests were stalled in the MCU queues because bank 0 was busy with a previous request",
223 .pme_name =
"MC_stalls_2_sh",
224 .pme_desc =
"Clock cycles that requests were stalled in the MCU queues because bank 2 was busy with a previous request",
229 .pme_name =
"L2_hit_other_half",
230 .pme_desc =
"L2 cache hits from this core to the ways filled by the other core when the cache is in the pseudo-split mode",
235 .pme_name =
"L3_rd_miss",
236 .pme_desc =
"L3 cache misses sent out to SIU from this core by cacheable I-cache, D-cache, P-cache, and W-cache (excluding block store) requests",
241 .pme_name =
"Re_L2_miss",
242 .pme_desc =
"Stall cycles due to recirculation of cacheable loads that miss both D-cache and L2 cache",
247 .pme_name =
"IC_miss_cancelled",
248 .pme_desc =
"I-cache miss requests cancelled due to new fetch stream",
253 .pme_name =
"DC_wr_miss",
254 .pme_desc =
"D-cache store accesses that miss D-cache",
259 .pme_name =
"L3_hit_I_state_sh",
260 .pme_desc =
"Tag hits in L3 cache when the line is in I state",
265 .pme_name =
"SI_RTS_src_data",
266 .pme_desc =
"Local RTS transactions due to I-cache, D-cache, or P-cache requests from this core where data is from the cache of another processor on the system, not from memory",
271 .pme_name =
"L2_IC_miss",
272 .pme_desc =
"L2 cache misses from this core by cacheable I-cache requests",
277 .pme_name =
"SSM_new_transaction_sh",
278 .pme_desc =
"New SSM transactions (RTSU, RTOU, UGM) observed by this processor on the Fireplane Interconnect",
283 .pme_name =
"L2_SW_pf_miss",
284 .pme_desc =
"L2 cache misses by software prefetch requests from this core",
290 .pme_desc =
"L2 cache lines that were written back to the L3 cache because of requests from this core",
295 .pme_name =
"L2_wb_sh",
296 .pme_desc =
"L2 cache lines that were written back to the L3 cache because of requests from both cores",
301 .pme_name =
"L2_snoop_cb_sh",
302 .pme_desc =
"L2 cache lines that were copied back due to other processors",
309 .pme_name =
"Dispatch0_other",
310 .pme_desc =
"Stall cycles due to the event that no instructions are dispatched because the I-queue is empty due to various other events, including branch target address fetch and various events which cause an instruction to be refetched",
316 .pme_desc =
"D-cache write references by cacheable stores (excluding block stores)",
321 .pme_name =
"Re_DC_missovhd",
322 .pme_desc =
"Stall cycles due to D-cache load miss",
327 .pme_name =
"Re_FPU_bypass",
328 .pme_desc =
"Stall due to recirculation when an FPU bypass condition that does not have a direct bypass path occurs",
333 .pme_name =
"L3_write_hit_RTO",
334 .pme_desc =
"L3 cache hits in O, Os, or S state by cacheable store requests from this core that do a read-to-own (RTO) bus transaction",
339 .pme_name =
"L2L3_snoop_inv_sh",
340 .pme_desc =
"L2 and L3 cache lines that were invalidated due to other processors doing RTO, RTOR, RTOU, or WS transactions",
345 .pme_name =
"IC_L2_req",
346 .pme_desc =
"I-cache requests sent to L2 cache",
351 .pme_name =
"DC_rd_miss",
352 .pme_desc =
"Cacheable loads (excluding atomics and block loads) that miss D-cache as well as P-cache (for FP loads)",
357 .pme_name =
"L2_hit_I_state_sh",
358 .pme_desc =
"Tag hits in L2 cache when the line is in I state",
363 .pme_name =
"L3_write_miss_RTO",
364 .pme_desc =
"L3 cache misses from this core by cacheable store requests that do a read-to-own (RTO) bus transaction. This count does not include RTO requests for prefetch (fcn=2,3/22,23) instructions",
369 .pme_name =
"L2_miss",
370 .pme_desc =
"L2 cache misses from this core by cacheable I-cache, D-cache, P-cache, and W-cache (excluding block stores) requests",
375 .pme_name =
"SI_owned_sh",
376 .pme_desc =
"Number of times owned_in is asserted on bus requests from the local processor",
381 .pme_name =
"SI_RTO_src_data",
382 .pme_desc =
"Number of local RTO transactions due to W-cache or P-cache requests from this core where data is from the cache of another processor on the system, not from memory",
387 .pme_name =
"SW_pf_duplicate",
388 .pme_desc =
"Number of software prefetch instructions that were dropped because the prefetch request matched an outstanding request in the prefetch queue or the request hit the P-cache",
393 .pme_name =
"IU_stat_jmp_mispred",
394 .pme_desc =
"Number of retired non-annulled register indirect jumps mispredicted",
399 .pme_name =
"ITLB_miss",
400 .pme_desc =
"I-TLB misses",
405 .pme_name =
"DTLB_miss",
406 .pme_desc =
"D-TLB misses",
411 .pme_name =
"WC_miss",
412 .pme_desc =
"W-cache misses",
417 .pme_name =
"IC_fill",
418 .pme_desc =
"Number of I-cache fills excluding fills from the instruction prefetch buffer. This is the best approximation of the number of I-cache misses for instructions that were actually executed",
423 .pme_name =
"IU_stat_ret_mispred",
424 .pme_desc =
"Number of retired non-annulled returns mispredicted",
429 .pme_name =
"Re_L3_miss",
430 .pme_desc =
"Stall cycles due to recirculation of cacheable loads that miss D-cache, L2, and L3 cache",
435 .pme_name =
"Re_PFQ_full",
436 .pme_desc =
"Stall cycles due to recirculation of prefetch instructions because the prefetch queue (PFQ) was full",
441 .pme_name =
"PC_soft_hit",
442 .pme_desc =
"Number of cacheable FP loads that hit a P-cache line that was prefetched by a software prefetch instruction",
447 .pme_name =
"PC_inv",
448 .pme_desc =
"Number of P-cache lines that were invalidated due to external snoops, internal stores, and L2 evictions",
453 .pme_name =
"PC_hard_hit",
454 .pme_desc =
"Number of FP loads that hit a P-cache line that was fetched by a FP load or a hardware prefetch, irrespective of whether the loads hit or miss the D-cache",
460 .pme_desc =
"Number of I-cache prefetch requests sent to L2 cache",
465 .pme_name =
"SW_count_NOP1",
466 .pme_desc =
"Retired, non-annulled special software NOP instructions (which is equivalent to 'sethi %hi(0xfc000), %g0' instruction)",
471 .pme_name =
"IU_stat_br_miss_untaken",
472 .pme_desc =
"Number of retired non-annulled conditional branches that were predicted to be not taken, but in fact were taken",
477 .pme_name =
"IU_stat_br_count_taken",
478 .pme_desc =
"Number of retired non-annulled conditional branches that were taken",
483 .pme_name =
"PC_miss",
484 .pme_desc =
"Number of cacheable FP loads that miss P-cache, irrespective of whether the loads hit or miss the D-cache",
489 .pme_name =
"MC_writes_0_sh",
490 .pme_desc =
"Number of write requests completed to memory bank 0",
495 .pme_name =
"MC_writes_1_sh",
496 .pme_desc =
"Number of write requests completed to memory bank 1",
501 .pme_name =
"MC_writes_2_sh",
502 .pme_desc =
"Number of write requests completed to memory bank 2",
507 .pme_name =
"MC_writes_3_sh",
508 .pme_desc =
"Number of write requests completed to memory bank 3",
513 .pme_name =
"MC_stalls_1_sh",
514 .pme_desc =
"Number of processor cycles that requests were stalled in the MCU queues because bank 1 was busy with a previous request",
519 .pme_name =
"MC_stalls_3_sh",
520 .pme_desc =
"Number of processor cycles that requests were stalled in the MCU queues because bank 3 was busy with a previous request",
525 .pme_name =
"Re_RAW_miss",
526 .pme_desc =
"Stall cycles due to recirculation when there is a load instruction in the E-stage of the pipeline which has a non-bypassable read-after-write (RAW) hazard with an earlier store instruction",
531 .pme_name =
"FM_pipe_completion",
532 .pme_desc =
"Number of retired instructions that complete execution on the Floating-Point/Graphics Multiply pipeline",
537 .pme_name =
"SSM_L3_miss_mtag_remote",
538 .pme_desc =
"Number of L3 cache misses to LPA (local physical address) from this core which generate retry (R_*) transactions including R_RTS, R_RTO, and R_RS",
543 .pme_name =
"SSM_L3_miss_remote",
544 .pme_desc =
"Number of L3 cache misses from this core which generate retry (R_*) transactions to non-LPA (non-local physical address) address space, or R_WS transactions due to block store (BST) / block store commit (BSTC) to any address space (LPA or non-LPA), or R_RTO due to atomic request on Os state to LPA space.",
549 .pme_name =
"SW_pf_exec",
550 .pme_desc =
"Number of retired, non-trapping software prefetch instructions that completed, i.e. number of retired prefetch instructions that were not dropped due to the prefetch queue being full",
555 .pme_name =
"SW_pf_str_exec",
556 .pme_desc =
"Number of retired, non-trapping strong prefetch instructions that completed",
561 .pme_name =
"SW_pf_dropped",
562 .pme_desc =
"Number of software prefetch instructions dropped due to TLB miss or due to the prefetch queue being full",
567 .pme_name =
"SW_pf_L2_installed",
568 .pme_desc =
"Number of software prefetch instructions that installed lines in the L2 cache",
573 .pme_name =
"L2_HW_pf_miss",
574 .pme_desc =
"Number of L2 cache misses by hardware prefetch requests from this core",
579 .pme_name =
"L3_miss",
580 .pme_desc =
"Number of L3 cache misses sent out to SIU from this core by cacheable I-cache, D-cache, P-cache, and W-cache (excluding block stores) requests",
585 .pme_name =
"L3_IC_miss",
586 .pme_desc =
"Number of L3 cache misses by cacheable I-cache requests from this core",
591 .pme_name =
"L3_SW_pf_miss",
592 .pme_desc =
"Number of L3 cache misses by software prefetch requests from this core",
597 .pme_name =
"L3_hit_other_half",
598 .pme_desc =
"Number of L3 cache hits from this core to the ways filled by the other core when the cache is in pseudo-split mode",
604 .pme_desc =
"Number of L3 cache lines that were written back because of requests from this core",
609 .pme_name =
"L3_wb_sh",
610 .pme_desc =
"Number of L3 cache lines that were written back because of requests from both cores",
615 .pme_name =
"L2L3_snoop_cb_sh",
616 .pme_desc =
"Total number of L2 and L3 cache lines that were copied back due to other processors",
621#define PME_ULTRA4PLUS_EVENT_COUNT (sizeof(ultra4plus_pe)/sizeof(pme_sparc_entry_t))
static pme_sparc_entry_t ultra4plus_pe[]