#ifdef CONFIG_CPU_SUP_INTEL

/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS		4

/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE		24

#define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
#define PEBS_BUFFER_SIZE	PAGE_SIZE
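
/*
 * Worked sizes, assuming the usual 4KiB PAGE_SIZE (an assumption, not
 * something enforced here): BTS_BUFFER_SIZE is 16 pages = 64KiB, room
 * for 65536 / 24 = 2730 BTS records; PEBS_BUFFER_SIZE is one page,
 * room for 4096 / 144 = 28 pebs_record_core records or
 * 4096 / 176 = 23 pebs_record_nhm records.
 */
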
/*
 * pebs_record_32 for p4 and core not supported

struct pebs_record_32 {
	u32 flags, ip;
	u32 ax, bx, cx, dx;
	u32 si, di, bp, sp;
};

 */

struct pebs_record_core {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8,  r9,  r10, r11;
	u64 r12, r13, r14, r15;
};

struct pebs_record_nhm {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8,  r9,  r10, r11;
	u64 r12, r13, r14, r15;
	u64 status, dla, dse, lat;
};

/*
 * Bits in the debugctlmsr controlling branch tracing.
 */
#define X86_DEBUGCTL_TR			(1 << 6)
#define X86_DEBUGCTL_BTS		(1 << 7)
#define X86_DEBUGCTL_BTINT		(1 << 8)
#define X86_DEBUGCTL_BTS_OFF_OS		(1 << 9)
#define X86_DEBUGCTL_BTS_OFF_USR	(1 << 10)
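
/*
 * For illustration only: tracing user space while suppressing kernel
 * branches sets TR | BTS | BTINT | BTS_OFF_OS, i.e.
 * 0x40 | 0x80 | 0x100 | 0x200 == 0x3c0, which is what
 * intel_pmu_enable_bts() below composes for a config lacking
 * ARCH_PERFMON_EVENTSEL_OS.
 */
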
/*
 * A debug store configuration.
 *
 * We only support architectures that use 64bit fields.
 */
struct debug_store {
	u64	bts_buffer_base;
	u64	bts_index;
	u64	bts_absolute_maximum;
	u64	bts_interrupt_threshold;
	u64	pebs_buffer_base;
	u64	pebs_index;
	u64	pebs_absolute_maximum;
	u64	pebs_interrupt_threshold;
	u64	pebs_event_reset[MAX_PEBS_EVENTS];
};
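
/*
 * How the hardware uses this block (per the documented DS-area
 * layout): new records are written at *_index, which the CPU advances
 * by one record per entry; once *_index reaches *_interrupt_threshold
 * a PMI is requested, and writes never go past *_absolute_maximum.
 * The drain functions below reset *_index back to *_buffer_base.
 */
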
static void init_debug_store_on_cpu(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

	if (!ds)
		return;

	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
		     (u32)((u64)(unsigned long)ds),
		     (u32)((u64)(unsigned long)ds >> 32));
}

static void fini_debug_store_on_cpu(int cpu)
{
	if (!per_cpu(cpu_hw_events, cpu).ds)
		return;

	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
}

static void release_ds_buffers(void)
{
	int cpu;

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return;

	get_online_cpus();

	for_each_online_cpu(cpu)
		fini_debug_store_on_cpu(cpu);

	for_each_possible_cpu(cpu) {
		struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

		if (!ds)
			continue;

		per_cpu(cpu_hw_events, cpu).ds = NULL;

		kfree((void *)(unsigned long)ds->pebs_buffer_base);
		kfree((void *)(unsigned long)ds->bts_buffer_base);

		kfree(ds);
	}

	put_online_cpus();
}

static int reserve_ds_buffers(void)
{
	int cpu, err = 0;

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return 0;

	get_online_cpus();

	for_each_possible_cpu(cpu) {
		struct debug_store *ds;
		void *buffer;
		int max, thresh;

		err = -ENOMEM;
		ds = kzalloc(sizeof(*ds), GFP_KERNEL);
		if (unlikely(!ds))
			break;
		per_cpu(cpu_hw_events, cpu).ds = ds;

		if (x86_pmu.bts) {
			buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
			if (unlikely(!buffer))
				break;

			max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
			thresh = max / 16;

			ds->bts_buffer_base = (u64)(unsigned long)buffer;
			ds->bts_index = ds->bts_buffer_base;
			ds->bts_absolute_maximum = ds->bts_buffer_base +
				max * BTS_RECORD_SIZE;
			ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
				thresh * BTS_RECORD_SIZE;
		}
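
		/*
		 * Illustrative numbers, again assuming 4KiB pages and
		 * the max / 16 threshold sketched above: max =
		 * 65536 / 24 = 2730 records, thresh = 170, so the
		 * threshold interrupt fires about 170 records before
		 * the buffer would overflow.
		 */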

		if (x86_pmu.pebs) {
			buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL);
			if (unlikely(!buffer))
				break;

			max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;

			ds->pebs_buffer_base = (u64)(unsigned long)buffer;
			ds->pebs_index = ds->pebs_buffer_base;
			ds->pebs_absolute_maximum = ds->pebs_buffer_base +
				max * x86_pmu.pebs_record_size;
			/*
			 * Always use single record PEBS
			 */
			ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
				x86_pmu.pebs_record_size;
		}
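
		/*
		 * Note on the single-record threshold above: with the
		 * threshold just one record past the base, every PEBS
		 * assist raises a PMI right away; the drain code below
		 * relies on this to attribute each record to the
		 * current event state.
		 */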

		err = 0;
	}

	if (err)
		release_ds_buffers();
	else {
		for_each_online_cpu(cpu)
			init_debug_store_on_cpu(cpu);
	}

	put_online_cpus();

	return err;
}

/*
 * BTS
 */

static struct event_constraint bts_constraint =
	EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);

static void intel_pmu_enable_bts(u64 config)
{
	unsigned long debugctlmsr;

	debugctlmsr = get_debugctlmsr();

	debugctlmsr |= X86_DEBUGCTL_TR;
	debugctlmsr |= X86_DEBUGCTL_BTS;
	debugctlmsr |= X86_DEBUGCTL_BTINT;

	if (!(config & ARCH_PERFMON_EVENTSEL_OS))
		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;

	if (!(config & ARCH_PERFMON_EVENTSEL_USR))
		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;

	update_debugctlmsr(debugctlmsr);
}

static void intel_pmu_disable_bts(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	unsigned long debugctlmsr;

	if (!cpuc->ds)
		return;

	debugctlmsr = get_debugctlmsr();

	debugctlmsr &=
		~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
		  X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);

	update_debugctlmsr(debugctlmsr);
}

static void intel_pmu_drain_bts_buffer(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct bts_record {
		u64	from;
		u64	to;
		u64	flags;
	};
	struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
	struct bts_record *at, *top;
	struct perf_output_handle handle;
	struct perf_event_header header;
	struct perf_sample_data data;
	struct pt_regs regs;

	if (!event)
		return;

	if (!ds)
		return;

	at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
	top = (struct bts_record *)(unsigned long)ds->bts_index;

	if (top <= at)
		return;

	ds->bts_index = ds->bts_buffer_base;

	perf_sample_data_init(&data, 0);
	data.period = event->hw.last_period;
	regs.ip     = 0;

	/*
	 * Prepare a generic sample, i.e. fill in the invariant fields.
	 * We will overwrite the from and to address before we output
	 * the sample.
	 */
	perf_prepare_sample(&header, &data, event, &regs);

	if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1))
		return;

	for (; at < top; at++) {
		data.ip		= at->from;
		data.addr	= at->to;

		perf_output_sample(&handle, &header, &data, event);
	}

	perf_output_end(&handle);

	/* There's new data available. */
	event->hw.interrupts++;
	event->pending_kill = POLL_IN;
}

/*
 * PEBS
 */

static struct event_constraint intel_core_pebs_events[] = {
	PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */
	PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
	PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
	PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
	PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */
	PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
	PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */
	PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
	PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */
	EVENT_CONSTRAINT_END
};
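
/*
 * Reading these tables: the first argument packs the event select in
 * the low byte and the unit mask in the byte above it (0x01cb is
 * umask 0x01, event 0xcb); the second is the mask of counters the
 * event may use -- 0x1 restricts PEBS to PMC0 on Core2, while 0xf
 * below allows PMC0-3 on Nehalem.
 */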

static struct event_constraint intel_nehalem_pebs_events[] = {
	PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */
	PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */
	PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */
	PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETIRED.ANY */
	PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */
	PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
	PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */
	PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
	PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */
	EVENT_CONSTRAINT_END
};

static struct event_constraint *
intel_pebs_constraints(struct perf_event *event)
{
	struct event_constraint *c;

	if (!event->attr.precise)
		return NULL;

	if (x86_pmu.pebs_constraints) {
		for_each_event_constraint(c, x86_pmu.pebs_constraints) {
			if ((event->hw.config & c->cmask) == c->code)
				return c;
		}
	}

	return &emptyconstraint;
}

static void intel_pmu_pebs_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	u64 val = cpuc->pebs_enabled;

	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;

	val |= 1ULL << hwc->idx;
	wrmsrl(MSR_IA32_PEBS_ENABLE, val);

	if (x86_pmu.intel_cap.pebs_trap)
		intel_pmu_lbr_enable(event);
}

static void intel_pmu_pebs_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	u64 val = cpuc->pebs_enabled;

	val &= ~(1ULL << hwc->idx);
	wrmsrl(MSR_IA32_PEBS_ENABLE, val);

	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;

	if (x86_pmu.intel_cap.pebs_trap)
		intel_pmu_lbr_disable(event);
}

static void intel_pmu_pebs_enable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (cpuc->pebs_enabled)
		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
}

static void intel_pmu_pebs_disable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (cpuc->pebs_enabled)
		wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
}

#include <asm/insn.h>

static inline bool kernel_ip(unsigned long ip)
{
#ifdef CONFIG_X86_32
	return ip > PAGE_OFFSET;
#else
	return (long)ip < 0;
#endif
}
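
/*
 * Why this works: on 32bit the kernel is mapped above PAGE_OFFSET,
 * and on 64bit kernel text lives in the upper (sign-extended) half of
 * the address space, so the sign bit alone separates kernel from user
 * pointers.
 */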

static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	unsigned long from = cpuc->lbr_entries[0].from;
	unsigned long old_to, to = cpuc->lbr_entries[0].to;
	unsigned long ip = regs->ip;

	/*
	 * We don't need to fixup if the PEBS assist is fault like
	 */
	if (!x86_pmu.intel_cap.pebs_trap)
		return 1;

	/*
	 * No LBR entry, no basic block, no rewinding
	 */
	if (!cpuc->lbr_stack.nr || !from || !to)
		return 0;

	/*
	 * Basic blocks should never cross user/kernel boundaries
	 */
	if (kernel_ip(ip) != kernel_ip(to))
		return 0;

	/*
	 * unsigned math, either ip is before the start (impossible) or
	 * the basic block is larger than 1 page (sanity)
	 */
	if ((ip - to) > PAGE_SIZE)
		return 0;
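
	/*
	 * Spelling the unsigned trick out: if to > ip the subtraction
	 * wraps to a huge value, so the single comparison against
	 * PAGE_SIZE rejects both a block starting after the sampled ip
	 * and a block spanning more than one page.
	 */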

	/*
	 * We sampled a branch insn, rewind using the LBR stack
	 */
	if (ip == to) {
		regs->ip = from;
		return 1;
	}

	do {
		struct insn insn;
		u8 buf[MAX_INSN_SIZE];
		void *kaddr;

		old_to = to;
		if (!kernel_ip(ip)) {
			int bytes, size = MAX_INSN_SIZE;

			bytes = copy_from_user_nmi(buf, (void __user *)to, size);
			if (bytes != size)
				return 0;

			kaddr = buf;
		} else
			kaddr = (void *)to;

		kernel_insn_init(&insn, kaddr);
		insn_get_length(&insn);
		to += insn.length;
	} while (to < ip);

	if (to == ip) {
		regs->ip = old_to;
		return 1;
	}

	/*
	 * Even though we decoded the basic block, the instruction stream
	 * never matched the given IP, either the TO or the IP got corrupted.
	 */
	return 0;
}

static int intel_pmu_save_and_restart(struct perf_event *event);

static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct perf_event *event = cpuc->events[0]; /* PMC0 only */
	struct pebs_record_core *at, *top;
	struct perf_sample_data data;
	struct perf_raw_record raw;
	struct pt_regs regs;
	int n;

	if (!event || !ds || !x86_pmu.pebs)
		return;

	intel_pmu_pebs_disable_all();

	at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
	top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;

	if (top <= at)
		goto out;

	ds->pebs_index = ds->pebs_buffer_base;

	if (!intel_pmu_save_and_restart(event))
		goto out;

	perf_sample_data_init(&data, 0);
	data.period = event->hw.last_period;

	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
		raw.size = x86_pmu.pebs_record_size;
		raw.data = at;
		data.raw = &raw;
	}

	n = top - at;

	/*
	 * Should not happen, we program the threshold at 1 and do not
	 * set a reset value.
	 */
	WARN_ON_ONCE(n > 1);

	/*
	 * We use the interrupt regs as a base because the PEBS record
	 * does not contain a full regs set, specifically it seems to
	 * lack segment descriptors, which get used by things like
	 * user_mode().
	 *
	 * In the simple case fix up only the IP and BP,SP regs, for
	 * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
	 * A possible PERF_SAMPLE_REGS will have to transfer all regs.
	 */
	regs = *iregs;
	regs.ip = at->ip;
	regs.bp = at->bp;
	regs.sp = at->sp;

	if (intel_pmu_pebs_fixup_ip(&regs))
		regs.flags |= PERF_EFLAGS_EXACT;
	else
		regs.flags &= ~PERF_EFLAGS_EXACT;

	if (perf_event_overflow(event, 1, &data, &regs))
		x86_pmu_stop(event);

out:
	intel_pmu_pebs_enable_all();
}

static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct pebs_record_nhm *at, *top;
	struct perf_sample_data data;
	struct perf_event *event = NULL;
	struct perf_raw_record raw;
	struct pt_regs regs;
	int bit, n;

	if (!ds || !x86_pmu.pebs)
		return;

	intel_pmu_pebs_disable_all();

	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;

	if (top <= at)
		goto out;

	ds->pebs_index = ds->pebs_buffer_base;

	n = top - at;

	/*
	 * Should not happen, we program the threshold at 1 and do not
	 * set a reset value.
	 */
	WARN_ON_ONCE(n > MAX_PEBS_EVENTS);
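
	/*
	 * Unlike the core variant the bound is MAX_PEBS_EVENTS rather
	 * than 1: with several counters armed, each can complete an
	 * assist before the PMI is serviced, so multiple records may
	 * be pending despite the single-record threshold.
	 */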

	for ( ; at < top; at++) {
		for_each_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) {
			if (!cpuc->events[bit]->attr.precise)
				continue;

			event = cpuc->events[bit];
		}

		if (!event)
			continue;

		if (!intel_pmu_save_and_restart(event))
			continue;

		perf_sample_data_init(&data, 0);
		data.period = event->hw.last_period;

		if (event->attr.sample_type & PERF_SAMPLE_RAW) {
			raw.size = x86_pmu.pebs_record_size;
			raw.data = at;
			data.raw = &raw;
		}

		/*
		 * See the comment in intel_pmu_drain_pebs_core()
		 */
		regs = *iregs;
		regs.ip = at->ip;
		regs.bp = at->bp;
		regs.sp = at->sp;

		if (intel_pmu_pebs_fixup_ip(&regs))
			regs.flags |= PERF_EFLAGS_EXACT;
		else
			regs.flags &= ~PERF_EFLAGS_EXACT;

		if (perf_event_overflow(event, 1, &data, &regs))
			x86_pmu_stop(event);
	}

out:
	intel_pmu_pebs_enable_all();
}

/*
 * BTS, PEBS probe and setup
 */

static void intel_ds_init(void)
{
	/*
	 * No support for 32bit formats
	 */
	if (!boot_cpu_has(X86_FEATURE_DTES64))
		return;

	x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
	x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
	if (x86_pmu.pebs) {
		char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
		int format = x86_pmu.intel_cap.pebs_format;

		switch (format) {
		case 0:
			printk(KERN_CONT "PEBS fmt0%c, ", pebs_type);
			x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
			x86_pmu.pebs_constraints = intel_core_pebs_events;
			break;

		case 1:
			printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
			x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
			x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
			break;

		default:
			printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
			x86_pmu.pebs = 0;
			break;
		}
	}
}

#else /* CONFIG_CPU_SUP_INTEL */

static int reserve_ds_buffers(void)
{
	return 0;
}

static void release_ds_buffers(void)
{
}

#endif /* CONFIG_CPU_SUP_INTEL */