1 #ifdef CONFIG_CPU_SUP_INTEL
/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS		4

/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE		24

/* BTS gets 16 pages, PEBS a single page, of DS buffer space. */
#define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
#define PEBS_BUFFER_SIZE	PAGE_SIZE
/*
 * pebs_record_32 for p4 and core not supported

struct pebs_record_32 {
	u32 flags, ip;
	u32 ax, bx, cx, dx;
	u32 si, di, bp, sp;
};

 */
23 struct pebs_record_core
{
28 u64 r12
, r13
, r14
, r15
;
31 struct pebs_record_nhm
{
36 u64 r12
, r13
, r14
, r15
;
37 u64 status
, dla
, dse
, lat
;
41 * A debug store configuration.
43 * We only support architectures that use 64bit fields.
48 u64 bts_absolute_maximum
;
49 u64 bts_interrupt_threshold
;
52 u64 pebs_absolute_maximum
;
53 u64 pebs_interrupt_threshold
;
54 u64 pebs_event_reset
[MAX_PEBS_EVENTS
];
57 static void init_debug_store_on_cpu(int cpu
)
59 struct debug_store
*ds
= per_cpu(cpu_hw_events
, cpu
).ds
;
64 wrmsr_on_cpu(cpu
, MSR_IA32_DS_AREA
,
65 (u32
)((u64
)(unsigned long)ds
),
66 (u32
)((u64
)(unsigned long)ds
>> 32));
69 static void fini_debug_store_on_cpu(int cpu
)
71 if (!per_cpu(cpu_hw_events
, cpu
).ds
)
74 wrmsr_on_cpu(cpu
, MSR_IA32_DS_AREA
, 0, 0);
77 static void release_ds_buffers(void)
81 if (!x86_pmu
.bts
&& !x86_pmu
.pebs
)
86 for_each_online_cpu(cpu
)
87 fini_debug_store_on_cpu(cpu
);
89 for_each_possible_cpu(cpu
) {
90 struct debug_store
*ds
= per_cpu(cpu_hw_events
, cpu
).ds
;
95 per_cpu(cpu_hw_events
, cpu
).ds
= NULL
;
97 kfree((void *)(unsigned long)ds
->pebs_buffer_base
);
98 kfree((void *)(unsigned long)ds
->bts_buffer_base
);
105 static int reserve_ds_buffers(void)
109 if (!x86_pmu
.bts
&& !x86_pmu
.pebs
)
114 for_each_possible_cpu(cpu
) {
115 struct debug_store
*ds
;
120 ds
= kzalloc(sizeof(*ds
), GFP_KERNEL
);
123 per_cpu(cpu_hw_events
, cpu
).ds
= ds
;
126 buffer
= kzalloc(BTS_BUFFER_SIZE
, GFP_KERNEL
);
127 if (unlikely(!buffer
))
130 max
= BTS_BUFFER_SIZE
/ BTS_RECORD_SIZE
;
133 ds
->bts_buffer_base
= (u64
)(unsigned long)buffer
;
134 ds
->bts_index
= ds
->bts_buffer_base
;
135 ds
->bts_absolute_maximum
= ds
->bts_buffer_base
+
136 max
* BTS_RECORD_SIZE
;
137 ds
->bts_interrupt_threshold
= ds
->bts_absolute_maximum
-
138 thresh
* BTS_RECORD_SIZE
;
142 buffer
= kzalloc(PEBS_BUFFER_SIZE
, GFP_KERNEL
);
143 if (unlikely(!buffer
))
146 max
= PEBS_BUFFER_SIZE
/ x86_pmu
.pebs_record_size
;
148 ds
->pebs_buffer_base
= (u64
)(unsigned long)buffer
;
149 ds
->pebs_index
= ds
->pebs_buffer_base
;
150 ds
->pebs_absolute_maximum
= ds
->pebs_buffer_base
+
151 max
* x86_pmu
.pebs_record_size
;
153 * Always use single record PEBS
155 ds
->pebs_interrupt_threshold
= ds
->pebs_buffer_base
+
156 x86_pmu
.pebs_record_size
;
163 release_ds_buffers();
165 for_each_online_cpu(cpu
)
166 init_debug_store_on_cpu(cpu
);
178 static struct event_constraint bts_constraint
=
179 EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS
, 0);
181 static void intel_pmu_enable_bts(u64 config
)
183 unsigned long debugctlmsr
;
185 debugctlmsr
= get_debugctlmsr();
187 debugctlmsr
|= DEBUGCTLMSR_TR
;
188 debugctlmsr
|= DEBUGCTLMSR_BTS
;
189 debugctlmsr
|= DEBUGCTLMSR_BTINT
;
191 if (!(config
& ARCH_PERFMON_EVENTSEL_OS
))
192 debugctlmsr
|= DEBUGCTLMSR_BTS_OFF_OS
;
194 if (!(config
& ARCH_PERFMON_EVENTSEL_USR
))
195 debugctlmsr
|= DEBUGCTLMSR_BTS_OFF_USR
;
197 update_debugctlmsr(debugctlmsr
);
200 static void intel_pmu_disable_bts(void)
202 struct cpu_hw_events
*cpuc
= &__get_cpu_var(cpu_hw_events
);
203 unsigned long debugctlmsr
;
208 debugctlmsr
= get_debugctlmsr();
211 ~(DEBUGCTLMSR_TR
| DEBUGCTLMSR_BTS
| DEBUGCTLMSR_BTINT
|
212 DEBUGCTLMSR_BTS_OFF_OS
| DEBUGCTLMSR_BTS_OFF_USR
);
214 update_debugctlmsr(debugctlmsr
);
217 static void intel_pmu_drain_bts_buffer(void)
219 struct cpu_hw_events
*cpuc
= &__get_cpu_var(cpu_hw_events
);
220 struct debug_store
*ds
= cpuc
->ds
;
226 struct perf_event
*event
= cpuc
->events
[X86_PMC_IDX_FIXED_BTS
];
227 struct bts_record
*at
, *top
;
228 struct perf_output_handle handle
;
229 struct perf_event_header header
;
230 struct perf_sample_data data
;
239 at
= (struct bts_record
*)(unsigned long)ds
->bts_buffer_base
;
240 top
= (struct bts_record
*)(unsigned long)ds
->bts_index
;
245 ds
->bts_index
= ds
->bts_buffer_base
;
247 perf_sample_data_init(&data
, 0);
248 data
.period
= event
->hw
.last_period
;
252 * Prepare a generic sample, i.e. fill in the invariant fields.
253 * We will overwrite the from and to address before we output
256 perf_prepare_sample(&header
, &data
, event
, ®s
);
258 if (perf_output_begin(&handle
, event
, header
.size
* (top
- at
), 1, 1))
261 for (; at
< top
; at
++) {
265 perf_output_sample(&handle
, &header
, &data
, event
);
268 perf_output_end(&handle
);
270 /* There's new data available. */
271 event
->hw
.interrupts
++;
272 event
->pending_kill
= POLL_IN
;
279 static struct event_constraint intel_core_pebs_events
[] = {
280 PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */
281 PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
282 PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
283 PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */
284 PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */
285 PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
286 PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */
287 PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
288 PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */
292 static struct event_constraint intel_nehalem_pebs_events
[] = {
293 PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */
294 PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */
295 PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */
296 PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETURED.ANY */
297 PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */
298 PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
299 PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */
300 PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
301 PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */
305 static struct event_constraint
*
306 intel_pebs_constraints(struct perf_event
*event
)
308 struct event_constraint
*c
;
310 if (!event
->attr
.precise
)
313 if (x86_pmu
.pebs_constraints
) {
314 for_each_event_constraint(c
, x86_pmu
.pebs_constraints
) {
315 if ((event
->hw
.config
& c
->cmask
) == c
->code
)
320 return &emptyconstraint
;
323 static void intel_pmu_pebs_enable(struct perf_event
*event
)
325 struct cpu_hw_events
*cpuc
= &__get_cpu_var(cpu_hw_events
);
326 struct hw_perf_event
*hwc
= &event
->hw
;
328 hwc
->config
&= ~ARCH_PERFMON_EVENTSEL_INT
;
330 cpuc
->pebs_enabled
|= 1ULL << hwc
->idx
;
331 WARN_ON_ONCE(cpuc
->enabled
);
333 if (x86_pmu
.intel_cap
.pebs_trap
)
334 intel_pmu_lbr_enable(event
);
337 static void intel_pmu_pebs_disable(struct perf_event
*event
)
339 struct cpu_hw_events
*cpuc
= &__get_cpu_var(cpu_hw_events
);
340 struct hw_perf_event
*hwc
= &event
->hw
;
342 cpuc
->pebs_enabled
&= ~(1ULL << hwc
->idx
);
344 wrmsrl(MSR_IA32_PEBS_ENABLE
, cpuc
->pebs_enabled
);
346 hwc
->config
|= ARCH_PERFMON_EVENTSEL_INT
;
348 if (x86_pmu
.intel_cap
.pebs_trap
)
349 intel_pmu_lbr_disable(event
);
352 static void intel_pmu_pebs_enable_all(void)
354 struct cpu_hw_events
*cpuc
= &__get_cpu_var(cpu_hw_events
);
356 if (cpuc
->pebs_enabled
)
357 wrmsrl(MSR_IA32_PEBS_ENABLE
, cpuc
->pebs_enabled
);
360 static void intel_pmu_pebs_disable_all(void)
362 struct cpu_hw_events
*cpuc
= &__get_cpu_var(cpu_hw_events
);
364 if (cpuc
->pebs_enabled
)
365 wrmsrl(MSR_IA32_PEBS_ENABLE
, 0);
368 #include <asm/insn.h>
370 static inline bool kernel_ip(unsigned long ip
)
373 return ip
> PAGE_OFFSET
;
379 static int intel_pmu_pebs_fixup_ip(struct pt_regs
*regs
)
381 struct cpu_hw_events
*cpuc
= &__get_cpu_var(cpu_hw_events
);
382 unsigned long from
= cpuc
->lbr_entries
[0].from
;
383 unsigned long old_to
, to
= cpuc
->lbr_entries
[0].to
;
384 unsigned long ip
= regs
->ip
;
387 * We don't need to fixup if the PEBS assist is fault like
389 if (!x86_pmu
.intel_cap
.pebs_trap
)
393 * No LBR entry, no basic block, no rewinding
395 if (!cpuc
->lbr_stack
.nr
|| !from
|| !to
)
399 * Basic blocks should never cross user/kernel boundaries
401 if (kernel_ip(ip
) != kernel_ip(to
))
405 * unsigned math, either ip is before the start (impossible) or
406 * the basic block is larger than 1 page (sanity)
408 if ((ip
- to
) > PAGE_SIZE
)
412 * We sampled a branch insn, rewind using the LBR stack
421 u8 buf
[MAX_INSN_SIZE
];
425 if (!kernel_ip(ip
)) {
426 int bytes
, size
= MAX_INSN_SIZE
;
428 bytes
= copy_from_user_nmi(buf
, (void __user
*)to
, size
);
436 kernel_insn_init(&insn
, kaddr
);
437 insn_get_length(&insn
);
447 * Even though we decoded the basic block, the instruction stream
448 * never matched the given IP, either the TO or the IP got corrupted.
453 static int intel_pmu_save_and_restart(struct perf_event
*event
);
455 static void intel_pmu_drain_pebs_core(struct pt_regs
*iregs
)
457 struct cpu_hw_events
*cpuc
= &__get_cpu_var(cpu_hw_events
);
458 struct debug_store
*ds
= cpuc
->ds
;
459 struct perf_event
*event
= cpuc
->events
[0]; /* PMC0 only */
460 struct pebs_record_core
*at
, *top
;
461 struct perf_sample_data data
;
462 struct perf_raw_record raw
;
466 if (!ds
|| !x86_pmu
.pebs
)
469 at
= (struct pebs_record_core
*)(unsigned long)ds
->pebs_buffer_base
;
470 top
= (struct pebs_record_core
*)(unsigned long)ds
->pebs_index
;
473 * Whatever else happens, drain the thing
475 ds
->pebs_index
= ds
->pebs_buffer_base
;
477 if (!test_bit(0, cpuc
->active_mask
))
480 WARN_ON_ONCE(!event
);
482 if (!event
->attr
.precise
)
489 if (!intel_pmu_save_and_restart(event
))
493 * Should not happen, we program the threshold at 1 and do not
499 perf_sample_data_init(&data
, 0);
500 data
.period
= event
->hw
.last_period
;
502 if (event
->attr
.sample_type
& PERF_SAMPLE_RAW
) {
503 raw
.size
= x86_pmu
.pebs_record_size
;
509 * We use the interrupt regs as a base because the PEBS record
510 * does not contain a full regs set, specifically it seems to
511 * lack segment descriptors, which get used by things like
514 * In the simple case fix up only the IP and BP,SP regs, for
515 * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
516 * A possible PERF_SAMPLE_REGS will have to transfer all regs.
523 if (intel_pmu_pebs_fixup_ip(®s
))
524 regs
.flags
|= PERF_EFLAGS_EXACT
;
526 regs
.flags
&= ~PERF_EFLAGS_EXACT
;
528 if (perf_event_overflow(event
, 1, &data
, ®s
))
532 static void intel_pmu_drain_pebs_nhm(struct pt_regs
*iregs
)
534 struct cpu_hw_events
*cpuc
= &__get_cpu_var(cpu_hw_events
);
535 struct debug_store
*ds
= cpuc
->ds
;
536 struct pebs_record_nhm
*at
, *top
;
537 struct perf_sample_data data
;
538 struct perf_event
*event
= NULL
;
539 struct perf_raw_record raw
;
544 if (!ds
|| !x86_pmu
.pebs
)
547 at
= (struct pebs_record_nhm
*)(unsigned long)ds
->pebs_buffer_base
;
548 top
= (struct pebs_record_nhm
*)(unsigned long)ds
->pebs_index
;
550 ds
->pebs_index
= ds
->pebs_buffer_base
;
557 * Should not happen, we program the threshold at 1 and do not
560 WARN_ON_ONCE(n
> MAX_PEBS_EVENTS
);
562 for ( ; at
< top
; at
++) {
563 for_each_bit(bit
, (unsigned long *)&at
->status
, MAX_PEBS_EVENTS
) {
564 event
= cpuc
->events
[bit
];
565 if (!test_bit(bit
, cpuc
->active_mask
))
568 WARN_ON_ONCE(!event
);
570 if (!event
->attr
.precise
)
573 if (__test_and_set_bit(bit
, (unsigned long *)&status
))
579 if (!event
|| bit
>= MAX_PEBS_EVENTS
)
582 if (!intel_pmu_save_and_restart(event
))
585 perf_sample_data_init(&data
, 0);
586 data
.period
= event
->hw
.last_period
;
588 if (event
->attr
.sample_type
& PERF_SAMPLE_RAW
) {
589 raw
.size
= x86_pmu
.pebs_record_size
;
595 * See the comment in intel_pmu_drain_pebs_core()
602 if (intel_pmu_pebs_fixup_ip(®s
))
603 regs
.flags
|= PERF_EFLAGS_EXACT
;
605 regs
.flags
&= ~PERF_EFLAGS_EXACT
;
607 if (perf_event_overflow(event
, 1, &data
, ®s
))
613 * BTS, PEBS probe and setup
616 static void intel_ds_init(void)
619 * No support for 32bit formats
621 if (!boot_cpu_has(X86_FEATURE_DTES64
))
624 x86_pmu
.bts
= boot_cpu_has(X86_FEATURE_BTS
);
625 x86_pmu
.pebs
= boot_cpu_has(X86_FEATURE_PEBS
);
627 char pebs_type
= x86_pmu
.intel_cap
.pebs_trap
? '+' : '-';
628 int format
= x86_pmu
.intel_cap
.pebs_format
;
632 printk(KERN_CONT
"PEBS fmt0%c, ", pebs_type
);
633 x86_pmu
.pebs_record_size
= sizeof(struct pebs_record_core
);
634 x86_pmu
.drain_pebs
= intel_pmu_drain_pebs_core
;
635 x86_pmu
.pebs_constraints
= intel_core_pebs_events
;
639 printk(KERN_CONT
"PEBS fmt1%c, ", pebs_type
);
640 x86_pmu
.pebs_record_size
= sizeof(struct pebs_record_nhm
);
641 x86_pmu
.drain_pebs
= intel_pmu_drain_pebs_nhm
;
642 x86_pmu
.pebs_constraints
= intel_nehalem_pebs_events
;
646 printk(KERN_CONT
"no PEBS fmt%d%c, ", format
, pebs_type
);
653 #else /* CONFIG_CPU_SUP_INTEL */
/* Stubs for builds without Intel CPU support: DS is never reserved. */
static int reserve_ds_buffers(void)
{
	return 0;
}

static void release_ds_buffers(void)
{
}
664 #endif /* CONFIG_CPU_SUP_INTEL */