perf_counter, x86: Fix generic cache events on P6-mobile CPUs
[linux-2.6/verdex.git] arch/x86/kernel/cpu/perf_counter.c
1 /*
2 * Performance counter x86 architecture code
4 * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
5 * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
6 * Copyright (C) 2009 Jaswinder Singh Rajput
7 * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
8 * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
10 * For licencing details see kernel-base/COPYING
13 #include <linux/perf_counter.h>
14 #include <linux/capability.h>
15 #include <linux/notifier.h>
16 #include <linux/hardirq.h>
17 #include <linux/kprobes.h>
18 #include <linux/module.h>
19 #include <linux/kdebug.h>
20 #include <linux/sched.h>
21 #include <linux/uaccess.h>
22 #include <linux/highmem.h>
24 #include <asm/apic.h>
25 #include <asm/stacktrace.h>
26 #include <asm/nmi.h>
28 static u64 perf_counter_mask __read_mostly;
30 struct cpu_hw_counters {
31 struct perf_counter *counters[X86_PMC_IDX_MAX];
32 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
33 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
34 unsigned long interrupts;
35 int enabled;
39 * struct x86_pmu - generic x86 pmu
41 struct x86_pmu {
42 const char *name;
43 int version;
44 int (*handle_irq)(struct pt_regs *);
45 void (*disable_all)(void);
46 void (*enable_all)(void);
47 void (*enable)(struct hw_perf_counter *, int);
48 void (*disable)(struct hw_perf_counter *, int);
49 unsigned eventsel;
50 unsigned perfctr;
51 u64 (*event_map)(int);
52 u64 (*raw_event)(u64);
53 int max_events;
54 int num_counters;
55 int num_counters_fixed;
56 int counter_bits;
57 u64 counter_mask;
58 u64 max_period;
59 u64 intel_ctrl;
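/*
 * The boot-time init routines further down (p6_pmu_init, intel_pmu_init,
 * amd_pmu_init) copy the matching model specific template into this
 * single, system wide instance:
 */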
62 static struct x86_pmu x86_pmu __read_mostly;
64 static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
65 .enabled = 1,
69 * Not sure about some of these
71 static const u64 p6_perfmon_event_map[] =
73 [PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
74 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
75 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
76 [PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
77 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
78 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
79 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
82 static u64 p6_pmu_event_map(int event)
84 return p6_perfmon_event_map[event];
88 * Counter setting that is specified not to count anything.
89 * We use this to effectively disable a counter.
91 * L2_RQSTS with 0 MESI unit mask.
93 #define P6_NOP_COUNTER 0x0000002EULL
95 static u64 p6_pmu_raw_event(u64 event)
97 #define P6_EVNTSEL_EVENT_MASK 0x000000FFULL
98 #define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL
99 #define P6_EVNTSEL_EDGE_MASK 0x00040000ULL
100 #define P6_EVNTSEL_INV_MASK 0x00800000ULL
101 #define P6_EVNTSEL_COUNTER_MASK 0xFF000000ULL
103 #define P6_EVNTSEL_MASK \
104 (P6_EVNTSEL_EVENT_MASK | \
105 P6_EVNTSEL_UNIT_MASK | \
106 P6_EVNTSEL_EDGE_MASK | \
107 P6_EVNTSEL_INV_MASK | \
108 P6_EVNTSEL_COUNTER_MASK)
110 return event & P6_EVNTSEL_MASK;
115 * Intel PerfMon v3. Used on Core2 and later.
117 static const u64 intel_perfmon_event_map[] =
119 [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
120 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
121 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
122 [PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
123 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
124 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
125 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
128 static u64 intel_pmu_event_map(int event)
130 return intel_perfmon_event_map[event];
134 * Generalized hw caching related event table, filled
135 * in on a per model basis. A value of 0 means
136 * 'not supported', -1 means 'event makes no sense on
137 * this CPU', any other value means the raw event
138 * ID.
141 #define C(x) PERF_COUNT_HW_CACHE_##x
143 static u64 __read_mostly hw_cache_event_ids
144 [PERF_COUNT_HW_CACHE_MAX]
145 [PERF_COUNT_HW_CACHE_OP_MAX]
146 [PERF_COUNT_HW_CACHE_RESULT_MAX];
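/*
 * A generalized cache event is requested by packing the three indices
 * into attr->config, one byte each (decoded by set_ext_hw_attr()
 * further down):
 *
 *	config = cache_type | (cache_op << 8) | (cache_result << 16)
 *
 * e.g. an L1D read miss is C(L1D) | (C(OP_READ) << 8) |
 * (C(RESULT_MISS) << 16), which the tables below translate into the
 * model specific raw event code.
 */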
148 static const u64 nehalem_hw_cache_event_ids
149 [PERF_COUNT_HW_CACHE_MAX]
150 [PERF_COUNT_HW_CACHE_OP_MAX]
151 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
153 [ C(L1D) ] = {
154 [ C(OP_READ) ] = {
155 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
156 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
158 [ C(OP_WRITE) ] = {
159 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
160 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
162 [ C(OP_PREFETCH) ] = {
163 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
164 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
167 [ C(L1I ) ] = {
168 [ C(OP_READ) ] = {
169 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
170 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
172 [ C(OP_WRITE) ] = {
173 [ C(RESULT_ACCESS) ] = -1,
174 [ C(RESULT_MISS) ] = -1,
176 [ C(OP_PREFETCH) ] = {
177 [ C(RESULT_ACCESS) ] = 0x0,
178 [ C(RESULT_MISS) ] = 0x0,
181 [ C(LL ) ] = {
182 [ C(OP_READ) ] = {
183 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
184 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
186 [ C(OP_WRITE) ] = {
187 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
188 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
190 [ C(OP_PREFETCH) ] = {
191 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
192 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
195 [ C(DTLB) ] = {
196 [ C(OP_READ) ] = {
197 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
198 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
200 [ C(OP_WRITE) ] = {
201 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
202 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
204 [ C(OP_PREFETCH) ] = {
205 [ C(RESULT_ACCESS) ] = 0x0,
206 [ C(RESULT_MISS) ] = 0x0,
209 [ C(ITLB) ] = {
210 [ C(OP_READ) ] = {
211 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
212 [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */
214 [ C(OP_WRITE) ] = {
215 [ C(RESULT_ACCESS) ] = -1,
216 [ C(RESULT_MISS) ] = -1,
218 [ C(OP_PREFETCH) ] = {
219 [ C(RESULT_ACCESS) ] = -1,
220 [ C(RESULT_MISS) ] = -1,
223 [ C(BPU ) ] = {
224 [ C(OP_READ) ] = {
225 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
226 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
228 [ C(OP_WRITE) ] = {
229 [ C(RESULT_ACCESS) ] = -1,
230 [ C(RESULT_MISS) ] = -1,
232 [ C(OP_PREFETCH) ] = {
233 [ C(RESULT_ACCESS) ] = -1,
234 [ C(RESULT_MISS) ] = -1,
239 static const u64 core2_hw_cache_event_ids
240 [PERF_COUNT_HW_CACHE_MAX]
241 [PERF_COUNT_HW_CACHE_OP_MAX]
242 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
244 [ C(L1D) ] = {
245 [ C(OP_READ) ] = {
246 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
247 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
249 [ C(OP_WRITE) ] = {
250 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
251 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
253 [ C(OP_PREFETCH) ] = {
254 [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */
255 [ C(RESULT_MISS) ] = 0,
258 [ C(L1I ) ] = {
259 [ C(OP_READ) ] = {
260 [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */
261 [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */
263 [ C(OP_WRITE) ] = {
264 [ C(RESULT_ACCESS) ] = -1,
265 [ C(RESULT_MISS) ] = -1,
267 [ C(OP_PREFETCH) ] = {
268 [ C(RESULT_ACCESS) ] = 0,
269 [ C(RESULT_MISS) ] = 0,
272 [ C(LL ) ] = {
273 [ C(OP_READ) ] = {
274 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
275 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
277 [ C(OP_WRITE) ] = {
278 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
279 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
281 [ C(OP_PREFETCH) ] = {
282 [ C(RESULT_ACCESS) ] = 0,
283 [ C(RESULT_MISS) ] = 0,
286 [ C(DTLB) ] = {
287 [ C(OP_READ) ] = {
288 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
289 [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */
291 [ C(OP_WRITE) ] = {
292 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
293 [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */
295 [ C(OP_PREFETCH) ] = {
296 [ C(RESULT_ACCESS) ] = 0,
297 [ C(RESULT_MISS) ] = 0,
300 [ C(ITLB) ] = {
301 [ C(OP_READ) ] = {
302 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
303 [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */
305 [ C(OP_WRITE) ] = {
306 [ C(RESULT_ACCESS) ] = -1,
307 [ C(RESULT_MISS) ] = -1,
309 [ C(OP_PREFETCH) ] = {
310 [ C(RESULT_ACCESS) ] = -1,
311 [ C(RESULT_MISS) ] = -1,
314 [ C(BPU ) ] = {
315 [ C(OP_READ) ] = {
316 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
317 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
319 [ C(OP_WRITE) ] = {
320 [ C(RESULT_ACCESS) ] = -1,
321 [ C(RESULT_MISS) ] = -1,
323 [ C(OP_PREFETCH) ] = {
324 [ C(RESULT_ACCESS) ] = -1,
325 [ C(RESULT_MISS) ] = -1,
330 static const u64 atom_hw_cache_event_ids
331 [PERF_COUNT_HW_CACHE_MAX]
332 [PERF_COUNT_HW_CACHE_OP_MAX]
333 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
335 [ C(L1D) ] = {
336 [ C(OP_READ) ] = {
337 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
338 [ C(RESULT_MISS) ] = 0,
340 [ C(OP_WRITE) ] = {
341 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
342 [ C(RESULT_MISS) ] = 0,
344 [ C(OP_PREFETCH) ] = {
345 [ C(RESULT_ACCESS) ] = 0x0,
346 [ C(RESULT_MISS) ] = 0,
349 [ C(L1I ) ] = {
350 [ C(OP_READ) ] = {
351 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
352 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
354 [ C(OP_WRITE) ] = {
355 [ C(RESULT_ACCESS) ] = -1,
356 [ C(RESULT_MISS) ] = -1,
358 [ C(OP_PREFETCH) ] = {
359 [ C(RESULT_ACCESS) ] = 0,
360 [ C(RESULT_MISS) ] = 0,
363 [ C(LL ) ] = {
364 [ C(OP_READ) ] = {
365 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
366 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
368 [ C(OP_WRITE) ] = {
369 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
370 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
372 [ C(OP_PREFETCH) ] = {
373 [ C(RESULT_ACCESS) ] = 0,
374 [ C(RESULT_MISS) ] = 0,
377 [ C(DTLB) ] = {
378 [ C(OP_READ) ] = {
379 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */
380 [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */
382 [ C(OP_WRITE) ] = {
383 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */
384 [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */
386 [ C(OP_PREFETCH) ] = {
387 [ C(RESULT_ACCESS) ] = 0,
388 [ C(RESULT_MISS) ] = 0,
391 [ C(ITLB) ] = {
392 [ C(OP_READ) ] = {
393 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
394 [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
396 [ C(OP_WRITE) ] = {
397 [ C(RESULT_ACCESS) ] = -1,
398 [ C(RESULT_MISS) ] = -1,
400 [ C(OP_PREFETCH) ] = {
401 [ C(RESULT_ACCESS) ] = -1,
402 [ C(RESULT_MISS) ] = -1,
405 [ C(BPU ) ] = {
406 [ C(OP_READ) ] = {
407 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
408 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
410 [ C(OP_WRITE) ] = {
411 [ C(RESULT_ACCESS) ] = -1,
412 [ C(RESULT_MISS) ] = -1,
414 [ C(OP_PREFETCH) ] = {
415 [ C(RESULT_ACCESS) ] = -1,
416 [ C(RESULT_MISS) ] = -1,
421 static u64 intel_pmu_raw_event(u64 event)
423 #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL
424 #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
425 #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL
426 #define CORE_EVNTSEL_INV_MASK 0x00800000ULL
427 #define CORE_EVNTSEL_COUNTER_MASK 0xFF000000ULL
429 #define CORE_EVNTSEL_MASK \
430 (CORE_EVNTSEL_EVENT_MASK | \
431 CORE_EVNTSEL_UNIT_MASK | \
432 CORE_EVNTSEL_EDGE_MASK | \
433 CORE_EVNTSEL_INV_MASK | \
434 CORE_EVNTSEL_COUNTER_MASK)
436 return event & CORE_EVNTSEL_MASK;
439 static const u64 amd_hw_cache_event_ids
440 [PERF_COUNT_HW_CACHE_MAX]
441 [PERF_COUNT_HW_CACHE_OP_MAX]
442 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
444 [ C(L1D) ] = {
445 [ C(OP_READ) ] = {
446 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
447 [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
449 [ C(OP_WRITE) ] = {
450 [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
451 [ C(RESULT_MISS) ] = 0,
453 [ C(OP_PREFETCH) ] = {
454 [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
455 [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
458 [ C(L1I ) ] = {
459 [ C(OP_READ) ] = {
460 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
461 [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
463 [ C(OP_WRITE) ] = {
464 [ C(RESULT_ACCESS) ] = -1,
465 [ C(RESULT_MISS) ] = -1,
467 [ C(OP_PREFETCH) ] = {
468 [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
469 [ C(RESULT_MISS) ] = 0,
472 [ C(LL ) ] = {
473 [ C(OP_READ) ] = {
474 [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
475 [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
477 [ C(OP_WRITE) ] = {
478 [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
479 [ C(RESULT_MISS) ] = 0,
481 [ C(OP_PREFETCH) ] = {
482 [ C(RESULT_ACCESS) ] = 0,
483 [ C(RESULT_MISS) ] = 0,
486 [ C(DTLB) ] = {
487 [ C(OP_READ) ] = {
488 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
489 [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DTLB Miss */
491 [ C(OP_WRITE) ] = {
492 [ C(RESULT_ACCESS) ] = 0,
493 [ C(RESULT_MISS) ] = 0,
495 [ C(OP_PREFETCH) ] = {
496 [ C(RESULT_ACCESS) ] = 0,
497 [ C(RESULT_MISS) ] = 0,
500 [ C(ITLB) ] = {
501 [ C(OP_READ) ] = {
502 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */
503 [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */
505 [ C(OP_WRITE) ] = {
506 [ C(RESULT_ACCESS) ] = -1,
507 [ C(RESULT_MISS) ] = -1,
509 [ C(OP_PREFETCH) ] = {
510 [ C(RESULT_ACCESS) ] = -1,
511 [ C(RESULT_MISS) ] = -1,
514 [ C(BPU ) ] = {
515 [ C(OP_READ) ] = {
516 [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
517 [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
519 [ C(OP_WRITE) ] = {
520 [ C(RESULT_ACCESS) ] = -1,
521 [ C(RESULT_MISS) ] = -1,
523 [ C(OP_PREFETCH) ] = {
524 [ C(RESULT_ACCESS) ] = -1,
525 [ C(RESULT_MISS) ] = -1,
531 * AMD Performance Monitor K7 and later.
533 static const u64 amd_perfmon_event_map[] =
535 [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
536 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
537 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
538 [PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
539 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
540 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
543 static u64 amd_pmu_event_map(int event)
545 return amd_perfmon_event_map[event];
548 static u64 amd_pmu_raw_event(u64 event)
550 #define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL
551 #define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL
552 #define K7_EVNTSEL_EDGE_MASK 0x000040000ULL
553 #define K7_EVNTSEL_INV_MASK 0x000800000ULL
554 #define K7_EVNTSEL_COUNTER_MASK 0x0FF000000ULL
556 #define K7_EVNTSEL_MASK \
557 (K7_EVNTSEL_EVENT_MASK | \
558 K7_EVNTSEL_UNIT_MASK | \
559 K7_EVNTSEL_EDGE_MASK | \
560 K7_EVNTSEL_INV_MASK | \
561 K7_EVNTSEL_COUNTER_MASK)
563 return event & K7_EVNTSEL_MASK;
567 * Propagate counter elapsed time into the generic counter.
568 * Can only be executed on the CPU where the counter is active.
569 * Returns the delta events processed.
571 static u64
572 x86_perf_counter_update(struct perf_counter *counter,
573 struct hw_perf_counter *hwc, int idx)
575 int shift = 64 - x86_pmu.counter_bits;
576 u64 prev_raw_count, new_raw_count;
577 s64 delta;
580 * Careful: an NMI might modify the previous counter value.
582 * Our tactic to handle this is to first atomically read and
583 * exchange a new raw count - then add that new-prev delta
584 * count to the generic counter atomically:
586 again:
587 prev_raw_count = atomic64_read(&hwc->prev_count);
588 rdmsrl(hwc->counter_base + idx, new_raw_count);
590 if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
591 new_raw_count) != prev_raw_count)
592 goto again;
595 * Now we have the new raw value and have updated the prev
596 * timestamp already. We can now calculate the elapsed delta
597 * (counter-)time and add that to the generic counter.
599 * Careful, not all hw sign-extends above the physical width
600 * of the count.
602 delta = (new_raw_count << shift) - (prev_raw_count << shift);
603 delta >>= shift;
605 atomic64_add(delta, &counter->count);
606 atomic64_sub(delta, &hwc->period_left);
608 return new_raw_count;
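/*
 * Worked example of the shift trick above: with 32-bit counters
 * (counter_bits == 32, shift == 32) a wrap from prev == 0xfffffff0
 * to new == 0x00000010 yields
 * ((new << 32) - (prev << 32)) >> 32 == 0x20, i.e. the 32 events that
 * actually elapsed, regardless of how the hardware extends the value
 * above the physical counter width.
 */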
611 static atomic_t active_counters;
612 static DEFINE_MUTEX(pmc_reserve_mutex);
614 static bool reserve_pmc_hardware(void)
616 int i;
618 if (nmi_watchdog == NMI_LOCAL_APIC)
619 disable_lapic_nmi_watchdog();
621 for (i = 0; i < x86_pmu.num_counters; i++) {
622 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
623 goto perfctr_fail;
626 for (i = 0; i < x86_pmu.num_counters; i++) {
627 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
628 goto eventsel_fail;
631 return true;
633 eventsel_fail:
634 for (i--; i >= 0; i--)
635 release_evntsel_nmi(x86_pmu.eventsel + i);
637 i = x86_pmu.num_counters;
639 perfctr_fail:
640 for (i--; i >= 0; i--)
641 release_perfctr_nmi(x86_pmu.perfctr + i);
643 if (nmi_watchdog == NMI_LOCAL_APIC)
644 enable_lapic_nmi_watchdog();
646 return false;
649 static void release_pmc_hardware(void)
651 int i;
653 for (i = 0; i < x86_pmu.num_counters; i++) {
654 release_perfctr_nmi(x86_pmu.perfctr + i);
655 release_evntsel_nmi(x86_pmu.eventsel + i);
658 if (nmi_watchdog == NMI_LOCAL_APIC)
659 enable_lapic_nmi_watchdog();
662 static void hw_perf_counter_destroy(struct perf_counter *counter)
664 if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) {
665 release_pmc_hardware();
666 mutex_unlock(&pmc_reserve_mutex);
670 static inline int x86_pmu_initialized(void)
672 return x86_pmu.handle_irq != NULL;
675 static inline int
676 set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr)
678 unsigned int cache_type, cache_op, cache_result;
679 u64 config, val;
681 config = attr->config;
683 cache_type = (config >> 0) & 0xff;
684 if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
685 return -EINVAL;
687 cache_op = (config >> 8) & 0xff;
688 if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
689 return -EINVAL;
691 cache_result = (config >> 16) & 0xff;
692 if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
693 return -EINVAL;
695 val = hw_cache_event_ids[cache_type][cache_op][cache_result];
697 if (val == 0)
698 return -ENOENT;
700 if (val == -1)
701 return -EINVAL;
703 hwc->config |= val;
705 return 0;
709 * Setup the hardware configuration for a given attr_type
711 static int __hw_perf_counter_init(struct perf_counter *counter)
713 struct perf_counter_attr *attr = &counter->attr;
714 struct hw_perf_counter *hwc = &counter->hw;
715 u64 config;
716 int err;
718 if (!x86_pmu_initialized())
719 return -ENODEV;
721 err = 0;
722 if (!atomic_inc_not_zero(&active_counters)) {
723 mutex_lock(&pmc_reserve_mutex);
724 if (atomic_read(&active_counters) == 0 && !reserve_pmc_hardware())
725 err = -EBUSY;
726 else
727 atomic_inc(&active_counters);
728 mutex_unlock(&pmc_reserve_mutex);
730 if (err)
731 return err;
734 * Generate PMC IRQs:
735 * (keep 'enabled' bit clear for now)
737 hwc->config = ARCH_PERFMON_EVENTSEL_INT;
740 * Count user and OS events unless requested not to.
742 if (!attr->exclude_user)
743 hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
744 if (!attr->exclude_kernel)
745 hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
747 if (!hwc->sample_period) {
748 hwc->sample_period = x86_pmu.max_period;
749 hwc->last_period = hwc->sample_period;
750 atomic64_set(&hwc->period_left, hwc->sample_period);
753 counter->destroy = hw_perf_counter_destroy;
756 * Raw event types provide the config in the event structure
758 if (attr->type == PERF_TYPE_RAW) {
759 hwc->config |= x86_pmu.raw_event(attr->config);
760 return 0;
763 if (attr->type == PERF_TYPE_HW_CACHE)
764 return set_ext_hw_attr(hwc, attr);
766 if (attr->config >= x86_pmu.max_events)
767 return -EINVAL;
770 * The generic map:
772 config = x86_pmu.event_map(attr->config);
774 if (config == 0)
775 return -ENOENT;
777 if (config == -1LL)
778 return -EINVAL;
780 hwc->config |= config;
782 return 0;
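/*
 * Example: a raw hardware event is requested with attr.type ==
 * PERF_TYPE_RAW and the evntsel encoding in attr.config, e.g. 0x412e
 * (event 0x2e, unit mask 0x41: LLC misses on Core2/Nehalem).
 * x86_pmu.raw_event() only passes through the event, unit-mask, edge,
 * invert and counter-mask bits, so the INT/USR/OS control bits set up
 * above stay under kernel control.
 */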
785 static void p6_pmu_disable_all(void)
787 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
788 u64 val;
790 if (!cpuc->enabled)
791 return;
793 cpuc->enabled = 0;
794 barrier();
796 /* p6 only has one enable register */
797 rdmsrl(MSR_P6_EVNTSEL0, val);
798 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
799 wrmsrl(MSR_P6_EVNTSEL0, val);
802 static void intel_pmu_disable_all(void)
804 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
807 static void amd_pmu_disable_all(void)
809 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
810 int idx;
812 if (!cpuc->enabled)
813 return;
815 cpuc->enabled = 0;
817 * ensure we write the disable before we start disabling the
818 * counters proper, so that amd_pmu_enable_counter() does the
819 * right thing.
821 barrier();
823 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
824 u64 val;
826 if (!test_bit(idx, cpuc->active_mask))
827 continue;
828 rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
829 if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
830 continue;
831 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
832 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
836 void hw_perf_disable(void)
838 if (!x86_pmu_initialized())
839 return;
840 return x86_pmu.disable_all();
843 static void p6_pmu_enable_all(void)
845 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
846 unsigned long val;
848 if (cpuc->enabled)
849 return;
851 cpuc->enabled = 1;
852 barrier();
854 /* p6 only has one enable register */
855 rdmsrl(MSR_P6_EVNTSEL0, val);
856 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
857 wrmsrl(MSR_P6_EVNTSEL0, val);
860 static void intel_pmu_enable_all(void)
862 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
865 static void amd_pmu_enable_all(void)
867 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
868 int idx;
870 if (cpuc->enabled)
871 return;
873 cpuc->enabled = 1;
874 barrier();
876 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
877 struct perf_counter *counter = cpuc->counters[idx];
878 u64 val;
880 if (!test_bit(idx, cpuc->active_mask))
881 continue;
883 val = counter->hw.config;
884 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
885 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
889 void hw_perf_enable(void)
891 if (!x86_pmu_initialized())
892 return;
893 x86_pmu.enable_all();
896 static inline u64 intel_pmu_get_status(void)
898 u64 status;
900 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
902 return status;
905 static inline void intel_pmu_ack_status(u64 ack)
907 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
910 static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
912 (void)checking_wrmsrl(hwc->config_base + idx,
913 hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
916 static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
918 (void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
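/*
 * checking_wrmsrl() is the exception-safe wrmsr variant: a write to a
 * counter MSR that the CPU does not implement returns an error
 * instead of raising a fatal #GP.
 */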
921 static inline void
922 intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx)
924 int idx = __idx - X86_PMC_IDX_FIXED;
925 u64 ctrl_val, mask;
927 mask = 0xfULL << (idx * 4);
929 rdmsrl(hwc->config_base, ctrl_val);
930 ctrl_val &= ~mask;
931 (void)checking_wrmsrl(hwc->config_base, ctrl_val);
934 static inline void
935 p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
937 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
938 u64 val = P6_NOP_COUNTER;
940 if (cpuc->enabled)
941 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
943 (void)checking_wrmsrl(hwc->config_base + idx, val);
946 static inline void
947 intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
949 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
950 intel_pmu_disable_fixed(hwc, idx);
951 return;
954 x86_pmu_disable_counter(hwc, idx);
957 static inline void
958 amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
960 x86_pmu_disable_counter(hwc, idx);
963 static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);
966 * Set the next IRQ period, based on the hwc->period_left value.
967 * To be called with the counter disabled in hw:
969 static int
970 x86_perf_counter_set_period(struct perf_counter *counter,
971 struct hw_perf_counter *hwc, int idx)
973 s64 left = atomic64_read(&hwc->period_left);
974 s64 period = hwc->sample_period;
975 int err, ret = 0;
978 * If we are way outside a reasonable range then just skip forward:
980 if (unlikely(left <= -period)) {
981 left = period;
982 atomic64_set(&hwc->period_left, left);
983 hwc->last_period = period;
984 ret = 1;
987 if (unlikely(left <= 0)) {
988 left += period;
989 atomic64_set(&hwc->period_left, left);
990 hwc->last_period = period;
991 ret = 1;
994 * Quirk: certain CPUs don't like it if just 1 event is left:
996 if (unlikely(left < 2))
997 left = 2;
999 if (left > x86_pmu.max_period)
1000 left = x86_pmu.max_period;
1002 per_cpu(prev_left[idx], smp_processor_id()) = left;
1005 * The hw counter starts counting from this counter offset,
1006 * mark it to be able to extract future deltas:
1008 atomic64_set(&hwc->prev_count, (u64)-left);
1010 err = checking_wrmsrl(hwc->counter_base + idx,
1011 (u64)(-left) & x86_pmu.counter_mask);
1013 perf_counter_update_userpage(counter);
1015 return ret;
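/*
 * Example: with a sample_period of 100000 the counter is programmed
 * to (u64)-100000 (masked to x86_pmu.counter_mask bits), so it
 * overflows - and raises the PMI - after exactly 100000 events;
 * prev_count is set to the same starting value, so the update at
 * overflow time sees a delta of roughly the full period.
 */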
1018 static inline void
1019 intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx)
1021 int idx = __idx - X86_PMC_IDX_FIXED;
1022 u64 ctrl_val, bits, mask;
1023 int err;
1026 * Enable IRQ generation (0x8),
1027 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
1028 * if requested:
1030 bits = 0x8ULL;
1031 if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
1032 bits |= 0x2;
1033 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
1034 bits |= 0x1;
1035 bits <<= (idx * 4);
1036 mask = 0xfULL << (idx * 4);
1038 rdmsrl(hwc->config_base, ctrl_val);
1039 ctrl_val &= ~mask;
1040 ctrl_val |= bits;
1041 err = checking_wrmsrl(hwc->config_base, ctrl_val);
1044 static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
1046 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
1047 u64 val;
1049 val = hwc->config;
1050 if (cpuc->enabled)
1051 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1053 (void)checking_wrmsrl(hwc->config_base + idx, val);
1057 static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
1059 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
1060 intel_pmu_enable_fixed(hwc, idx);
1061 return;
1064 x86_pmu_enable_counter(hwc, idx);
1067 static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
1069 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
1071 if (cpuc->enabled)
1072 x86_pmu_enable_counter(hwc, idx);
1075 static int
1076 fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
1078 unsigned int event;
1080 if (!x86_pmu.num_counters_fixed)
1081 return -1;
1083 event = hwc->config & ARCH_PERFMON_EVENT_MASK;
1085 if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
1086 return X86_PMC_IDX_FIXED_INSTRUCTIONS;
1087 if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
1088 return X86_PMC_IDX_FIXED_CPU_CYCLES;
1089 if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
1090 return X86_PMC_IDX_FIXED_BUS_CYCLES;
1092 return -1;
1096 * Find a PMC slot for the freshly enabled / scheduled in counter:
1098 static int x86_pmu_enable(struct perf_counter *counter)
1100 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
1101 struct hw_perf_counter *hwc = &counter->hw;
1102 int idx;
1104 idx = fixed_mode_idx(counter, hwc);
1105 if (idx >= 0) {
1107 * Try to get the fixed counter, if that is already taken
1108 * then try to get a generic counter:
1110 if (test_and_set_bit(idx, cpuc->used_mask))
1111 goto try_generic;
1113 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
1115 * We set it so that counter_base + idx in wrmsr/rdmsr maps to
1116 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
1118 hwc->counter_base =
1119 MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
1120 hwc->idx = idx;
1121 } else {
1122 idx = hwc->idx;
1123 /* Try to get the previous generic counter again */
1124 if (test_and_set_bit(idx, cpuc->used_mask)) {
1125 try_generic:
1126 idx = find_first_zero_bit(cpuc->used_mask,
1127 x86_pmu.num_counters);
1128 if (idx == x86_pmu.num_counters)
1129 return -EAGAIN;
1131 set_bit(idx, cpuc->used_mask);
1132 hwc->idx = idx;
1134 hwc->config_base = x86_pmu.eventsel;
1135 hwc->counter_base = x86_pmu.perfctr;
1138 perf_counters_lapic_init();
1140 x86_pmu.disable(hwc, idx);
1142 cpuc->counters[idx] = counter;
1143 set_bit(idx, cpuc->active_mask);
1145 x86_perf_counter_set_period(counter, hwc, idx);
1146 x86_pmu.enable(hwc, idx);
1148 perf_counter_update_userpage(counter);
1150 return 0;
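/*
 * Scheduling order above: first try the matching fixed-purpose
 * counter, then the generic counter this event used last time, and
 * finally the first free bit in used_mask; -EAGAIN is returned when
 * all generic counters are taken.
 */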
1153 static void x86_pmu_unthrottle(struct perf_counter *counter)
1155 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
1156 struct hw_perf_counter *hwc = &counter->hw;
1158 if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
1159 cpuc->counters[hwc->idx] != counter))
1160 return;
1162 x86_pmu.enable(hwc, hwc->idx);
1165 void perf_counter_print_debug(void)
1167 u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
1168 struct cpu_hw_counters *cpuc;
1169 unsigned long flags;
1170 int cpu, idx;
1172 if (!x86_pmu.num_counters)
1173 return;
1175 local_irq_save(flags);
1177 cpu = smp_processor_id();
1178 cpuc = &per_cpu(cpu_hw_counters, cpu);
1180 if (x86_pmu.version >= 2) {
1181 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
1182 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
1183 rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
1184 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
1186 pr_info("\n");
1187 pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
1188 pr_info("CPU#%d: status: %016llx\n", cpu, status);
1189 pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
1190 pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
1192 pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask);
1194 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1195 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
1196 rdmsrl(x86_pmu.perfctr + idx, pmc_count);
1198 prev_left = per_cpu(prev_left[idx], cpu);
1200 pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n",
1201 cpu, idx, pmc_ctrl);
1202 pr_info("CPU#%d: gen-PMC%d count: %016llx\n",
1203 cpu, idx, pmc_count);
1204 pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
1205 cpu, idx, prev_left);
1207 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
1208 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
1210 pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
1211 cpu, idx, pmc_count);
1213 local_irq_restore(flags);
1216 static void x86_pmu_disable(struct perf_counter *counter)
1218 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
1219 struct hw_perf_counter *hwc = &counter->hw;
1220 int idx = hwc->idx;
1223 * Must be done before we disable, otherwise the nmi handler
1224 * could reenable again:
1226 clear_bit(idx, cpuc->active_mask);
1227 x86_pmu.disable(hwc, idx);
1230 * Make sure the cleared pointer becomes visible before we
1231 * (potentially) free the counter:
1233 barrier();
1236 * Drain the remaining delta count out of a counter
1237 * that we are disabling:
1239 x86_perf_counter_update(counter, hwc, idx);
1240 cpuc->counters[idx] = NULL;
1241 clear_bit(idx, cpuc->used_mask);
1243 perf_counter_update_userpage(counter);
1247 * Save and restart an expired counter. Called by NMI contexts,
1248 * so it has to be careful about preempting normal counter ops:
1250 static int intel_pmu_save_and_restart(struct perf_counter *counter)
1252 struct hw_perf_counter *hwc = &counter->hw;
1253 int idx = hwc->idx;
1254 int ret;
1256 x86_perf_counter_update(counter, hwc, idx);
1257 ret = x86_perf_counter_set_period(counter, hwc, idx);
1259 if (counter->state == PERF_COUNTER_STATE_ACTIVE)
1260 intel_pmu_enable_counter(hwc, idx);
1262 return ret;
1265 static void intel_pmu_reset(void)
1267 unsigned long flags;
1268 int idx;
1270 if (!x86_pmu.num_counters)
1271 return;
1273 local_irq_save(flags);
1275 printk("clearing PMU state on CPU#%d\n", smp_processor_id());
1277 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1278 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
1279 checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
1281 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
1282 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
1285 local_irq_restore(flags);
1288 static int p6_pmu_handle_irq(struct pt_regs *regs)
1290 struct perf_sample_data data;
1291 struct cpu_hw_counters *cpuc;
1292 struct perf_counter *counter;
1293 struct hw_perf_counter *hwc;
1294 int idx, handled = 0;
1295 u64 val;
1297 data.regs = regs;
1298 data.addr = 0;
1300 cpuc = &__get_cpu_var(cpu_hw_counters);
1302 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1303 if (!test_bit(idx, cpuc->active_mask))
1304 continue;
1306 counter = cpuc->counters[idx];
1307 hwc = &counter->hw;
1309 val = x86_perf_counter_update(counter, hwc, idx);
1310 if (val & (1ULL << (x86_pmu.counter_bits - 1)))
1311 continue;
1314 * counter overflow
1316 handled = 1;
1317 data.period = counter->hw.last_period;
1319 if (!x86_perf_counter_set_period(counter, hwc, idx))
1320 continue;
1322 if (perf_counter_overflow(counter, 1, &data))
1323 p6_pmu_disable_counter(hwc, idx);
1326 if (handled)
1327 inc_irq_stat(apic_perf_irqs);
1329 return handled;
1333 * This handler is triggered by the local APIC, so the APIC IRQ handling
1334 * rules apply:
1336 static int intel_pmu_handle_irq(struct pt_regs *regs)
1338 struct perf_sample_data data;
1339 struct cpu_hw_counters *cpuc;
1340 int bit, loops;
1341 u64 ack, status;
1343 data.regs = regs;
1344 data.addr = 0;
1346 cpuc = &__get_cpu_var(cpu_hw_counters);
1348 perf_disable();
1349 status = intel_pmu_get_status();
1350 if (!status) {
1351 perf_enable();
1352 return 0;
1355 loops = 0;
1356 again:
1357 if (++loops > 100) {
1358 WARN_ONCE(1, "perfcounters: irq loop stuck!\n");
1359 perf_counter_print_debug();
1360 intel_pmu_reset();
1361 perf_enable();
1362 return 1;
1365 inc_irq_stat(apic_perf_irqs);
1366 ack = status;
1367 for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
1368 struct perf_counter *counter = cpuc->counters[bit];
1370 clear_bit(bit, (unsigned long *) &status);
1371 if (!test_bit(bit, cpuc->active_mask))
1372 continue;
1374 if (!intel_pmu_save_and_restart(counter))
1375 continue;
1377 data.period = counter->hw.last_period;
1379 if (perf_counter_overflow(counter, 1, &data))
1380 intel_pmu_disable_counter(&counter->hw, bit);
1383 intel_pmu_ack_status(ack);
1386 * Repeat if there is more work to be done:
1388 status = intel_pmu_get_status();
1389 if (status)
1390 goto again;
1392 perf_enable();
1394 return 1;
1397 static int amd_pmu_handle_irq(struct pt_regs *regs)
1399 struct perf_sample_data data;
1400 struct cpu_hw_counters *cpuc;
1401 struct perf_counter *counter;
1402 struct hw_perf_counter *hwc;
1403 int idx, handled = 0;
1404 u64 val;
1406 data.regs = regs;
1407 data.addr = 0;
1409 cpuc = &__get_cpu_var(cpu_hw_counters);
1411 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1412 if (!test_bit(idx, cpuc->active_mask))
1413 continue;
1415 counter = cpuc->counters[idx];
1416 hwc = &counter->hw;
1418 val = x86_perf_counter_update(counter, hwc, idx);
1419 if (val & (1ULL << (x86_pmu.counter_bits - 1)))
1420 continue;
1423 * counter overflow
1425 handled = 1;
1426 data.period = counter->hw.last_period;
1428 if (!x86_perf_counter_set_period(counter, hwc, idx))
1429 continue;
1431 if (perf_counter_overflow(counter, 1, &data))
1432 amd_pmu_disable_counter(hwc, idx);
1435 if (handled)
1436 inc_irq_stat(apic_perf_irqs);
1438 return handled;
1441 void smp_perf_pending_interrupt(struct pt_regs *regs)
1443 irq_enter();
1444 ack_APIC_irq();
1445 inc_irq_stat(apic_pending_irqs);
1446 perf_counter_do_pending();
1447 irq_exit();
1450 void set_perf_counter_pending(void)
1452 apic->send_IPI_self(LOCAL_PENDING_VECTOR);
1455 void perf_counters_lapic_init(void)
1457 if (!x86_pmu_initialized())
1458 return;
1461 * Always use NMI for PMU
1463 apic_write(APIC_LVTPC, APIC_DM_NMI);
1466 static int __kprobes
1467 perf_counter_nmi_handler(struct notifier_block *self,
1468 unsigned long cmd, void *__args)
1470 struct die_args *args = __args;
1471 struct pt_regs *regs;
1473 if (!atomic_read(&active_counters))
1474 return NOTIFY_DONE;
1476 switch (cmd) {
1477 case DIE_NMI:
1478 case DIE_NMI_IPI:
1479 break;
1481 default:
1482 return NOTIFY_DONE;
1485 regs = args->regs;
1487 apic_write(APIC_LVTPC, APIC_DM_NMI);
1489 * Can't rely on the handled return value to say it was our NMI, two
1490 * counters could trigger 'simultaneously' raising two back-to-back NMIs.
1492 * If the first NMI handles both, the latter will be empty and daze
1493 * the CPU.
1495 x86_pmu.handle_irq(regs);
1497 return NOTIFY_STOP;
1500 static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
1501 .notifier_call = perf_counter_nmi_handler,
1502 .next = NULL,
1503 .priority = 1
1506 static struct x86_pmu p6_pmu = {
1507 .name = "p6",
1508 .handle_irq = p6_pmu_handle_irq,
1509 .disable_all = p6_pmu_disable_all,
1510 .enable_all = p6_pmu_enable_all,
1511 .enable = p6_pmu_enable_counter,
1512 .disable = p6_pmu_disable_counter,
1513 .eventsel = MSR_P6_EVNTSEL0,
1514 .perfctr = MSR_P6_PERFCTR0,
1515 .event_map = p6_pmu_event_map,
1516 .raw_event = p6_pmu_raw_event,
1517 .max_events = ARRAY_SIZE(p6_perfmon_event_map),
1518 .max_period = (1ULL << 31) - 1,
1519 .version = 0,
1520 .num_counters = 2,
1522 * Counters have 40 bits implemented. However they are designed such
1523 * that bits [32-39] are sign extensions of bit 31. As such the
1524 * effective width of a counter for P6-like PMU is 32 bits only.
1526 * See IA-32 Intel Architecture Software developer manual Vol 3B
1528 .counter_bits = 32,
1529 .counter_mask = (1ULL << 32) - 1,
1532 static struct x86_pmu intel_pmu = {
1533 .name = "Intel",
1534 .handle_irq = intel_pmu_handle_irq,
1535 .disable_all = intel_pmu_disable_all,
1536 .enable_all = intel_pmu_enable_all,
1537 .enable = intel_pmu_enable_counter,
1538 .disable = intel_pmu_disable_counter,
1539 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
1540 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
1541 .event_map = intel_pmu_event_map,
1542 .raw_event = intel_pmu_raw_event,
1543 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
1545 * Intel PMCs cannot be accessed sanely above 32 bit width,
1546 * so we install an artificial 1<<31 period regardless of
1547 * the generic counter period:
1549 .max_period = (1ULL << 31) - 1,
1552 static struct x86_pmu amd_pmu = {
1553 .name = "AMD",
1554 .handle_irq = amd_pmu_handle_irq,
1555 .disable_all = amd_pmu_disable_all,
1556 .enable_all = amd_pmu_enable_all,
1557 .enable = amd_pmu_enable_counter,
1558 .disable = amd_pmu_disable_counter,
1559 .eventsel = MSR_K7_EVNTSEL0,
1560 .perfctr = MSR_K7_PERFCTR0,
1561 .event_map = amd_pmu_event_map,
1562 .raw_event = amd_pmu_raw_event,
1563 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
1564 .num_counters = 4,
1565 .counter_bits = 48,
1566 .counter_mask = (1ULL << 48) - 1,
1567 /* use highest bit to detect overflow */
1568 .max_period = (1ULL << 47) - 1,
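/*
 * On every backend max_period stays below 2^(counter_bits - 1),
 * because the interrupt handlers detect an overflow by testing the
 * top implemented counter bit after x86_perf_counter_update().
 */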
1571 static int p6_pmu_init(void)
1573 switch (boot_cpu_data.x86_model) {
1574 case 1:
1575 case 3: /* Pentium Pro */
1576 case 5:
1577 case 6: /* Pentium II */
1578 case 7:
1579 case 8:
1580 case 11: /* Pentium III */
1581 break;
1582 case 9:
1583 case 13:
1584 /* Pentium M */
1585 break;
1586 default:
1587 pr_cont("unsupported p6 CPU model %d ",
1588 boot_cpu_data.x86_model);
1589 return -ENODEV;
1592 if (!cpu_has_apic) {
1593 pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
1594 return -ENODEV;
1597 x86_pmu = p6_pmu;
1599 return 0;
1602 static int intel_pmu_init(void)
1604 union cpuid10_edx edx;
1605 union cpuid10_eax eax;
1606 unsigned int unused;
1607 unsigned int ebx;
1608 int version;
1610 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
1611 /* check for P6 processor family */
1612 if (boot_cpu_data.x86 == 6) {
1613 return p6_pmu_init();
1614 } else {
1615 return -ENODEV;
1620 * Check whether the Architectural PerfMon supports
1621 * Branch Misses Retired Event or not.
1623 cpuid(10, &eax.full, &ebx, &unused, &edx.full);
1624 if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
1625 return -ENODEV;
1627 version = eax.split.version_id;
1628 if (version < 2)
1629 return -ENODEV;
1631 x86_pmu = intel_pmu;
1632 x86_pmu.version = version;
1633 x86_pmu.num_counters = eax.split.num_counters;
1634 x86_pmu.counter_bits = eax.split.bit_width;
1635 x86_pmu.counter_mask = (1ULL << eax.split.bit_width) - 1;
1638 * Quirk: v2 perfmon does not report fixed-purpose counters, so
1639 * assume at least 3 counters:
1641 x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
1644 * Install the hw-cache-events table:
1646 switch (boot_cpu_data.x86_model) {
1647 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
1648 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
1649 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
1650 case 29: /* six-core 45 nm xeon "Dunnington" */
1651 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
1652 sizeof(hw_cache_event_ids));
1654 pr_cont("Core2 events, ");
1655 break;
1656 default:
1657 case 26:
1658 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
1659 sizeof(hw_cache_event_ids));
1661 pr_cont("Nehalem/Corei7 events, ");
1662 break;
1663 case 28:
1664 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
1665 sizeof(hw_cache_event_ids));
1667 pr_cont("Atom events, ");
1668 break;
1670 return 0;
1673 static int amd_pmu_init(void)
1675 /* Performance-monitoring supported from K7 and later: */
1676 if (boot_cpu_data.x86 < 6)
1677 return -ENODEV;
1679 x86_pmu = amd_pmu;
1681 /* Events are common for all AMDs */
1682 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
1683 sizeof(hw_cache_event_ids));
1685 return 0;
1688 void __init init_hw_perf_counters(void)
1690 int err;
1692 pr_info("Performance Counters: ");
1694 switch (boot_cpu_data.x86_vendor) {
1695 case X86_VENDOR_INTEL:
1696 err = intel_pmu_init();
1697 break;
1698 case X86_VENDOR_AMD:
1699 err = amd_pmu_init();
1700 break;
1701 default:
1702 return;
1704 if (err != 0) {
1705 pr_cont("no PMU driver, software counters only.\n");
1706 return;
1709 pr_cont("%s PMU driver.\n", x86_pmu.name);
1711 if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
1712 WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
1713 x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
1714 x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
1716 perf_counter_mask = (1 << x86_pmu.num_counters) - 1;
1717 perf_max_counters = x86_pmu.num_counters;
1719 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
1720 WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
1721 x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
1722 x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
1725 perf_counter_mask |=
1726 ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
1727 x86_pmu.intel_ctrl = perf_counter_mask;
1729 perf_counters_lapic_init();
1730 register_die_notifier(&perf_counter_nmi_notifier);
1732 pr_info("... version: %d\n", x86_pmu.version);
1733 pr_info("... bit width: %d\n", x86_pmu.counter_bits);
1734 pr_info("... generic counters: %d\n", x86_pmu.num_counters);
1735 pr_info("... value mask: %016Lx\n", x86_pmu.counter_mask);
1736 pr_info("... max period: %016Lx\n", x86_pmu.max_period);
1737 pr_info("... fixed-purpose counters: %d\n", x86_pmu.num_counters_fixed);
1738 pr_info("... counter mask: %016Lx\n", perf_counter_mask);
1741 static inline void x86_pmu_read(struct perf_counter *counter)
1743 x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
1746 static const struct pmu pmu = {
1747 .enable = x86_pmu_enable,
1748 .disable = x86_pmu_disable,
1749 .read = x86_pmu_read,
1750 .unthrottle = x86_pmu_unthrottle,
1753 const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
1755 int err;
1757 err = __hw_perf_counter_init(counter);
1758 if (err)
1759 return ERR_PTR(err);
1761 return &pmu;
1765 * callchain support
1768 static inline
1769 void callchain_store(struct perf_callchain_entry *entry, u64 ip)
1771 if (entry->nr < PERF_MAX_STACK_DEPTH)
1772 entry->ip[entry->nr++] = ip;
1775 static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry);
1776 static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry);
1777 static DEFINE_PER_CPU(int, in_nmi_frame);
1780 static void
1781 backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
1783 /* Ignore warnings */
1786 static void backtrace_warning(void *data, char *msg)
1788 /* Ignore warnings */
1791 static int backtrace_stack(void *data, char *name)
1793 per_cpu(in_nmi_frame, smp_processor_id()) =
1794 x86_is_stack_id(NMI_STACK, name);
1796 return 0;
1799 static void backtrace_address(void *data, unsigned long addr, int reliable)
1801 struct perf_callchain_entry *entry = data;
1803 if (per_cpu(in_nmi_frame, smp_processor_id()))
1804 return;
1806 if (reliable)
1807 callchain_store(entry, addr);
1810 static const struct stacktrace_ops backtrace_ops = {
1811 .warning = backtrace_warning,
1812 .warning_symbol = backtrace_warning_symbol,
1813 .stack = backtrace_stack,
1814 .address = backtrace_address,
1817 #include "../dumpstack.h"
1819 static void
1820 perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
1822 callchain_store(entry, PERF_CONTEXT_KERNEL);
1823 callchain_store(entry, regs->ip);
1825 dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
1829 * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
1831 static unsigned long
1832 copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
1834 unsigned long offset, addr = (unsigned long)from;
1835 int type = in_nmi() ? KM_NMI : KM_IRQ0;
1836 unsigned long size, len = 0;
1837 struct page *page;
1838 void *map;
1839 int ret;
1841 do {
1842 ret = __get_user_pages_fast(addr, 1, 0, &page);
1843 if (!ret)
1844 break;
1846 offset = addr & (PAGE_SIZE - 1);
1847 size = min(PAGE_SIZE - offset, n - len);
1849 map = kmap_atomic(page, type);
1850 memcpy(to, map+offset, size);
1851 kunmap_atomic(map, type);
1852 put_page(page);
1854 len += size;
1855 to += size;
1856 addr += size;
1858 } while (len < n);
1860 return len;
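/*
 * A GUP + kmap_atomic() copy is used here instead of
 * copy_from_user() because this path runs from NMI context, where
 * taking a page fault to pull the user stack in is not an option;
 * the copy simply stops at the first page that is not resident.
 */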
1863 static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
1865 unsigned long bytes;
1867 bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
1869 return bytes == sizeof(*frame);
1872 static void
1873 perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
1875 struct stack_frame frame;
1876 const void __user *fp;
1878 if (!user_mode(regs))
1879 regs = task_pt_regs(current);
1881 fp = (void __user *)regs->bp;
1883 callchain_store(entry, PERF_CONTEXT_USER);
1884 callchain_store(entry, regs->ip);
1886 while (entry->nr < PERF_MAX_STACK_DEPTH) {
1887 frame.next_frame = NULL;
1888 frame.return_address = 0;
1890 if (!copy_stack_frame(fp, &frame))
1891 break;
1893 if ((unsigned long)fp < regs->sp)
1894 break;
1896 callchain_store(entry, frame.return_address);
1897 fp = frame.next_frame;
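/*
 * This user-space unwind follows the classic frame-pointer chain
 * (saved frame pointer + return address per frame), so it only
 * produces useful callchains for code compiled with frame pointers
 * enabled.
 */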
1901 static void
1902 perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
1904 int is_user;
1906 if (!regs)
1907 return;
1909 is_user = user_mode(regs);
1911 if (!current || current->pid == 0)
1912 return;
1914 if (is_user && current->state != TASK_RUNNING)
1915 return;
1917 if (!is_user)
1918 perf_callchain_kernel(regs, entry);
1920 if (current->mm)
1921 perf_callchain_user(regs, entry);
1924 struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1926 struct perf_callchain_entry *entry;
1928 if (in_nmi())
1929 entry = &__get_cpu_var(nmi_entry);
1930 else
1931 entry = &__get_cpu_var(irq_entry);
1933 entry->nr = 0;
1935 perf_do_callchain(regs, entry);
1937 return entry;