[PATCH] x86: Clean up x86 NMI sysctls
arch/i386/kernel/nmi.c

/*
 *  linux/arch/i386/nmi.c
 *
 *  NMI watchdog support on APIC systems
 *
 *  Started by Ingo Molnar <mingo@redhat.com>
 *
 *  Fixes:
 *  Mikael Pettersson	: AMD K7 support for local APIC NMI watchdog.
 *  Mikael Pettersson	: Power Management for local APIC NMI watchdog.
 *  Mikael Pettersson	: Pentium 4 support for local APIC NMI watchdog.
 *  Pavel Machek and
 *  Mikael Pettersson	: PM converted to driver model. Disable/enable API.
 */

#include <linux/config.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/nmi.h>
#include <linux/sysdev.h>
#include <linux/sysctl.h>
#include <linux/percpu.h>
#include <linux/dmi.h>
#include <linux/kprobes.h>

#include <asm/smp.h>
#include <asm/nmi.h>
#include <asm/kdebug.h>
#include <asm/intel_arch_perfmon.h>

#include "mach_traps.h"

int unknown_nmi_panic;
int nmi_watchdog_enabled;

/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
 * evtsel_nmi_owner tracks the ownership of the event selection registers.
 * Different performance counters / event selection registers may be
 * reserved by different subsystems; this reservation system just tries
 * to coordinate things a little.
 */
static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner);
static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]);

/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
 */
#define NMI_MAX_COUNTER_BITS 66

/* nmi_active:
 * >0: the lapic NMI watchdog is active, but can be disabled
 * <0: the lapic NMI watchdog has not been set up, and cannot
 *     be enabled
 *  0: the lapic NMI watchdog is disabled, but can be enabled
 */
atomic_t nmi_active = ATOMIC_INIT(0);		/* oprofile uses this */

unsigned int nmi_watchdog = NMI_DEFAULT;
static unsigned int nmi_hz = HZ;

struct nmi_watchdog_ctlblk {
        int enabled;
        u64 check_bit;
        unsigned int cccr_msr;
        unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
        unsigned int evntsel_msr;  /* the MSR to select the events to handle */
};
static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);

/* local prototypes */
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);

extern void show_registers(struct pt_regs *regs);
extern int unknown_nmi_panic;

/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
{
        /* returns the bit offset of the performance counter register */
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_AMD:
                return (msr - MSR_K7_PERFCTR0);
        case X86_VENDOR_INTEL:
                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
                        return (msr - MSR_ARCH_PERFMON_PERFCTR0);

                switch (boot_cpu_data.x86) {
                case 6:
                        return (msr - MSR_P6_PERFCTR0);
                case 15:
                        return (msr - MSR_P4_BPU_PERFCTR0);
                }
        }
        return 0;
}

/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
{
        /* returns the bit offset of the event selection register */
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_AMD:
                return (msr - MSR_K7_EVNTSEL0);
        case X86_VENDOR_INTEL:
                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
                        return (msr - MSR_ARCH_PERFMON_EVENTSEL0);

                switch (boot_cpu_data.x86) {
                case 6:
                        return (msr - MSR_P6_EVNTSEL0);
                case 15:
                        return (msr - MSR_P4_BSU_ESCR0);
                }
        }
        return 0;
}

/* checks for a bit availability (hack for oprofile) */
int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
{
        BUG_ON(counter > NMI_MAX_COUNTER_BITS);

        return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
}

/* checks an msr for availability */
int avail_to_resrv_perfctr_nmi(unsigned int msr)
{
        unsigned int counter;

        counter = nmi_perfctr_msr_to_bit(msr);
        BUG_ON(counter > NMI_MAX_COUNTER_BITS);

        return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
}
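
/*
 * Claim the performance counter MSR @msr for the NMI watchdog (or another
 * user such as oprofile) on this CPU.  Returns 1 if the reservation
 * succeeded, 0 if the counter is already owned.
 */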
int reserve_perfctr_nmi(unsigned int msr)
{
        unsigned int counter;

        counter = nmi_perfctr_msr_to_bit(msr);
        BUG_ON(counter > NMI_MAX_COUNTER_BITS);

        if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner)))
                return 1;
        return 0;
}

void release_perfctr_nmi(unsigned int msr)
{
        unsigned int counter;

        counter = nmi_perfctr_msr_to_bit(msr);
        BUG_ON(counter > NMI_MAX_COUNTER_BITS);

        clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
}
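
/*
 * Reserve/release the event select MSR paired with a counter; this uses a
 * separate per-CPU bitmask (evntsel_nmi_owner) but otherwise follows the
 * same scheme as reserve_perfctr_nmi()/release_perfctr_nmi().
 */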
int reserve_evntsel_nmi(unsigned int msr)
{
        unsigned int counter;

        counter = nmi_evntsel_msr_to_bit(msr);
        BUG_ON(counter > NMI_MAX_COUNTER_BITS);

        if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]))
                return 1;
        return 0;
}

void release_evntsel_nmi(unsigned int msr)
{
        unsigned int counter;

        counter = nmi_evntsel_msr_to_bit(msr);
        BUG_ON(counter > NMI_MAX_COUNTER_BITS);

        clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]);
}
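
/*
 * Returns non-zero if the boot CPU is a type this file knows how to drive:
 * AMD family 6/15 (K7/K8), Intel family 6/15 (P6/P4), or any Intel CPU
 * with architectural perfmon support.
 */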
static __cpuinit inline int nmi_known_cpu(void)
{
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_AMD:
                return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
        case X86_VENDOR_INTEL:
                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
                        return 1;
                else
                        return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
        }
        return 0;
}

#ifdef CONFIG_SMP
/* The performance counters used by NMI_LOCAL_APIC don't trigger when
 * the CPU is idle. To make sure the NMI watchdog really ticks on all
 * CPUs during the test, make them busy.
 */
static __init void nmi_cpu_busy(void *data)
{
        volatile int *endflag = data;
        local_irq_enable_in_hardirq();
        /* Intentionally don't use cpu_relax here. This is
           to make sure that the performance counter really ticks,
           even if there is a simulator or similar that catches the
           pause instruction. On a real HT machine this is fine because
           all other CPUs are busy with "useless" delay loops and don't
           care if they get somewhat less cycles. */
        while (*endflag == 0)
                barrier();
}
#endif
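
/*
 * Boot-time self test: sample each CPU's NMI count, wait roughly ten
 * watchdog periods (keeping the CPUs busy in the lapic case), and disable
 * the watchdog on any CPU whose count did not advance.
 */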
static int __init check_nmi_watchdog(void)
{
        volatile int endflag = 0;
        unsigned int *prev_nmi_count;
        int cpu;

        /* Enable NMI watchdog for newer systems.
           Actually it should be safe for most systems before 2004 too except
           for some IBM systems that corrupt registers when NMI happens
           during SMM. Unfortunately we don't have more exact information
           on these and use this coarse check. */
        if (nmi_watchdog == NMI_DEFAULT && dmi_get_year(DMI_BIOS_DATE) >= 2004)
                nmi_watchdog = NMI_LOCAL_APIC;

        if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
                return 0;

        if (!atomic_read(&nmi_active))
                return 0;

        prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
        if (!prev_nmi_count)
                return -1;

        printk(KERN_INFO "Testing NMI watchdog ... ");

        if (nmi_watchdog == NMI_LOCAL_APIC)
                smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);

        for_each_possible_cpu(cpu)
                prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
        local_irq_enable();
        mdelay((10*1000)/nmi_hz); /* wait 10 ticks */

        for_each_possible_cpu(cpu) {
#ifdef CONFIG_SMP
                /* Check cpu_callin_map here because that is set
                   after the timer is started. */
                if (!cpu_isset(cpu, cpu_callin_map))
                        continue;
#endif
                if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
                        continue;
                if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
                        printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
                                cpu,
                                prev_nmi_count[cpu],
                                nmi_count(cpu));
                        per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
                        atomic_dec(&nmi_active);
                }
        }
        if (!atomic_read(&nmi_active)) {
                kfree(prev_nmi_count);
                atomic_set(&nmi_active, -1);
                return -1;
        }
        endflag = 1;
        printk("OK.\n");

        /* now that we know it works we can reduce NMI frequency to
           something more reasonable; makes a difference in some configs */
        if (nmi_watchdog == NMI_LOCAL_APIC) {
                struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

                nmi_hz = 1;
                /*
                 * On Intel CPUs with ARCH_PERFMON only the low 32 bits of the
                 * counter are writable, and the higher bits are sign-extended
                 * from bit 31.  The counter therefore has to be programmed
                 * with a 31-bit value (so bit 31 is set after negation); pick
                 * an nmi_hz high enough that cpu_khz * 1000 / nmi_hz fits in
                 * 31 bits.
                 */
                if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 &&
                    ((u64)cpu_khz * 1000) > 0x7fffffffULL) {
                        u64 count = (u64)cpu_khz * 1000;
                        do_div(count, 0x7fffffffUL);
                        nmi_hz = count + 1;
                }
        }

        kfree(prev_nmi_count);
        return 0;
}

/* This needs to happen later in boot so counters are working */
late_initcall(check_nmi_watchdog);
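
/* parse the "nmi_watchdog=" boot option */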
static int __init setup_nmi_watchdog(char *str)
{
        int nmi;

        get_option(&str, &nmi);

        if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
                return 0;
        /*
         * If any other x86 CPU has a local APIC, then
         * please test the NMI stuff there and send me the
         * missing bits. Right now Intel P6/P4 and AMD K7 only.
         */
        if ((nmi == NMI_LOCAL_APIC) && (nmi_known_cpu() == 0))
                return 0;  /* no lapic support */
        nmi_watchdog = nmi;
        return 1;
}

__setup("nmi_watchdog=", setup_nmi_watchdog);
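
/*
 * Switch the lapic NMI watchdog off/on on every CPU; called from the
 * /proc/sys/kernel/nmi handler below.
 */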
static void disable_lapic_nmi_watchdog(void)
{
        BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

        if (atomic_read(&nmi_active) <= 0)
                return;

        on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);

        BUG_ON(atomic_read(&nmi_active) != 0);
}

static void enable_lapic_nmi_watchdog(void)
{
        BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

        /* are we already enabled */
        if (atomic_read(&nmi_active) != 0)
                return;

        /* are we lapic aware */
        if (nmi_known_cpu() <= 0)
                return;

        on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
        touch_nmi_watchdog();
}
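
/*
 * Same switch for the IO-APIC (timer based) watchdog: IRQ 0 is disabled
 * while the per-CPU watchdog state is torn down, and re-enabled once it
 * has been set up again.
 */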
void disable_timer_nmi_watchdog(void)
{
        BUG_ON(nmi_watchdog != NMI_IO_APIC);

        if (atomic_read(&nmi_active) <= 0)
                return;

        disable_irq(0);
        on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);

        BUG_ON(atomic_read(&nmi_active) != 0);
}

void enable_timer_nmi_watchdog(void)
{
        BUG_ON(nmi_watchdog != NMI_IO_APIC);

        if (atomic_read(&nmi_active) == 0) {
                touch_nmi_watchdog();
                on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
                enable_irq(0);
        }
}

#ifdef CONFIG_PM

static int nmi_pm_active; /* nmi_active before suspend */

static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{
        /* only CPU0 goes here, other CPUs should be offline */
        nmi_pm_active = atomic_read(&nmi_active);
        stop_apic_nmi_watchdog(NULL);
        BUG_ON(atomic_read(&nmi_active) != 0);
        return 0;
}

static int lapic_nmi_resume(struct sys_device *dev)
{
        /* only CPU0 goes here, other CPUs should be offline */
        if (nmi_pm_active > 0) {
                setup_apic_nmi_watchdog(NULL);
                touch_nmi_watchdog();
        }
        return 0;
}

static struct sysdev_class nmi_sysclass = {
        set_kset_name("lapic_nmi"),
        .resume		= lapic_nmi_resume,
        .suspend	= lapic_nmi_suspend,
};

static struct sys_device device_lapic_nmi = {
        .id	= 0,
        .cls	= &nmi_sysclass,
};

static int __init init_lapic_nmi_sysfs(void)
{
        int error;

        /* should really be a BUG_ON but b/c this is an
         * init call, it just doesn't work.  -dcz
         */
        if (nmi_watchdog != NMI_LOCAL_APIC)
                return 0;

        if (atomic_read(&nmi_active) < 0)
                return 0;

        error = sysdev_class_register(&nmi_sysclass);
        if (!error)
                error = sysdev_register(&device_lapic_nmi);
        return error;
}
/* must come after the local APIC's device_initcall() */
late_initcall(init_lapic_nmi_sysfs);

#endif	/* CONFIG_PM */

/*
 * Activate the NMI watchdog via the local APIC.
 * Original code written by Keith Owens.
 */
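
/*
 * Program @perfctr_msr with the negated number of CPU cycles in one
 * watchdog period (cpu_khz * 1000 / nmi_hz), so the counter overflows
 * and raises the next NMI after roughly 1/nmi_hz seconds.
 */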
static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr)
{
        u64 count = (u64)cpu_khz * 1000;

        do_div(count, nmi_hz);
        if (descr)
                Dprintk("setting %s to -0x%08Lx\n", descr, count);
        wrmsrl(perfctr_msr, 0 - count);
}

/* Note that these events don't tick when the CPU idles. This means
   the frequency varies with CPU load. */

#define K7_EVNTSEL_ENABLE	(1 << 22)
#define K7_EVNTSEL_INT		(1 << 20)
#define K7_EVNTSEL_OS		(1 << 17)
#define K7_EVNTSEL_USR		(1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
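
/*
 * AMD K7/K8: count the "cycles processor is running" event (0x76) on
 * PERFCTR0 and deliver counter overflow as an NMI through the local APIC.
 */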
static int setup_k7_watchdog(void)
{
        unsigned int perfctr_msr, evntsel_msr;
        unsigned int evntsel;
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        perfctr_msr = MSR_K7_PERFCTR0;
        evntsel_msr = MSR_K7_EVNTSEL0;
        if (!reserve_perfctr_nmi(perfctr_msr))
                goto fail;

        if (!reserve_evntsel_nmi(evntsel_msr))
                goto fail1;

        wrmsrl(perfctr_msr, 0UL);

        evntsel = K7_EVNTSEL_INT
                | K7_EVNTSEL_OS
                | K7_EVNTSEL_USR
                | K7_NMI_EVENT;

        /* setup the timer */
        wrmsr(evntsel_msr, evntsel, 0);
        write_watchdog_counter(perfctr_msr, "K7_PERFCTR0");
        apic_write(APIC_LVTPC, APIC_DM_NMI);
        evntsel |= K7_EVNTSEL_ENABLE;
        wrmsr(evntsel_msr, evntsel, 0);

        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = 0;  /* unused */
        wd->check_bit = 1ULL << 63;
        return 1;
fail1:
        release_perfctr_nmi(perfctr_msr);
fail:
        return 0;
}

static void stop_k7_watchdog(void)
{
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        wrmsr(wd->evntsel_msr, 0, 0);

        release_evntsel_nmi(wd->evntsel_msr);
        release_perfctr_nmi(wd->perfctr_msr);
}

#define P6_EVNTSEL0_ENABLE	(1 << 22)
#define P6_EVNTSEL_INT		(1 << 20)
#define P6_EVNTSEL_OS		(1 << 17)
#define P6_EVNTSEL_USR		(1 << 16)
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED
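
/*
 * Intel P6 family (including Pentium M): same scheme as the K7 path, using
 * the CPU_CLK_UNHALTED event (0x79) on PERFCTR0.
 */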
static int setup_p6_watchdog(void)
{
        unsigned int perfctr_msr, evntsel_msr;
        unsigned int evntsel;
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        perfctr_msr = MSR_P6_PERFCTR0;
        evntsel_msr = MSR_P6_EVNTSEL0;
        if (!reserve_perfctr_nmi(perfctr_msr))
                goto fail;

        if (!reserve_evntsel_nmi(evntsel_msr))
                goto fail1;

        wrmsrl(perfctr_msr, 0UL);

        evntsel = P6_EVNTSEL_INT
                | P6_EVNTSEL_OS
                | P6_EVNTSEL_USR
                | P6_NMI_EVENT;

        /* setup the timer */
        wrmsr(evntsel_msr, evntsel, 0);
        write_watchdog_counter(perfctr_msr, "P6_PERFCTR0");
        apic_write(APIC_LVTPC, APIC_DM_NMI);
        evntsel |= P6_EVNTSEL0_ENABLE;
        wrmsr(evntsel_msr, evntsel, 0);

        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = 0;  /* unused */
        wd->check_bit = 1ULL << 39;
        return 1;
fail1:
        release_perfctr_nmi(perfctr_msr);
fail:
        return 0;
}

static void stop_p6_watchdog(void)
{
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        wrmsr(wd->evntsel_msr, 0, 0);

        release_evntsel_nmi(wd->evntsel_msr);
        release_perfctr_nmi(wd->perfctr_msr);
}

/* Note that these events don't tick when the CPU idles. This means
   the frequency varies with CPU load. */

#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
#define P4_ESCR_OS		(1<<3)
#define P4_ESCR_USR		(1<<2)
#define P4_CCCR_OVF_PMI0	(1<<26)
#define P4_CCCR_OVF_PMI1	(1<<27)
#define P4_CCCR_THRESHOLD(N)	((N)<<20)
#define P4_CCCR_COMPLEMENT	(1<<19)
#define P4_CCCR_COMPARE		(1<<18)
#define P4_CCCR_REQUIRED	(3<<16)
#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
#define P4_CCCR_ENABLE		(1<<12)
#define P4_CCCR_OVF		(1<<31)

/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
   CRU_ESCR0 (with any non-null event selector) through a complemented
   max threshold. [IA32-Vol3, Section 14.9.9] */

static int setup_p4_watchdog(void)
{
        unsigned int perfctr_msr, evntsel_msr, cccr_msr;
        unsigned int evntsel, cccr_val;
        unsigned int misc_enable, dummy;
        unsigned int ht_num;
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
        if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
                return 0;

#ifdef CONFIG_SMP
        /* detect which hyperthread we are on */
        if (smp_num_siblings == 2) {
                unsigned int ebx, apicid;

                ebx = cpuid_ebx(1);
                apicid = (ebx >> 24) & 0xff;
                ht_num = apicid & 1;
        } else
#endif
                ht_num = 0;

        /* performance counters are shared resources
         * assign each hyperthread its own set
         * (re-use the ESCR0 register, seems safe
         * and keeps the cccr_val the same)
         */
        if (!ht_num) {
                /* logical cpu 0 */
                perfctr_msr = MSR_P4_IQ_PERFCTR0;
                evntsel_msr = MSR_P4_CRU_ESCR0;
                cccr_msr = MSR_P4_IQ_CCCR0;
                cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
        } else {
                /* logical cpu 1 */
                perfctr_msr = MSR_P4_IQ_PERFCTR1;
                evntsel_msr = MSR_P4_CRU_ESCR0;
                cccr_msr = MSR_P4_IQ_CCCR1;
                cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
        }

        if (!reserve_perfctr_nmi(perfctr_msr))
                goto fail;

        if (!reserve_evntsel_nmi(evntsel_msr))
                goto fail1;

        evntsel = P4_ESCR_EVENT_SELECT(0x3F)
                | P4_ESCR_OS
                | P4_ESCR_USR;

        cccr_val |= P4_CCCR_THRESHOLD(15)
                | P4_CCCR_COMPLEMENT
                | P4_CCCR_COMPARE
                | P4_CCCR_REQUIRED;

        wrmsr(evntsel_msr, evntsel, 0);
        wrmsr(cccr_msr, cccr_val, 0);
        write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0");
        apic_write(APIC_LVTPC, APIC_DM_NMI);
        cccr_val |= P4_CCCR_ENABLE;
        wrmsr(cccr_msr, cccr_val, 0);
        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = cccr_msr;
        wd->check_bit = 1ULL << 39;
        return 1;
fail1:
        release_perfctr_nmi(perfctr_msr);
fail:
        return 0;
}

static void stop_p4_watchdog(void)
{
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        wrmsr(wd->cccr_msr, 0, 0);
        wrmsr(wd->evntsel_msr, 0, 0);

        release_evntsel_nmi(wd->evntsel_msr);
        release_perfctr_nmi(wd->perfctr_msr);
}

#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
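
/*
 * Architectural perfmon (Core and later): program general-purpose counter 0
 * with the Unhalted Core Cycles event, after verifying via CPUID leaf 0xA
 * that the event is supported on this CPU.
 */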
static int setup_intel_arch_watchdog(void)
{
        unsigned int ebx;
        union cpuid10_eax eax;
        unsigned int unused;
        unsigned int perfctr_msr, evntsel_msr;
        unsigned int evntsel;
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        /*
         * Check whether the Architectural PerfMon supports
         * Unhalted Core Cycles Event or not.
         * NOTE: Corresponding bit = 0 in ebx indicates event present.
         */
        cpuid(10, &(eax.full), &ebx, &unused, &unused);
        if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
            (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
                goto fail;

        perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
        evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;

        if (!reserve_perfctr_nmi(perfctr_msr))
                goto fail;

        if (!reserve_evntsel_nmi(evntsel_msr))
                goto fail1;

        wrmsrl(perfctr_msr, 0UL);

        evntsel = ARCH_PERFMON_EVENTSEL_INT
                | ARCH_PERFMON_EVENTSEL_OS
                | ARCH_PERFMON_EVENTSEL_USR
                | ARCH_PERFMON_NMI_EVENT_SEL
                | ARCH_PERFMON_NMI_EVENT_UMASK;

        /* setup the timer */
        wrmsr(evntsel_msr, evntsel, 0);
        write_watchdog_counter(perfctr_msr, "INTEL_ARCH_PERFCTR0");
        apic_write(APIC_LVTPC, APIC_DM_NMI);
        evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
        wrmsr(evntsel_msr, evntsel, 0);

        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = 0;  /* unused */
        wd->check_bit = 1ULL << (eax.split.bit_width - 1);
        return 1;
fail1:
        release_perfctr_nmi(perfctr_msr);
fail:
        return 0;
}

static void stop_intel_arch_watchdog(void)
{
        unsigned int ebx;
        union cpuid10_eax eax;
        unsigned int unused;
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        /*
         * Check whether the Architectural PerfMon supports
         * Unhalted Core Cycles Event or not.
         * NOTE: Corresponding bit = 0 in ebx indicates event present.
         */
        cpuid(10, &(eax.full), &ebx, &unused, &unused);
        if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
            (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
                return;

        wrmsr(wd->evntsel_msr, 0, 0);
        release_evntsel_nmi(wd->evntsel_msr);
        release_perfctr_nmi(wd->perfctr_msr);
}
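
/*
 * Per-CPU enable path, run on each CPU via on_each_cpu(): dispatch to the
 * vendor/family specific setup routine and bump nmi_active on success.
 */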
void setup_apic_nmi_watchdog(void *unused)
{
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        /* only support LOCAL and IO APICs for now */
        if ((nmi_watchdog != NMI_LOCAL_APIC) &&
            (nmi_watchdog != NMI_IO_APIC))
                return;

        if (wd->enabled == 1)
                return;

        /* cheap hack to support suspend/resume */
        /* if cpu0 is not active, neither should the other cpus be */
        if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
                return;

        if (nmi_watchdog == NMI_LOCAL_APIC) {
                switch (boot_cpu_data.x86_vendor) {
                case X86_VENDOR_AMD:
                        if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
                                return;
                        if (!setup_k7_watchdog())
                                return;
                        break;
                case X86_VENDOR_INTEL:
                        if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
                                if (!setup_intel_arch_watchdog())
                                        return;
                                break;
                        }
                        switch (boot_cpu_data.x86) {
                        case 6:
                                if (boot_cpu_data.x86_model > 0xd)
                                        return;

                                if (!setup_p6_watchdog())
                                        return;
                                break;
                        case 15:
                                if (boot_cpu_data.x86_model > 0x4)
                                        return;

                                if (!setup_p4_watchdog())
                                        return;
                                break;
                        default:
                                return;
                        }
                        break;
                default:
                        return;
                }
        }
        wd->enabled = 1;
        atomic_inc(&nmi_active);
}
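
/* per-CPU disable path; mirror of setup_apic_nmi_watchdog() */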
void stop_apic_nmi_watchdog(void *unused)
{
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        /* only support LOCAL and IO APICs for now */
        if ((nmi_watchdog != NMI_LOCAL_APIC) &&
            (nmi_watchdog != NMI_IO_APIC))
                return;

        if (wd->enabled == 0)
                return;

        if (nmi_watchdog == NMI_LOCAL_APIC) {
                switch (boot_cpu_data.x86_vendor) {
                case X86_VENDOR_AMD:
                        stop_k7_watchdog();
                        break;
                case X86_VENDOR_INTEL:
                        if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
                                stop_intel_arch_watchdog();
                                break;
                        }
                        switch (boot_cpu_data.x86) {
                        case 6:
                                if (boot_cpu_data.x86_model > 0xd)
                                        break;
                                stop_p6_watchdog();
                                break;
                        case 15:
                                if (boot_cpu_data.x86_model > 0x4)
                                        break;
                                stop_p4_watchdog();
                                break;
                        }
                        break;
                default:
                        return;
                }
        }
        wd->enabled = 0;
        atomic_dec(&nmi_active);
}

/*
 * The best way to detect whether a CPU has a 'hard lockup' problem
 * is to check its local APIC timer IRQ counts. If they are not
 * changing then that CPU has some problem.
 *
 * As these watchdog NMI IRQs are generated on every CPU, we only
 * have to check the current processor.
 *
 * Since NMIs don't listen to _any_ locks, we have to be extremely
 * careful not to rely on unsafe variables. The printk might lock
 * up though, so we have to break up any console locks first ...
 * [when there will be more tty-related locks, break them up
 *  here too!]
 */

static unsigned int
        last_irq_sums [NR_CPUS],
        alert_counter [NR_CPUS];

void touch_nmi_watchdog(void)
{
        int i;

        /*
         * Just reset the alert counters, (other CPUs might be
         * spinning on locks we hold):
         */
        for_each_possible_cpu(i)
                alert_counter[i] = 0;

        /*
         * Tickle the softlockup detector too:
         */
        touch_softlockup_watchdog();
}
EXPORT_SYMBOL(touch_nmi_watchdog);

extern void die_nmi(struct pt_regs *, const char *msg);
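
/*
 * Called from the NMI handler for every NMI.  Returns 1 if the NMI was
 * recognized here (consumed by a die-chain notifier or accounted as a
 * watchdog tick), 0 if it should be treated as an unknown NMI.
 */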
__kprobes int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
{

        /*
         * Since current_thread_info()-> is always on the stack, and we
         * always switch the stack NMI-atomically, it's safe to use
         * smp_processor_id().
         */
        unsigned int sum;
        int touched = 0;
        int cpu = smp_processor_id();
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
        u64 dummy;
        int rc = 0;

        /* check for other users first */
        if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
                        == NOTIFY_STOP) {
                rc = 1;
                touched = 1;
        }

        sum = per_cpu(irq_stat, cpu).apic_timer_irqs;

        /* if the apic timer isn't firing, this cpu isn't doing much */
        if (!touched && last_irq_sums[cpu] == sum) {
                /*
                 * Ayiee, looks like this CPU is stuck ...
                 * wait a few IRQs (5 seconds) before doing the oops ...
                 */
                alert_counter[cpu]++;
                if (alert_counter[cpu] == 5*nmi_hz)
                        /*
                         * die_nmi will return ONLY if NOTIFY_STOP happens..
                         */
                        die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP");
        } else {
                last_irq_sums[cpu] = sum;
                alert_counter[cpu] = 0;
        }
        /* see if the nmi watchdog went off */
        if (wd->enabled) {
                if (nmi_watchdog == NMI_LOCAL_APIC) {
                        rdmsrl(wd->perfctr_msr, dummy);
                        if (dummy & wd->check_bit) {
                                /* this wasn't a watchdog timer interrupt */
                                goto done;
                        }

                        /* only Intel P4 uses the cccr msr */
                        if (wd->cccr_msr != 0) {
                                /*
                                 * P4 quirks:
                                 * - An overflown perfctr will assert its interrupt
                                 *   until the OVF flag in its CCCR is cleared.
                                 * - LVTPC is masked on interrupt and must be
                                 *   unmasked by the LVTPC handler.
                                 */
                                rdmsrl(wd->cccr_msr, dummy);
                                dummy &= ~P4_CCCR_OVF;
                                wrmsrl(wd->cccr_msr, dummy);
                                apic_write(APIC_LVTPC, APIC_DM_NMI);
                        }
                        else if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
                                 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
                                /* P6 based Pentium M need to re-unmask
                                 * the apic vector but it doesn't hurt
                                 * other P6 variants.
                                 * ArchPerfmon/Core Duo also needs this */
                                apic_write(APIC_LVTPC, APIC_DM_NMI);
                        }
                        /* start the cycle over again */
                        write_watchdog_counter(wd->perfctr_msr, NULL);
                        rc = 1;
                } else if (nmi_watchdog == NMI_IO_APIC) {
                        /* don't know how to accurately check for this.
                         * just assume it was a watchdog timer interrupt
                         * This matches the old behaviour.
                         */
                        rc = 1;
                }
        }
done:
        return rc;
}

int do_nmi_callback(struct pt_regs *regs, int cpu)
{
#ifdef CONFIG_SYSCTL
        if (unknown_nmi_panic)
                return unknown_nmi_panic_callback(regs, cpu);
#endif
        return 0;
}

#ifdef CONFIG_SYSCTL

static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
{
        unsigned char reason = get_nmi_reason();
        char buf[64];

        sprintf(buf, "NMI received for unknown reason %02x\n", reason);
        die_nmi(regs, buf);
        return 0;
}

/*
 * proc handler for /proc/sys/kernel/nmi
 */
int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
                        void __user *buffer, size_t *length, loff_t *ppos)
{
        int old_state;

        nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
        old_state = nmi_watchdog_enabled;
        proc_dointvec(table, write, file, buffer, length, ppos);
        if (!!old_state == !!nmi_watchdog_enabled)
                return 0;

        if (atomic_read(&nmi_active) < 0) {
                printk(KERN_WARNING "NMI watchdog is permanently disabled\n");
                return -EIO;
        }

        if (nmi_watchdog == NMI_DEFAULT) {
                if (nmi_known_cpu() > 0)
                        nmi_watchdog = NMI_LOCAL_APIC;
                else
                        nmi_watchdog = NMI_IO_APIC;
        }

        if (nmi_watchdog == NMI_LOCAL_APIC) {
                if (nmi_watchdog_enabled)
                        enable_lapic_nmi_watchdog();
                else
                        disable_lapic_nmi_watchdog();
        } else {
                printk(KERN_WARNING
                        "NMI watchdog doesn't know what hardware to touch\n");
                return -EIO;
        }
        return 0;
}

#endif

EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
EXPORT_SYMBOL(reserve_perfctr_nmi);
EXPORT_SYMBOL(release_perfctr_nmi);
EXPORT_SYMBOL(reserve_evntsel_nmi);
EXPORT_SYMBOL(release_evntsel_nmi);
EXPORT_SYMBOL(disable_timer_nmi_watchdog);
EXPORT_SYMBOL(enable_timer_nmi_watchdog);