2 * linux/arch/i386/nmi.c
4 * NMI watchdog support on APIC systems
6 * Started by Ingo Molnar <mingo@redhat.com>
9 * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
10 * Mikael Pettersson : Power Management for local APIC NMI watchdog.
11 * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog.
13 * Mikael Pettersson : PM converted to driver model. Disable/enable API.
16 #include <linux/delay.h>
17 #include <linux/interrupt.h>
18 #include <linux/module.h>
19 #include <linux/nmi.h>
20 #include <linux/sysdev.h>
21 #include <linux/sysctl.h>
22 #include <linux/percpu.h>
23 #include <linux/dmi.h>
24 #include <linux/kprobes.h>
25 #include <linux/cpumask.h>
26 #include <linux/kernel_stat.h>
30 #include <asm/kdebug.h>
31 #include <asm/intel_arch_perfmon.h>
33 #include "mach_traps.h"
35 int unknown_nmi_panic
;
36 int nmi_watchdog_enabled
;
38 /* perfctr_nmi_owner tracks the ownership of the perfctr registers:
39 * evtsel_nmi_owner tracks the ownership of the event selection
40 * - different performance counters/ event selection may be reserved for
41 * different subsystems this reservation system just tries to coordinate
44 static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner
);
45 static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner
[3]);
47 static cpumask_t backtrace_mask
= CPU_MASK_NONE
;
49 /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
50 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
52 #define NMI_MAX_COUNTER_BITS 66
55 * >0: the lapic NMI watchdog is active, but can be disabled
56 * <0: the lapic NMI watchdog has not been set up, and cannot
58 * 0: the lapic NMI watchdog is disabled, but can be enabled
60 atomic_t nmi_active
= ATOMIC_INIT(0); /* oprofile uses this */
62 unsigned int nmi_watchdog
= NMI_DEFAULT
;
63 static unsigned int nmi_hz
= HZ
;
65 struct nmi_watchdog_ctlblk
{
68 unsigned int cccr_msr
;
69 unsigned int perfctr_msr
; /* the MSR to reset in NMI handler */
70 unsigned int evntsel_msr
; /* the MSR to select the events to handle */
72 static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk
, nmi_watchdog_ctlblk
);
74 /* local prototypes */
75 static int unknown_nmi_panic_callback(struct pt_regs
*regs
, int cpu
);
77 extern void show_registers(struct pt_regs
*regs
);
78 extern int unknown_nmi_panic
;
80 /* converts an msr to an appropriate reservation bit */
81 static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr
)
83 /* returns the bit offset of the performance counter register */
84 switch (boot_cpu_data
.x86_vendor
) {
86 return (msr
- MSR_K7_PERFCTR0
);
87 case X86_VENDOR_INTEL
:
88 if (cpu_has(&boot_cpu_data
, X86_FEATURE_ARCH_PERFMON
))
89 return (msr
- MSR_ARCH_PERFMON_PERFCTR0
);
91 switch (boot_cpu_data
.x86
) {
93 return (msr
- MSR_P6_PERFCTR0
);
95 return (msr
- MSR_P4_BPU_PERFCTR0
);
101 /* converts an msr to an appropriate reservation bit */
102 static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr
)
104 /* returns the bit offset of the event selection register */
105 switch (boot_cpu_data
.x86_vendor
) {
107 return (msr
- MSR_K7_EVNTSEL0
);
108 case X86_VENDOR_INTEL
:
109 if (cpu_has(&boot_cpu_data
, X86_FEATURE_ARCH_PERFMON
))
110 return (msr
- MSR_ARCH_PERFMON_EVENTSEL0
);
112 switch (boot_cpu_data
.x86
) {
114 return (msr
- MSR_P6_EVNTSEL0
);
116 return (msr
- MSR_P4_BSU_ESCR0
);
122 /* checks for a bit availability (hack for oprofile) */
123 int avail_to_resrv_perfctr_nmi_bit(unsigned int counter
)
125 BUG_ON(counter
> NMI_MAX_COUNTER_BITS
);
127 return (!test_bit(counter
, &__get_cpu_var(perfctr_nmi_owner
)));
130 /* checks the an msr for availability */
131 int avail_to_resrv_perfctr_nmi(unsigned int msr
)
133 unsigned int counter
;
135 counter
= nmi_perfctr_msr_to_bit(msr
);
136 BUG_ON(counter
> NMI_MAX_COUNTER_BITS
);
138 return (!test_bit(counter
, &__get_cpu_var(perfctr_nmi_owner
)));
141 int reserve_perfctr_nmi(unsigned int msr
)
143 unsigned int counter
;
145 counter
= nmi_perfctr_msr_to_bit(msr
);
146 BUG_ON(counter
> NMI_MAX_COUNTER_BITS
);
148 if (!test_and_set_bit(counter
, &__get_cpu_var(perfctr_nmi_owner
)))
153 void release_perfctr_nmi(unsigned int msr
)
155 unsigned int counter
;
157 counter
= nmi_perfctr_msr_to_bit(msr
);
158 BUG_ON(counter
> NMI_MAX_COUNTER_BITS
);
160 clear_bit(counter
, &__get_cpu_var(perfctr_nmi_owner
));
163 int reserve_evntsel_nmi(unsigned int msr
)
165 unsigned int counter
;
167 counter
= nmi_evntsel_msr_to_bit(msr
);
168 BUG_ON(counter
> NMI_MAX_COUNTER_BITS
);
170 if (!test_and_set_bit(counter
, &__get_cpu_var(evntsel_nmi_owner
)[0]))
175 void release_evntsel_nmi(unsigned int msr
)
177 unsigned int counter
;
179 counter
= nmi_evntsel_msr_to_bit(msr
);
180 BUG_ON(counter
> NMI_MAX_COUNTER_BITS
);
182 clear_bit(counter
, &__get_cpu_var(evntsel_nmi_owner
)[0]);
185 static __cpuinit
inline int nmi_known_cpu(void)
187 switch (boot_cpu_data
.x86_vendor
) {
189 return ((boot_cpu_data
.x86
== 15) || (boot_cpu_data
.x86
== 6)
190 || (boot_cpu_data
.x86
== 16));
191 case X86_VENDOR_INTEL
:
192 if (cpu_has(&boot_cpu_data
, X86_FEATURE_ARCH_PERFMON
))
195 return ((boot_cpu_data
.x86
== 15) || (boot_cpu_data
.x86
== 6));
200 static int endflag __initdata
= 0;
203 /* The performance counters used by NMI_LOCAL_APIC don't trigger when
204 * the CPU is idle. To make sure the NMI watchdog really ticks on all
205 * CPUs during the test make them busy.
207 static __init
void nmi_cpu_busy(void *data
)
209 local_irq_enable_in_hardirq();
210 /* Intentionally don't use cpu_relax here. This is
211 to make sure that the performance counter really ticks,
212 even if there is a simulator or similar that catches the
213 pause instruction. On a real HT machine this is fine because
214 all other CPUs are busy with "useless" delay loops and don't
215 care if they get somewhat less cycles. */
221 static unsigned int adjust_for_32bit_ctr(unsigned int hz
)
224 unsigned int retval
= hz
;
227 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
228 * are writable, with higher bits sign extending from bit 31.
229 * So, we can only program the counter with 31 bit values and
230 * 32nd bit should be 1, for 33.. to be 1.
231 * Find the appropriate nmi_hz
233 counter_val
= (u64
)cpu_khz
* 1000;
234 do_div(counter_val
, retval
);
235 if (counter_val
> 0x7fffffffULL
) {
236 u64 count
= (u64
)cpu_khz
* 1000;
237 do_div(count
, 0x7fffffffUL
);
243 static int __init
check_nmi_watchdog(void)
245 unsigned int *prev_nmi_count
;
248 /* Enable NMI watchdog for newer systems.
249 Probably safe on most older systems too, but let's be careful.
250 IBM ThinkPads use INT10 inside SMM and that allows early NMI inside SMM
251 which hangs the system. Disable watchdog for all thinkpads */
252 if (nmi_watchdog
== NMI_DEFAULT
&& dmi_get_year(DMI_BIOS_DATE
) >= 2004 &&
253 !dmi_name_in_vendors("ThinkPad"))
254 nmi_watchdog
= NMI_LOCAL_APIC
;
256 if ((nmi_watchdog
== NMI_NONE
) || (nmi_watchdog
== NMI_DEFAULT
))
259 if (!atomic_read(&nmi_active
))
262 prev_nmi_count
= kmalloc(NR_CPUS
* sizeof(int), GFP_KERNEL
);
266 printk(KERN_INFO
"Testing NMI watchdog ... ");
268 if (nmi_watchdog
== NMI_LOCAL_APIC
)
269 smp_call_function(nmi_cpu_busy
, (void *)&endflag
, 0, 0);
271 for_each_possible_cpu(cpu
)
272 prev_nmi_count
[cpu
] = per_cpu(irq_stat
, cpu
).__nmi_count
;
274 mdelay((10*1000)/nmi_hz
); // wait 10 ticks
276 for_each_possible_cpu(cpu
) {
278 /* Check cpu_callin_map here because that is set
279 after the timer is started. */
280 if (!cpu_isset(cpu
, cpu_callin_map
))
283 if (!per_cpu(nmi_watchdog_ctlblk
, cpu
).enabled
)
285 if (nmi_count(cpu
) - prev_nmi_count
[cpu
] <= 5) {
286 printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
290 per_cpu(nmi_watchdog_ctlblk
, cpu
).enabled
= 0;
291 atomic_dec(&nmi_active
);
294 if (!atomic_read(&nmi_active
)) {
295 kfree(prev_nmi_count
);
296 atomic_set(&nmi_active
, -1);
302 /* now that we know it works we can reduce NMI frequency to
303 something more reasonable; makes a difference in some configs */
304 if (nmi_watchdog
== NMI_LOCAL_APIC
) {
305 struct nmi_watchdog_ctlblk
*wd
= &__get_cpu_var(nmi_watchdog_ctlblk
);
309 if (wd
->perfctr_msr
== MSR_P6_PERFCTR0
||
310 wd
->perfctr_msr
== MSR_ARCH_PERFMON_PERFCTR0
) {
311 nmi_hz
= adjust_for_32bit_ctr(nmi_hz
);
315 kfree(prev_nmi_count
);
318 /* This needs to happen later in boot so counters are working */
319 late_initcall(check_nmi_watchdog
);
321 static int __init
setup_nmi_watchdog(char *str
)
325 get_option(&str
, &nmi
);
327 if ((nmi
>= NMI_INVALID
) || (nmi
< NMI_NONE
))
334 __setup("nmi_watchdog=", setup_nmi_watchdog
);
336 static void disable_lapic_nmi_watchdog(void)
338 BUG_ON(nmi_watchdog
!= NMI_LOCAL_APIC
);
340 if (atomic_read(&nmi_active
) <= 0)
343 on_each_cpu(stop_apic_nmi_watchdog
, NULL
, 0, 1);
345 BUG_ON(atomic_read(&nmi_active
) != 0);
348 static void enable_lapic_nmi_watchdog(void)
350 BUG_ON(nmi_watchdog
!= NMI_LOCAL_APIC
);
352 /* are we already enabled */
353 if (atomic_read(&nmi_active
) != 0)
356 /* are we lapic aware */
357 if (nmi_known_cpu() <= 0)
360 on_each_cpu(setup_apic_nmi_watchdog
, NULL
, 0, 1);
361 touch_nmi_watchdog();
364 void disable_timer_nmi_watchdog(void)
366 BUG_ON(nmi_watchdog
!= NMI_IO_APIC
);
368 if (atomic_read(&nmi_active
) <= 0)
372 on_each_cpu(stop_apic_nmi_watchdog
, NULL
, 0, 1);
374 BUG_ON(atomic_read(&nmi_active
) != 0);
377 void enable_timer_nmi_watchdog(void)
379 BUG_ON(nmi_watchdog
!= NMI_IO_APIC
);
381 if (atomic_read(&nmi_active
) == 0) {
382 touch_nmi_watchdog();
383 on_each_cpu(setup_apic_nmi_watchdog
, NULL
, 0, 1);
388 static void __acpi_nmi_disable(void *__unused
)
390 apic_write_around(APIC_LVT0
, APIC_DM_NMI
| APIC_LVT_MASKED
);
394 * Disable timer based NMIs on all CPUs:
396 void acpi_nmi_disable(void)
398 if (atomic_read(&nmi_active
) && nmi_watchdog
== NMI_IO_APIC
)
399 on_each_cpu(__acpi_nmi_disable
, NULL
, 0, 1);
402 static void __acpi_nmi_enable(void *__unused
)
404 apic_write_around(APIC_LVT0
, APIC_DM_NMI
);
408 * Enable timer based NMIs on all CPUs:
410 void acpi_nmi_enable(void)
412 if (atomic_read(&nmi_active
) && nmi_watchdog
== NMI_IO_APIC
)
413 on_each_cpu(__acpi_nmi_enable
, NULL
, 0, 1);
418 static int nmi_pm_active
; /* nmi_active before suspend */
420 static int lapic_nmi_suspend(struct sys_device
*dev
, pm_message_t state
)
422 /* only CPU0 goes here, other CPUs should be offline */
423 nmi_pm_active
= atomic_read(&nmi_active
);
424 stop_apic_nmi_watchdog(NULL
);
425 BUG_ON(atomic_read(&nmi_active
) != 0);
429 static int lapic_nmi_resume(struct sys_device
*dev
)
431 /* only CPU0 goes here, other CPUs should be offline */
432 if (nmi_pm_active
> 0) {
433 setup_apic_nmi_watchdog(NULL
);
434 touch_nmi_watchdog();
440 static struct sysdev_class nmi_sysclass
= {
441 set_kset_name("lapic_nmi"),
442 .resume
= lapic_nmi_resume
,
443 .suspend
= lapic_nmi_suspend
,
446 static struct sys_device device_lapic_nmi
= {
448 .cls
= &nmi_sysclass
,
451 static int __init
init_lapic_nmi_sysfs(void)
455 /* should really be a BUG_ON but b/c this is an
456 * init call, it just doesn't work. -dcz
458 if (nmi_watchdog
!= NMI_LOCAL_APIC
)
461 if ( atomic_read(&nmi_active
) < 0 )
464 error
= sysdev_class_register(&nmi_sysclass
);
466 error
= sysdev_register(&device_lapic_nmi
);
469 /* must come after the local APIC's device_initcall() */
470 late_initcall(init_lapic_nmi_sysfs
);
472 #endif /* CONFIG_PM */
475 * Activate the NMI watchdog via the local APIC.
476 * Original code written by Keith Owens.
479 static void write_watchdog_counter(unsigned int perfctr_msr
, const char *descr
)
481 u64 count
= (u64
)cpu_khz
* 1000;
483 do_div(count
, nmi_hz
);
485 Dprintk("setting %s to -0x%08Lx\n", descr
, count
);
486 wrmsrl(perfctr_msr
, 0 - count
);
489 static void write_watchdog_counter32(unsigned int perfctr_msr
,
492 u64 count
= (u64
)cpu_khz
* 1000;
494 do_div(count
, nmi_hz
);
496 Dprintk("setting %s to -0x%08Lx\n", descr
, count
);
497 wrmsr(perfctr_msr
, (u32
)(-count
), 0);
500 /* Note that these events don't tick when the CPU idles. This means
501 the frequency varies with CPU load. */
503 #define K7_EVNTSEL_ENABLE (1 << 22)
504 #define K7_EVNTSEL_INT (1 << 20)
505 #define K7_EVNTSEL_OS (1 << 17)
506 #define K7_EVNTSEL_USR (1 << 16)
507 #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
508 #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
510 static int setup_k7_watchdog(void)
512 unsigned int perfctr_msr
, evntsel_msr
;
513 unsigned int evntsel
;
514 struct nmi_watchdog_ctlblk
*wd
= &__get_cpu_var(nmi_watchdog_ctlblk
);
516 perfctr_msr
= MSR_K7_PERFCTR0
;
517 evntsel_msr
= MSR_K7_EVNTSEL0
;
518 if (!reserve_perfctr_nmi(perfctr_msr
))
521 if (!reserve_evntsel_nmi(evntsel_msr
))
524 wrmsrl(perfctr_msr
, 0UL);
526 evntsel
= K7_EVNTSEL_INT
531 /* setup the timer */
532 wrmsr(evntsel_msr
, evntsel
, 0);
533 write_watchdog_counter(perfctr_msr
, "K7_PERFCTR0");
534 apic_write(APIC_LVTPC
, APIC_DM_NMI
);
535 evntsel
|= K7_EVNTSEL_ENABLE
;
536 wrmsr(evntsel_msr
, evntsel
, 0);
538 wd
->perfctr_msr
= perfctr_msr
;
539 wd
->evntsel_msr
= evntsel_msr
;
540 wd
->cccr_msr
= 0; //unused
541 wd
->check_bit
= 1ULL<<63;
544 release_perfctr_nmi(perfctr_msr
);
549 static void stop_k7_watchdog(void)
551 struct nmi_watchdog_ctlblk
*wd
= &__get_cpu_var(nmi_watchdog_ctlblk
);
553 wrmsr(wd
->evntsel_msr
, 0, 0);
555 release_evntsel_nmi(wd
->evntsel_msr
);
556 release_perfctr_nmi(wd
->perfctr_msr
);
559 #define P6_EVNTSEL0_ENABLE (1 << 22)
560 #define P6_EVNTSEL_INT (1 << 20)
561 #define P6_EVNTSEL_OS (1 << 17)
562 #define P6_EVNTSEL_USR (1 << 16)
563 #define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
564 #define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
566 static int setup_p6_watchdog(void)
568 unsigned int perfctr_msr
, evntsel_msr
;
569 unsigned int evntsel
;
570 struct nmi_watchdog_ctlblk
*wd
= &__get_cpu_var(nmi_watchdog_ctlblk
);
572 perfctr_msr
= MSR_P6_PERFCTR0
;
573 evntsel_msr
= MSR_P6_EVNTSEL0
;
574 if (!reserve_perfctr_nmi(perfctr_msr
))
577 if (!reserve_evntsel_nmi(evntsel_msr
))
580 wrmsrl(perfctr_msr
, 0UL);
582 evntsel
= P6_EVNTSEL_INT
587 /* setup the timer */
588 wrmsr(evntsel_msr
, evntsel
, 0);
589 nmi_hz
= adjust_for_32bit_ctr(nmi_hz
);
590 write_watchdog_counter32(perfctr_msr
, "P6_PERFCTR0");
591 apic_write(APIC_LVTPC
, APIC_DM_NMI
);
592 evntsel
|= P6_EVNTSEL0_ENABLE
;
593 wrmsr(evntsel_msr
, evntsel
, 0);
595 wd
->perfctr_msr
= perfctr_msr
;
596 wd
->evntsel_msr
= evntsel_msr
;
597 wd
->cccr_msr
= 0; //unused
598 wd
->check_bit
= 1ULL<<39;
601 release_perfctr_nmi(perfctr_msr
);
606 static void stop_p6_watchdog(void)
608 struct nmi_watchdog_ctlblk
*wd
= &__get_cpu_var(nmi_watchdog_ctlblk
);
610 wrmsr(wd
->evntsel_msr
, 0, 0);
612 release_evntsel_nmi(wd
->evntsel_msr
);
613 release_perfctr_nmi(wd
->perfctr_msr
);
616 /* Note that these events don't tick when the CPU idles. This means
617 the frequency varies with CPU load. */
619 #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
620 #define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
621 #define P4_ESCR_OS (1<<3)
622 #define P4_ESCR_USR (1<<2)
623 #define P4_CCCR_OVF_PMI0 (1<<26)
624 #define P4_CCCR_OVF_PMI1 (1<<27)
625 #define P4_CCCR_THRESHOLD(N) ((N)<<20)
626 #define P4_CCCR_COMPLEMENT (1<<19)
627 #define P4_CCCR_COMPARE (1<<18)
628 #define P4_CCCR_REQUIRED (3<<16)
629 #define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
630 #define P4_CCCR_ENABLE (1<<12)
631 #define P4_CCCR_OVF (1<<31)
632 /* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
633 CRU_ESCR0 (with any non-null event selector) through a complemented
634 max threshold. [IA32-Vol3, Section 14.9.9] */
636 static int setup_p4_watchdog(void)
638 unsigned int perfctr_msr
, evntsel_msr
, cccr_msr
;
639 unsigned int evntsel
, cccr_val
;
640 unsigned int misc_enable
, dummy
;
642 struct nmi_watchdog_ctlblk
*wd
= &__get_cpu_var(nmi_watchdog_ctlblk
);
644 rdmsr(MSR_IA32_MISC_ENABLE
, misc_enable
, dummy
);
645 if (!(misc_enable
& MSR_P4_MISC_ENABLE_PERF_AVAIL
))
649 /* detect which hyperthread we are on */
650 if (smp_num_siblings
== 2) {
651 unsigned int ebx
, apicid
;
654 apicid
= (ebx
>> 24) & 0xff;
660 /* performance counters are shared resources
661 * assign each hyperthread its own set
662 * (re-use the ESCR0 register, seems safe
663 * and keeps the cccr_val the same)
667 perfctr_msr
= MSR_P4_IQ_PERFCTR0
;
668 evntsel_msr
= MSR_P4_CRU_ESCR0
;
669 cccr_msr
= MSR_P4_IQ_CCCR0
;
670 cccr_val
= P4_CCCR_OVF_PMI0
| P4_CCCR_ESCR_SELECT(4);
673 perfctr_msr
= MSR_P4_IQ_PERFCTR1
;
674 evntsel_msr
= MSR_P4_CRU_ESCR0
;
675 cccr_msr
= MSR_P4_IQ_CCCR1
;
676 cccr_val
= P4_CCCR_OVF_PMI1
| P4_CCCR_ESCR_SELECT(4);
679 if (!reserve_perfctr_nmi(perfctr_msr
))
682 if (!reserve_evntsel_nmi(evntsel_msr
))
685 evntsel
= P4_ESCR_EVENT_SELECT(0x3F)
689 cccr_val
|= P4_CCCR_THRESHOLD(15)
694 wrmsr(evntsel_msr
, evntsel
, 0);
695 wrmsr(cccr_msr
, cccr_val
, 0);
696 write_watchdog_counter(perfctr_msr
, "P4_IQ_COUNTER0");
697 apic_write(APIC_LVTPC
, APIC_DM_NMI
);
698 cccr_val
|= P4_CCCR_ENABLE
;
699 wrmsr(cccr_msr
, cccr_val
, 0);
700 wd
->perfctr_msr
= perfctr_msr
;
701 wd
->evntsel_msr
= evntsel_msr
;
702 wd
->cccr_msr
= cccr_msr
;
703 wd
->check_bit
= 1ULL<<39;
706 release_perfctr_nmi(perfctr_msr
);
711 static void stop_p4_watchdog(void)
713 struct nmi_watchdog_ctlblk
*wd
= &__get_cpu_var(nmi_watchdog_ctlblk
);
715 wrmsr(wd
->cccr_msr
, 0, 0);
716 wrmsr(wd
->evntsel_msr
, 0, 0);
718 release_evntsel_nmi(wd
->evntsel_msr
);
719 release_perfctr_nmi(wd
->perfctr_msr
);
722 #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
723 #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
725 static int setup_intel_arch_watchdog(void)
728 union cpuid10_eax eax
;
730 unsigned int perfctr_msr
, evntsel_msr
;
731 unsigned int evntsel
;
732 struct nmi_watchdog_ctlblk
*wd
= &__get_cpu_var(nmi_watchdog_ctlblk
);
735 * Check whether the Architectural PerfMon supports
736 * Unhalted Core Cycles Event or not.
737 * NOTE: Corresponding bit = 0 in ebx indicates event present.
739 cpuid(10, &(eax
.full
), &ebx
, &unused
, &unused
);
740 if ((eax
.split
.mask_length
< (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX
+1)) ||
741 (ebx
& ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT
))
744 perfctr_msr
= MSR_ARCH_PERFMON_PERFCTR0
;
745 evntsel_msr
= MSR_ARCH_PERFMON_EVENTSEL0
;
747 if (!reserve_perfctr_nmi(perfctr_msr
))
750 if (!reserve_evntsel_nmi(evntsel_msr
))
753 wrmsrl(perfctr_msr
, 0UL);
755 evntsel
= ARCH_PERFMON_EVENTSEL_INT
756 | ARCH_PERFMON_EVENTSEL_OS
757 | ARCH_PERFMON_EVENTSEL_USR
758 | ARCH_PERFMON_NMI_EVENT_SEL
759 | ARCH_PERFMON_NMI_EVENT_UMASK
;
761 /* setup the timer */
762 wrmsr(evntsel_msr
, evntsel
, 0);
763 nmi_hz
= adjust_for_32bit_ctr(nmi_hz
);
764 write_watchdog_counter32(perfctr_msr
, "INTEL_ARCH_PERFCTR0");
765 apic_write(APIC_LVTPC
, APIC_DM_NMI
);
766 evntsel
|= ARCH_PERFMON_EVENTSEL0_ENABLE
;
767 wrmsr(evntsel_msr
, evntsel
, 0);
769 wd
->perfctr_msr
= perfctr_msr
;
770 wd
->evntsel_msr
= evntsel_msr
;
771 wd
->cccr_msr
= 0; //unused
772 wd
->check_bit
= 1ULL << (eax
.split
.bit_width
- 1);
775 release_perfctr_nmi(perfctr_msr
);
780 static void stop_intel_arch_watchdog(void)
783 union cpuid10_eax eax
;
785 struct nmi_watchdog_ctlblk
*wd
= &__get_cpu_var(nmi_watchdog_ctlblk
);
788 * Check whether the Architectural PerfMon supports
789 * Unhalted Core Cycles Event or not.
790 * NOTE: Corresponding bit = 0 in ebx indicates event present.
792 cpuid(10, &(eax
.full
), &ebx
, &unused
, &unused
);
793 if ((eax
.split
.mask_length
< (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX
+1)) ||
794 (ebx
& ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT
))
797 wrmsr(wd
->evntsel_msr
, 0, 0);
798 release_evntsel_nmi(wd
->evntsel_msr
);
799 release_perfctr_nmi(wd
->perfctr_msr
);
802 void setup_apic_nmi_watchdog (void *unused
)
804 struct nmi_watchdog_ctlblk
*wd
= &__get_cpu_var(nmi_watchdog_ctlblk
);
806 /* only support LOCAL and IO APICs for now */
807 if ((nmi_watchdog
!= NMI_LOCAL_APIC
) &&
808 (nmi_watchdog
!= NMI_IO_APIC
))
811 if (wd
->enabled
== 1)
814 /* cheap hack to support suspend/resume */
815 /* if cpu0 is not active neither should the other cpus */
816 if ((smp_processor_id() != 0) && (atomic_read(&nmi_active
) <= 0))
819 if (nmi_watchdog
== NMI_LOCAL_APIC
) {
820 switch (boot_cpu_data
.x86_vendor
) {
822 if (boot_cpu_data
.x86
!= 6 && boot_cpu_data
.x86
!= 15 &&
823 boot_cpu_data
.x86
!= 16)
825 if (!setup_k7_watchdog())
828 case X86_VENDOR_INTEL
:
829 if (cpu_has(&boot_cpu_data
, X86_FEATURE_ARCH_PERFMON
)) {
830 if (!setup_intel_arch_watchdog())
834 switch (boot_cpu_data
.x86
) {
836 if (boot_cpu_data
.x86_model
> 0xd)
839 if (!setup_p6_watchdog())
843 if (boot_cpu_data
.x86_model
> 0x4)
846 if (!setup_p4_watchdog())
858 atomic_inc(&nmi_active
);
861 void stop_apic_nmi_watchdog(void *unused
)
863 struct nmi_watchdog_ctlblk
*wd
= &__get_cpu_var(nmi_watchdog_ctlblk
);
865 /* only support LOCAL and IO APICs for now */
866 if ((nmi_watchdog
!= NMI_LOCAL_APIC
) &&
867 (nmi_watchdog
!= NMI_IO_APIC
))
870 if (wd
->enabled
== 0)
873 if (nmi_watchdog
== NMI_LOCAL_APIC
) {
874 switch (boot_cpu_data
.x86_vendor
) {
878 case X86_VENDOR_INTEL
:
879 if (cpu_has(&boot_cpu_data
, X86_FEATURE_ARCH_PERFMON
)) {
880 stop_intel_arch_watchdog();
883 switch (boot_cpu_data
.x86
) {
885 if (boot_cpu_data
.x86_model
> 0xd)
890 if (boot_cpu_data
.x86_model
> 0x4)
901 atomic_dec(&nmi_active
);
905 * the best way to detect whether a CPU has a 'hard lockup' problem
906 * is to check it's local APIC timer IRQ counts. If they are not
907 * changing then that CPU has some problem.
909 * as these watchdog NMI IRQs are generated on every CPU, we only
910 * have to check the current processor.
912 * since NMIs don't listen to _any_ locks, we have to be extremely
913 * careful not to rely on unsafe variables. The printk might lock
914 * up though, so we have to break up any console locks first ...
915 * [when there will be more tty-related locks, break them up
920 last_irq_sums
[NR_CPUS
],
921 alert_counter
[NR_CPUS
];
923 void touch_nmi_watchdog (void)
925 if (nmi_watchdog
> 0) {
929 * Just reset the alert counters, (other CPUs might be
930 * spinning on locks we hold):
932 for_each_present_cpu (cpu
)
933 alert_counter
[cpu
] = 0;
937 * Tickle the softlockup detector too:
939 touch_softlockup_watchdog();
941 EXPORT_SYMBOL(touch_nmi_watchdog
);
943 extern void die_nmi(struct pt_regs
*, const char *msg
);
945 __kprobes
int nmi_watchdog_tick(struct pt_regs
* regs
, unsigned reason
)
949 * Since current_thread_info()-> is always on the stack, and we
950 * always switch the stack NMI-atomically, it's safe to use
951 * smp_processor_id().
955 int cpu
= smp_processor_id();
956 struct nmi_watchdog_ctlblk
*wd
= &__get_cpu_var(nmi_watchdog_ctlblk
);
960 /* check for other users first */
961 if (notify_die(DIE_NMI
, "nmi", regs
, reason
, 2, SIGINT
)
967 if (cpu_isset(cpu
, backtrace_mask
)) {
968 static DEFINE_SPINLOCK(lock
); /* Serialise the printks */
971 printk("NMI backtrace for cpu %d\n", cpu
);
974 cpu_clear(cpu
, backtrace_mask
);
978 * Take the local apic timer and PIT/HPET into account. We don't
979 * know which one is active, when we have highres/dyntick on
981 sum
= per_cpu(irq_stat
, cpu
).apic_timer_irqs
+ kstat_irqs(0);
983 /* if the none of the timers isn't firing, this cpu isn't doing much */
984 if (!touched
&& last_irq_sums
[cpu
] == sum
) {
986 * Ayiee, looks like this CPU is stuck ...
987 * wait a few IRQs (5 seconds) before doing the oops ...
989 alert_counter
[cpu
]++;
990 if (alert_counter
[cpu
] == 5*nmi_hz
)
992 * die_nmi will return ONLY if NOTIFY_STOP happens..
994 die_nmi(regs
, "BUG: NMI Watchdog detected LOCKUP");
996 last_irq_sums
[cpu
] = sum
;
997 alert_counter
[cpu
] = 0;
999 /* see if the nmi watchdog went off */
1001 if (nmi_watchdog
== NMI_LOCAL_APIC
) {
1002 rdmsrl(wd
->perfctr_msr
, dummy
);
1003 if (dummy
& wd
->check_bit
){
1004 /* this wasn't a watchdog timer interrupt */
1008 /* only Intel P4 uses the cccr msr */
1009 if (wd
->cccr_msr
!= 0) {
1012 * - An overflown perfctr will assert its interrupt
1013 * until the OVF flag in its CCCR is cleared.
1014 * - LVTPC is masked on interrupt and must be
1015 * unmasked by the LVTPC handler.
1017 rdmsrl(wd
->cccr_msr
, dummy
);
1018 dummy
&= ~P4_CCCR_OVF
;
1019 wrmsrl(wd
->cccr_msr
, dummy
);
1020 apic_write(APIC_LVTPC
, APIC_DM_NMI
);
1021 /* start the cycle over again */
1022 write_watchdog_counter(wd
->perfctr_msr
, NULL
);
1024 else if (wd
->perfctr_msr
== MSR_P6_PERFCTR0
||
1025 wd
->perfctr_msr
== MSR_ARCH_PERFMON_PERFCTR0
) {
1026 /* P6 based Pentium M need to re-unmask
1027 * the apic vector but it doesn't hurt
1029 * ArchPerfom/Core Duo also needs this */
1030 apic_write(APIC_LVTPC
, APIC_DM_NMI
);
1031 /* P6/ARCH_PERFMON has 32 bit counter write */
1032 write_watchdog_counter32(wd
->perfctr_msr
, NULL
);
1034 /* start the cycle over again */
1035 write_watchdog_counter(wd
->perfctr_msr
, NULL
);
1038 } else if (nmi_watchdog
== NMI_IO_APIC
) {
1039 /* don't know how to accurately check for this.
1040 * just assume it was a watchdog timer interrupt
1041 * This matches the old behaviour.
/* Fallback handler for NMIs nobody claimed; may panic if configured so. */
int do_nmi_callback(struct pt_regs * regs, int cpu)
{
#ifdef CONFIG_SYSCTL
	if (unknown_nmi_panic)
		return unknown_nmi_panic_callback(regs, cpu);
#endif
	return 0;
}
1059 #ifdef CONFIG_SYSCTL
/* Panic handler for NMIs of unknown origin (unknown_nmi_panic sysctl). */
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
{
	unsigned char reason = get_nmi_reason();
	char buf[64];

	sprintf(buf, "NMI received for unknown reason %02x\n", reason);
	die_nmi(regs, buf);
	return 0;
}
1072 * proc handler for /proc/sys/kernel/nmi
1074 int proc_nmi_enabled(struct ctl_table
*table
, int write
, struct file
*file
,
1075 void __user
*buffer
, size_t *length
, loff_t
*ppos
)
1079 nmi_watchdog_enabled
= (atomic_read(&nmi_active
) > 0) ? 1 : 0;
1080 old_state
= nmi_watchdog_enabled
;
1081 proc_dointvec(table
, write
, file
, buffer
, length
, ppos
);
1082 if (!!old_state
== !!nmi_watchdog_enabled
)
1085 if (atomic_read(&nmi_active
) < 0) {
1086 printk( KERN_WARNING
"NMI watchdog is permanently disabled\n");
1090 if (nmi_watchdog
== NMI_DEFAULT
) {
1091 if (nmi_known_cpu() > 0)
1092 nmi_watchdog
= NMI_LOCAL_APIC
;
1094 nmi_watchdog
= NMI_IO_APIC
;
1097 if (nmi_watchdog
== NMI_LOCAL_APIC
) {
1098 if (nmi_watchdog_enabled
)
1099 enable_lapic_nmi_watchdog();
1101 disable_lapic_nmi_watchdog();
1103 printk( KERN_WARNING
1104 "NMI watchdog doesn't know what hardware to touch\n");
1112 void __trigger_all_cpu_backtrace(void)
1116 backtrace_mask
= cpu_online_map
;
1117 /* Wait for up to 10 seconds for all CPUs to do the backtrace */
1118 for (i
= 0; i
< 10 * 1000; i
++) {
1119 if (cpus_empty(backtrace_mask
))
1125 EXPORT_SYMBOL(nmi_active
);
1126 EXPORT_SYMBOL(nmi_watchdog
);
1127 EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi
);
1128 EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit
);
1129 EXPORT_SYMBOL(reserve_perfctr_nmi
);
1130 EXPORT_SYMBOL(release_perfctr_nmi
);
1131 EXPORT_SYMBOL(reserve_evntsel_nmi
);
1132 EXPORT_SYMBOL(release_evntsel_nmi
);
1133 EXPORT_SYMBOL(disable_timer_nmi_watchdog
);
1134 EXPORT_SYMBOL(enable_timer_nmi_watchdog
);