4 * started by Ingo Molnar, Copyright (C) 2005, 2006 Red Hat, Inc.
6 * this code detects soft lockups: incidents where, on a CPU, the kernel
7 * does not reschedule for softlockup_thresh seconds (60 by default) or more.
10 #include <linux/cpu.h>
11 #include <linux/nmi.h>
12 #include <linux/init.h>
13 #include <linux/delay.h>
14 #include <linux/freezer.h>
15 #include <linux/kthread.h>
16 #include <linux/lockdep.h>
17 #include <linux/notifier.h>
18 #include <linux/module.h>
19 #include <linux/sysctl.h>
21 #include <asm/irq_regs.h>
/* Held around the lockup report that softlockup_tick() prints. */
23 static DEFINE_SPINLOCK(print_lock
);
/*
 * Per-CPU touch timestamp: written with get_timestamp() by
 * __touch_softlockup_watchdog() and reset to 0 by the touch_*() helpers.
 */
25 static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts
); /* touch timestamp */
/*
 * Per-CPU copy of the touch timestamp for which softlockup_tick() last
 * printed a report; compared against the touch timestamp before reporting.
 */
26 static DEFINE_PER_CPU(unsigned long, softlockup_print_ts
); /* print timestamp */
/*
 * Per-CPU watchdog thread, installed by cpu_callback(). softlockup_tick()
 * treats a NULL entry as detection being switched off.
 */
27 static DEFINE_PER_CPU(struct task_struct
*, softlockup_watchdog
);
/*
 * Per-CPU flag set by touch_softlockup_watchdog_sync() and cleared again
 * by softlockup_tick().
 */
28 static DEFINE_PER_CPU(bool, softlock_touch_sync
);
/*
 * When non-zero, softlockup_tick() stops before printing a report.
 * NOTE(review): presumably set from the panic notifier; verify.
 */
30 static int __read_mostly did_panic
;
/*
 * Lockup threshold, in the approximate seconds produced by
 * get_timestamp(). softlockup_tick() treats a value <= 0 as detection
 * being switched off.
 */
31 int __read_mostly softlockup_thresh
= 60;
34 * Should we panic (and reboot, if panic_timeout= is set) when a
/*
 * Non-zero makes softlockup_tick() call panic() after it has printed a
 * lockup report. The build-time default comes from
 * CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; softlockup_panic_setup()
 * overwrites it from the softlockup_panic= boot parameter.
 */
37 unsigned int __read_mostly softlockup_panic
=
38 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
;
/*
 * Handler for the softlockup_panic= boot parameter (registered through
 * __setup() below).
 *
 * @str: text following the parameter name; converted with
 *       simple_strtoul() using base argument 0 and stored in
 *       softlockup_panic.
 */
40 static int __init
softlockup_panic_setup(char *str
)
42 softlockup_panic
= simple_strtoul(str
, NULL
, 0);
46 __setup("softlockup_panic=", softlockup_panic_setup
);
/*
 * Panic notifier callback: installed as panic_block.notifier_call, and
 * panic_block is registered on panic_notifier_list by
 * spawn_softlockup_task().
 */
49 softlock_panic(struct notifier_block
*this, unsigned long event
, void *ptr
)
/*
 * Notifier block that routes to softlock_panic(); registered on
 * panic_notifier_list by spawn_softlockup_task().
 */
56 static struct notifier_block panic_block
= {
57 .notifier_call
= softlock_panic
,
61 * Returns seconds, approximately. We don't need nanosecond
62 * resolution, and we don't need to waste time with a big divide when
/*
 * get_timestamp - coarse timestamp for a CPU.
 * @this_cpu: CPU whose cpu_clock() is read.
 *
 * Returns cpu_clock(this_cpu) shifted right by 30 bits. The shift stands
 * in for a division by 10^9, so the result is only approximately seconds.
 */
65 static unsigned long get_timestamp(int this_cpu
)
67 return cpu_clock(this_cpu
) >> 30LL; /* 2^30 ~= 10^9 */
/*
 * Stamp the current CPU's softlockup_touch_ts with the present
 * get_timestamp() value. The CPU is looked up with
 * raw_smp_processor_id() and the variable is reached through
 * __raw_get_cpu_var().
 */
70 static void __touch_softlockup_watchdog(void)
72 int this_cpu
= raw_smp_processor_id();
74 __raw_get_cpu_var(softlockup_touch_ts
) = get_timestamp(this_cpu
);
/*
 * Reset the current CPU's touch timestamp to 0. As the comment in
 * touch_all_softlockup_watchdogs() puts it, a zeroed timestamp is meant
 * to make the CPU re-update it rather than complain. Exported with
 * EXPORT_SYMBOL().
 */
77 void touch_softlockup_watchdog(void)
79 __raw_get_cpu_var(softlockup_touch_ts
) = 0;
81 EXPORT_SYMBOL(touch_softlockup_watchdog
);
/*
 * Variant of touch_softlockup_watchdog() that also raises the current
 * CPU's softlock_touch_sync flag before zeroing its touch timestamp.
 * softlockup_tick() tests that flag and clears it again.
 */
83 void touch_softlockup_watchdog_sync(void)
85 __raw_get_cpu_var(softlock_touch_sync
) = true;
86 __raw_get_cpu_var(softlockup_touch_ts
) = 0;
/*
 * Zero softlockup_touch_ts on every online CPU. Exported with
 * EXPORT_SYMBOL().
 */
89 void touch_all_softlockup_watchdogs(void)
93 /* Cause each CPU to re-update its timestamp rather than complain */
94 for_each_online_cpu(cpu
)
95 per_cpu(softlockup_touch_ts
, cpu
) = 0;
97 EXPORT_SYMBOL(touch_all_softlockup_watchdogs
);
/*
 * ctl_table handler: touches the watchdog timestamps of all online CPUs
 * and then hands the request unchanged to proc_dointvec_minmax(), whose
 * result is returned.
 * NOTE(review): the touch presumably keeps a changed threshold from being
 * checked against old timestamps; confirm the intent.
 */
99 int proc_dosoftlockup_thresh(struct ctl_table
*table
, int write
,
101 size_t *lenp
, loff_t
*ppos
)
103 touch_all_softlockup_watchdogs();
104 return proc_dointvec_minmax(table
, write
, buffer
, lenp
, ppos
);
108 * This callback runs from the timer interrupt, and checks
109 * whether the watchdog thread has hung or not:
/*
 * Soft-lockup check for the current CPU.
 *
 * Reads the CPU's touch timestamp and compares it with the current
 * get_timestamp() value:
 *  - more than half of softlockup_thresh elapsed: wake the CPU's
 *    watchdog thread;
 *  - more than softlockup_thresh elapsed: record the touch timestamp in
 *    softlockup_print_ts, print a report under print_lock, and call
 *    panic() when softlockup_panic is set.
 * The checks before that cover detection being switched off, a pending
 * softlock_touch_sync request, an already reported timestamp or
 * did_panic, and system_state not yet being SYSTEM_RUNNING.
 */
111 void softlockup_tick(void)
113 int this_cpu
= smp_processor_id();
114 unsigned long touch_ts
= per_cpu(softlockup_touch_ts
, this_cpu
);
115 unsigned long print_ts
;
116 struct pt_regs
*regs
= get_irq_regs();
119 /* Is detection switched off? */
120 if (!per_cpu(softlockup_watchdog
, this_cpu
) || softlockup_thresh
<= 0) {
121 /* Be sure we don't false trigger if switched back on */
123 per_cpu(softlockup_touch_ts
, this_cpu
) = 0;
/* Flag raised by touch_softlockup_watchdog_sync(); consumed here. */
128 if (unlikely(per_cpu(softlock_touch_sync
, this_cpu
))) {
130 * If the time stamp was touched atomically
131 * make sure the scheduler tick is up to date.
133 per_cpu(softlock_touch_sync
, this_cpu
) = false;
136 __touch_softlockup_watchdog();
140 print_ts
= per_cpu(softlockup_print_ts
, this_cpu
);
142 /* report at most once a second */
/* print_ts equals touch_ts once this touch timestamp has been reported. */
143 if (print_ts
== touch_ts
|| did_panic
)
146 /* do not print during early bootup: */
147 if (unlikely(system_state
!= SYSTEM_RUNNING
)) {
148 __touch_softlockup_watchdog();
152 now
= get_timestamp(this_cpu
);
155 * Wake up the high-prio watchdog task twice per
156 * threshold timespan.
158 if (now
> touch_ts
+ softlockup_thresh
/2)
159 wake_up_process(per_cpu(softlockup_watchdog
, this_cpu
));
161 /* Warn about unreasonable delays: */
162 if (now
<= (touch_ts
+ softlockup_thresh
))
/* Remember which touch timestamp this report belongs to. */
165 per_cpu(softlockup_print_ts
, this_cpu
) = touch_ts
;
/* The report is emitted with print_lock held. */
167 spin_lock(&print_lock
);
168 printk(KERN_ERR
"BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n",
169 this_cpu
, now
- touch_ts
,
170 current
->comm
, task_pid_nr(current
));
172 print_irqtrace_events(current
);
177 spin_unlock(&print_lock
);
/* Optional escalation, controlled by softlockup_panic. */
179 if (softlockup_panic
)
180 panic("softlockup: hung tasks");
184 * The watchdog thread - runs every second and touches the timestamp.
/*
 * Thread function of the per-CPU watchdog (started through
 * kthread_create() in cpu_callback()).
 *
 * @__bind_cpu: argument handed over by kthread_create(); cpu_callback()
 *              passes its hcpu value here.
 *
 * Requests SCHED_FIFO at priority MAX_RT_PRIO-1 for itself, touches the
 * timestamp once, and then touches it again on every pass of the loop
 * until kthread_should_stop() reports true. The task state is put back
 * to TASK_RUNNING after the loop.
 */
186 static int watchdog(void *__bind_cpu
)
188 struct sched_param param
= { .sched_priority
= MAX_RT_PRIO
-1 };
/*
 * NOTE(review): the third argument below reads as a mis-encoded form of
 * &param (the address of the sched_param declared above); confirm and
 * repair the encoding.
 */
190 sched_setscheduler(current
, SCHED_FIFO
, ¶m
);
192 /* initialize timestamp */
193 __touch_softlockup_watchdog();
195 set_current_state(TASK_INTERRUPTIBLE
);
197 * Run briefly once per second to reset the softlockup timestamp.
198 * If this gets delayed for more than 60 seconds then the
199 * debug-printout triggers in softlockup_tick().
201 while (!kthread_should_stop()) {
202 __touch_softlockup_watchdog();
/* Re-check for a stop request before going back to sleep state. */
205 if (kthread_should_stop())
208 set_current_state(TASK_INTERRUPTIBLE
);
210 __set_current_state(TASK_RUNNING
);
216 * Create/destroy watchdog threads as CPUs come and go:
/*
 * CPU notifier callback (installed through cpu_nfb) that manages the
 * per-CPU watchdog threads.
 *
 * @nfb:    notifier block the callback was invoked through.
 * @action: CPU event being delivered.
 * @hcpu:   CPU number carried in a pointer; converted to hotcpu below.
 *
 * Visible event handling:
 *  - CPU_UP_PREPARE_FROZEN: create the watchdog/N thread, zero the CPU's
 *    touch timestamp, store the thread in softlockup_watchdog and bind it
 *    to the CPU. A failure message is printed with KERN_ERR.
 *  - CPU_ONLINE_FROZEN: wake the CPU's watchdog thread.
 *  - CPU_UP_CANCELED / CPU_UP_CANCELED_FROZEN (CONFIG_HOTPLUG_CPU only):
 *    rebind the thread to any online CPU so it can run.
 *  - CPU_DEAD_FROZEN (CONFIG_HOTPLUG_CPU only): take the thread pointer
 *    into p and clear the per-CPU slot.
 */
219 cpu_callback(struct notifier_block
*nfb
, unsigned long action
, void *hcpu
)
221 int hotcpu
= (unsigned long)hcpu
;
222 struct task_struct
*p
;
226 case CPU_UP_PREPARE_FROZEN
:
/* A watchdog must not already be installed for this CPU. */
227 BUG_ON(per_cpu(softlockup_watchdog
, hotcpu
));
228 p
= kthread_create(watchdog
, hcpu
, "watchdog/%d", hotcpu
);
230 printk(KERN_ERR
"watchdog for %i failed\n", hotcpu
);
233 per_cpu(softlockup_touch_ts
, hotcpu
) = 0;
234 per_cpu(softlockup_watchdog
, hotcpu
) = p
;
235 kthread_bind(p
, hotcpu
);
238 case CPU_ONLINE_FROZEN
:
239 wake_up_process(per_cpu(softlockup_watchdog
, hotcpu
));
241 #ifdef CONFIG_HOTPLUG_CPU
242 case CPU_UP_CANCELED
:
243 case CPU_UP_CANCELED_FROZEN
:
244 if (!per_cpu(softlockup_watchdog
, hotcpu
))
246 /* Unbind so it can run. Fall thru. */
247 kthread_bind(per_cpu(softlockup_watchdog
, hotcpu
),
248 cpumask_any(cpu_online_mask
));
250 case CPU_DEAD_FROZEN
:
/* Detach the thread from the per-CPU slot before it is dealt with. */
251 p
= per_cpu(softlockup_watchdog
, hotcpu
);
252 per_cpu(softlockup_watchdog
, hotcpu
) = NULL
;
255 #endif /* CONFIG_HOTPLUG_CPU */
/*
 * Notifier block that routes CPU events to cpu_callback(); registered
 * with register_cpu_notifier() in spawn_softlockup_task().
 */
260 static struct notifier_block __cpuinitdata cpu_nfb
= {
261 .notifier_call
= cpu_callback
/*
 * Init-time flag tied to the nosoftlockup boot parameter.
 * NOTE(review): presumably set by nosoftlockup_setup() and consulted
 * during start-up; confirm against the full function bodies.
 */
264 static int __initdata nosoftlockup
;
/* Handler registered below, through __setup(), for the nosoftlockup boot parameter. */
266 static int __init
nosoftlockup_setup(char *str
)
271 __setup("nosoftlockup", nosoftlockup_setup
);
/*
 * Start-up routine, run as an early_initcall.
 *
 * Invokes cpu_callback() directly for the CPU it is running on, first
 * with CPU_UP_PREPARE (the result is compared against NOTIFY_BAD) and
 * then with CPU_ONLINE. It then registers cpu_nfb for later CPU events
 * and adds panic_block to panic_notifier_list.
 */
273 static int __init
spawn_softlockup_task(void)
/* The CPU number is packed into a pointer, as cpu_callback() expects. */
275 void *cpu
= (void *)(long)smp_processor_id();
281 err
= cpu_callback(&cpu_nfb
, CPU_UP_PREPARE
, cpu
);
282 if (err
== NOTIFY_BAD
) {
286 cpu_callback(&cpu_nfb
, CPU_ONLINE
, cpu
);
287 register_cpu_notifier(&cpu_nfb
);
289 atomic_notifier_chain_register(&panic_notifier_list
, &panic_block
);
293 early_initcall(spawn_softlockup_task
);