NOHZ: restart tick device from irq_enter()
[linux-2.6/x86.git] / kernel/softirq.c
/*
 *	linux/kernel/softirq.c
 *
 *	Copyright (C) 1992 Linus Torvalds
 *
 *	Distribute under GPLv2.
 *
 *	Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 */
#include <linux/module.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <linux/smp.h>
#include <linux/tick.h>

#include <asm/irq.h>
/*
   - No shared variables, all the data are CPU local.
   - If a softirq needs serialization, let it serialize itself
     by its own spinlocks (see the sketch after this comment).
   - Even if softirq is serialized, only local cpu is marked for
     execution. Hence, we get something sort of weak cpu binding,
     though it is still not clear whether this results in better
     locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: serialized wrt itself.
 */
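/*
 * Illustrative sketch (not part of this file): a softirq handler that
 * follows the rules above.  The fast path only touches per-CPU data and
 * needs no locking; the rare shared path takes the handler's own lock.
 * struct example_work, example_lock, example_queue and
 * example_softirq_action() are hypothetical names used only here.
 */
#if 0	/* example only */
struct example_work;

static DEFINE_SPINLOCK(example_lock);	/* the handler's own serialization */
static DEFINE_PER_CPU(struct example_work *, example_queue);

static void example_softirq_action(struct softirq_action *h)
{
	struct example_work *work;

	/* Per-CPU fast path: only this CPU ever touches its queue. */
	work = __get_cpu_var(example_queue);
	__get_cpu_var(example_queue) = NULL;
	/* ... process 'work' without any global lock ... */

	/* Rare shared state: the softirq serializes itself with its own lock.
	   If process context also takes this lock it must use spin_lock_bh(). */
	spin_lock(&example_lock);
	/* ... update data visible to other CPUs ... */
	spin_unlock(&example_lock);
}
#endif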
#ifndef __ARCH_IRQ_STAT
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif

static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;

static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
/*
 * We cannot loop indefinitely here to avoid userspace starvation,
 * but we also don't want to introduce a worst case 1/HZ latency
 * to the pending events, so let the scheduler balance
 * the softirq load for us.
 */
static inline void wakeup_softirqd(void)
{
	/* Interrupts are disabled: no need to stop preemption */
	struct task_struct *tsk = __get_cpu_var(ksoftirqd);

	if (tsk && tsk->state != TASK_RUNNING)
		wake_up_process(tsk);
}
/*
 * This one is for softirq.c-internal use,
 * where hardirqs are disabled legitimately:
 */
#ifdef CONFIG_TRACE_IRQFLAGS
static void __local_bh_disable(unsigned long ip)
{
	unsigned long flags;

	WARN_ON_ONCE(in_irq());

	raw_local_irq_save(flags);
	add_preempt_count(SOFTIRQ_OFFSET);
	/*
	 * Were softirqs turned off above:
	 */
	if (softirq_count() == SOFTIRQ_OFFSET)
		trace_softirqs_off(ip);
	raw_local_irq_restore(flags);
}
#else /* !CONFIG_TRACE_IRQFLAGS */
static inline void __local_bh_disable(unsigned long ip)
{
	add_preempt_count(SOFTIRQ_OFFSET);
	barrier();
}
#endif /* CONFIG_TRACE_IRQFLAGS */
void local_bh_disable(void)
{
	__local_bh_disable((unsigned long)__builtin_return_address(0));
}

EXPORT_SYMBOL(local_bh_disable);

void __local_bh_enable(void)
{
	WARN_ON_ONCE(in_irq());

	/*
	 * softirqs should never be enabled by __local_bh_enable(),
	 * it always nests inside local_bh_enable() sections:
	 */
	WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);

	sub_preempt_count(SOFTIRQ_OFFSET);
}
EXPORT_SYMBOL_GPL(__local_bh_enable);

/*
 * Special-case - softirqs can safely be enabled in
 * cond_resched_softirq(), or by __do_softirq(),
 * without processing still-pending softirqs:
 */
void _local_bh_enable(void)
{
	WARN_ON_ONCE(in_irq());
	WARN_ON_ONCE(!irqs_disabled());

	if (softirq_count() == SOFTIRQ_OFFSET)
		trace_softirqs_on((unsigned long)__builtin_return_address(0));
	sub_preempt_count(SOFTIRQ_OFFSET);
}

EXPORT_SYMBOL(_local_bh_enable);

static inline void _local_bh_enable_ip(unsigned long ip)
{
	WARN_ON_ONCE(in_irq() || irqs_disabled());
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_disable();
#endif
	/*
	 * Are softirqs going to be turned on now:
	 */
	if (softirq_count() == SOFTIRQ_OFFSET)
		trace_softirqs_on(ip);
	/*
	 * Keep preemption disabled until we are done with
	 * softirq processing:
	 */
	sub_preempt_count(SOFTIRQ_OFFSET - 1);

	if (unlikely(!in_interrupt() && local_softirq_pending()))
		do_softirq();

	dec_preempt_count();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_enable();
#endif
	preempt_check_resched();
}

void local_bh_enable(void)
{
	_local_bh_enable_ip((unsigned long)__builtin_return_address(0));
}
EXPORT_SYMBOL(local_bh_enable);

void local_bh_enable_ip(unsigned long ip)
{
	_local_bh_enable_ip(ip);
}
EXPORT_SYMBOL(local_bh_enable_ip);
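/*
 * Illustrative sketch (not part of this file): the usual pattern for the
 * exported local_bh_disable()/local_bh_enable() pair.  Process context
 * that shares data with a softirq or tasklet disables bottom halves
 * around the critical section so the handler cannot run on this CPU in
 * between; local_bh_enable() then runs any softirq raised meanwhile.
 * example_count and example_update() are hypothetical.
 */
#if 0	/* example only */
static DEFINE_PER_CPU(unsigned long, example_count);

static void example_update(void)
{
	local_bh_disable();			/* softirqs held off on this CPU */
	__get_cpu_var(example_count)++;		/* also read by a softirq handler */
	local_bh_enable();			/* may run pending softirqs now */
}
#endif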
/*
 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
 * and we fall back to ksoftirqd after that.
 *
 * This number has been established via experimentation.
 * The two things to balance are latency against fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_RESTART 10
asmlinkage void __do_softirq(void)
{
	struct softirq_action *h;
	__u32 pending;
	int max_restart = MAX_SOFTIRQ_RESTART;
	int cpu;

	pending = local_softirq_pending();
	account_system_vtime(current);

	__local_bh_disable((unsigned long)__builtin_return_address(0));
	trace_softirq_enter();

	cpu = smp_processor_id();
restart:
	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(0);

	local_irq_enable();

	h = softirq_vec;

	do {
		if (pending & 1) {
			int prev_count = preempt_count();

			h->action(h);

			if (unlikely(prev_count != preempt_count())) {
				printk(KERN_ERR "huh, entered softirq %td %p "
				       "with preempt_count %08x, "
				       "exited with %08x?\n", h - softirq_vec,
				       h->action, prev_count, preempt_count());
				preempt_count() = prev_count;
			}

			rcu_bh_qsctr_inc(cpu);
		}
		h++;
		pending >>= 1;
	} while (pending);

	local_irq_disable();

	pending = local_softirq_pending();
	if (pending && --max_restart)
		goto restart;

	if (pending)
		wakeup_softirqd();

	trace_softirq_exit();

	account_system_vtime(current);
	_local_bh_enable();
}
#ifndef __ARCH_HAS_DO_SOFTIRQ

asmlinkage void do_softirq(void)
{
	__u32 pending;
	unsigned long flags;

	if (in_interrupt())
		return;

	local_irq_save(flags);

	pending = local_softirq_pending();

	if (pending)
		__do_softirq();

	local_irq_restore(flags);
}

#endif
/*
 * Enter an interrupt context.
 */
void irq_enter(void)
{
	int cpu = smp_processor_id();

	if (idle_cpu(cpu) && !in_interrupt())
		tick_check_idle(cpu);

	__irq_enter();
}
#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
# define invoke_softirq()	__do_softirq()
#else
# define invoke_softirq()	do_softirq()
#endif

/*
 * Exit an interrupt context. Process softirqs if needed and possible:
 */
void irq_exit(void)
{
	account_system_vtime(current);
	trace_hardirq_exit();
	sub_preempt_count(IRQ_EXIT_OFFSET);
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();

#ifdef CONFIG_NO_HZ
	/* Make sure that timer wheel updates are propagated */
	if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
		tick_nohz_stop_sched_tick(0);
	rcu_irq_exit();
#endif
	preempt_enable_no_resched();
}
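/*
 * Illustrative sketch (not part of this file): how an architecture's
 * interrupt entry typically brackets handler execution with
 * irq_enter()/irq_exit().  irq_enter() restarts the tick on an idle
 * NOHZ cpu before handlers run; irq_exit() runs pending softirqs and
 * lets the tick be stopped again.  example_do_IRQ() is a simplified
 * placeholder, not any particular architecture's real entry code.
 */
#if 0	/* example only */
void example_do_IRQ(unsigned int irq)
{
	irq_enter();			/* account hardirq, un-idle the tick */
	generic_handle_irq(irq);	/* run the registered handlers */
	irq_exit();			/* run softirqs, maybe re-stop the tick */
}
#endif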
/*
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * If we're in an interrupt or softirq, we're done
	 * (this also catches softirq-disabled code). We will
	 * actually run the softirq once we return from
	 * the irq or softirq.
	 *
	 * Otherwise we wake up ksoftirqd to make sure we
	 * schedule the softirq soon.
	 */
	if (!in_interrupt())
		wakeup_softirqd();
}
void raise_softirq(unsigned int nr)
{
	unsigned long flags;

	local_irq_save(flags);
	raise_softirq_irqoff(nr);
	local_irq_restore(flags);
}

void open_softirq(int nr, void (*action)(struct softirq_action *))
{
	softirq_vec[nr].action = action;
}
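/*
 * Illustrative sketch (not part of this file): registering and raising a
 * softirq.  A real softirq also needs its own entry in the softirq enum
 * in <linux/interrupt.h>; EXAMPLE_SOFTIRQ, example_action() and
 * example_irq_handler() are hypothetical names.  Most code should use a
 * tasklet (below) rather than a dedicated softirq.
 */
#if 0	/* example only */
static void example_action(struct softirq_action *h)
{
	/* ... drain this CPU's pending work ... */
}

static int __init example_init(void)
{
	open_softirq(EXAMPLE_SOFTIRQ, example_action);
	return 0;
}

/* Typically raised from a hardirq handler: */
static irqreturn_t example_irq_handler(int irq, void *dev_id)
{
	/* ... queue work on a per-CPU list ... */
	raise_softirq(EXAMPLE_SOFTIRQ);	/* irqs are already off here, so
					   raise_softirq_irqoff() also works */
	return IRQ_HANDLED;
}
#endif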
/* Tasklets */
struct tasklet_head
{
	struct tasklet_struct *head;
	struct tasklet_struct **tail;
};

static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);

void __tasklet_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	t->next = NULL;
	*__get_cpu_var(tasklet_vec).tail = t;
	__get_cpu_var(tasklet_vec).tail = &(t->next);
	raise_softirq_irqoff(TASKLET_SOFTIRQ);
	local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_schedule);

void __tasklet_hi_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	t->next = NULL;
	*__get_cpu_var(tasklet_hi_vec).tail = t;
	__get_cpu_var(tasklet_hi_vec).tail = &(t->next);
	raise_softirq_irqoff(HI_SOFTIRQ);
	local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_hi_schedule);
static void tasklet_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = __get_cpu_var(tasklet_vec).head;
	__get_cpu_var(tasklet_vec).head = NULL;
	__get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head;
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
		t->next = NULL;
		*__get_cpu_var(tasklet_vec).tail = t;
		__get_cpu_var(tasklet_vec).tail = &(t->next);
		__raise_softirq_irqoff(TASKLET_SOFTIRQ);
		local_irq_enable();
	}
}
static void tasklet_hi_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = __get_cpu_var(tasklet_hi_vec).head;
	__get_cpu_var(tasklet_hi_vec).head = NULL;
	__get_cpu_var(tasklet_hi_vec).tail = &__get_cpu_var(tasklet_hi_vec).head;
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
		t->next = NULL;
		*__get_cpu_var(tasklet_hi_vec).tail = t;
		__get_cpu_var(tasklet_hi_vec).tail = &(t->next);
		__raise_softirq_irqoff(HI_SOFTIRQ);
		local_irq_enable();
	}
}
void tasklet_init(struct tasklet_struct *t,
		  void (*func)(unsigned long), unsigned long data)
{
	t->next = NULL;
	t->state = 0;
	atomic_set(&t->count, 0);
	t->func = func;
	t->data = data;
}

EXPORT_SYMBOL(tasklet_init);

void tasklet_kill(struct tasklet_struct *t)
{
	if (in_interrupt())
		printk("Attempt to kill tasklet from interrupt\n");

	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
		do {
			yield();
		} while (test_bit(TASKLET_STATE_SCHED, &t->state));
	}
	tasklet_unlock_wait(t);
	clear_bit(TASKLET_STATE_SCHED, &t->state);
}

EXPORT_SYMBOL(tasklet_kill);
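/*
 * Illustrative sketch (not part of this file): the common tasklet life
 * cycle in a driver.  struct example_dev, example_tasklet_fn(),
 * example_probe(), example_irq_handler() and example_remove() are
 * hypothetical; DECLARE_TASKLET() from <linux/interrupt.h> can replace
 * tasklet_init() for a statically allocated tasklet.
 */
#if 0	/* example only */
struct example_dev {
	struct tasklet_struct tasklet;
	/* ... device state ... */
};

static void example_tasklet_fn(unsigned long data)
{
	struct example_dev *dev = (struct example_dev *)data;
	/* ... deferred work, runs in softirq context ... */
}

static int example_probe(struct example_dev *dev)
{
	tasklet_init(&dev->tasklet, example_tasklet_fn, (unsigned long)dev);
	return 0;
}

static irqreturn_t example_irq_handler(int irq, void *dev_id)
{
	struct example_dev *dev = dev_id;

	/* ... ack the hardware ... */
	tasklet_schedule(&dev->tasklet);	/* defer the rest to softirq time */
	return IRQ_HANDLED;
}

static void example_remove(struct example_dev *dev)
{
	tasklet_kill(&dev->tasklet);	/* wait out any scheduled/running instance */
}
#endif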
void __init softirq_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		per_cpu(tasklet_vec, cpu).tail =
			&per_cpu(tasklet_vec, cpu).head;
		per_cpu(tasklet_hi_vec, cpu).tail =
			&per_cpu(tasklet_hi_vec, cpu).head;
	}

	open_softirq(TASKLET_SOFTIRQ, tasklet_action);
	open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}
static int ksoftirqd(void *__bind_cpu)
{
	set_current_state(TASK_INTERRUPTIBLE);

	while (!kthread_should_stop()) {
		preempt_disable();
		if (!local_softirq_pending()) {
			preempt_enable_no_resched();
			schedule();
			preempt_disable();
		}

		__set_current_state(TASK_RUNNING);

		while (local_softirq_pending()) {
			/* Preempt disable stops cpu going offline.
			   If already offline, we'll be on wrong CPU:
			   don't process */
			if (cpu_is_offline((long)__bind_cpu))
				goto wait_to_die;
			do_softirq();
			preempt_enable_no_resched();
			cond_resched();
			preempt_disable();
		}
		preempt_enable();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;

wait_to_die:
	preempt_enable();
	/* Wait for kthread_stop */
	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
		schedule();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}
#ifdef CONFIG_HOTPLUG_CPU
/*
 * tasklet_kill_immediate is called to remove a tasklet which can already be
 * scheduled for execution on @cpu.
 *
 * Unlike tasklet_kill, this function removes the tasklet
 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
 *
 * When this function is called, @cpu must be in the CPU_DEAD state.
 */
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
	struct tasklet_struct **i;

	BUG_ON(cpu_online(cpu));
	BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));

	if (!test_bit(TASKLET_STATE_SCHED, &t->state))
		return;

	/* CPU is dead, so no lock needed. */
	for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
		if (*i == t) {
			*i = t->next;
			/* If this was the tail element, move the tail ptr */
			if (*i == NULL)
				per_cpu(tasklet_vec, cpu).tail = i;
			return;
		}
	}
	BUG();
}
static void takeover_tasklets(unsigned int cpu)
{
	/* CPU is dead, so no lock needed. */
	local_irq_disable();

	/* Find end, append list for that CPU. */
	if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
		*(__get_cpu_var(tasklet_vec).tail) = per_cpu(tasklet_vec, cpu).head;
		__get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail;
		per_cpu(tasklet_vec, cpu).head = NULL;
		per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
	}
	raise_softirq_irqoff(TASKLET_SOFTIRQ);

	if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
		*__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head;
		__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail;
		per_cpu(tasklet_hi_vec, cpu).head = NULL;
		per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
	}
	raise_softirq_irqoff(HI_SOFTIRQ);

	local_irq_enable();
}
#endif /* CONFIG_HOTPLUG_CPU */
static int __cpuinit cpu_callback(struct notifier_block *nfb,
				  unsigned long action,
				  void *hcpu)
{
	int hotcpu = (unsigned long)hcpu;
	struct task_struct *p;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
		if (IS_ERR(p)) {
			printk("ksoftirqd for %i failed\n", hotcpu);
			return NOTIFY_BAD;
		}
		kthread_bind(p, hotcpu);
		per_cpu(ksoftirqd, hotcpu) = p;
		break;
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		wake_up_process(per_cpu(ksoftirqd, hotcpu));
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
		if (!per_cpu(ksoftirqd, hotcpu))
			break;
		/* Unbind so it can run.  Fall thru. */
		kthread_bind(per_cpu(ksoftirqd, hotcpu),
			     any_online_cpu(cpu_online_map));
	case CPU_DEAD:
	case CPU_DEAD_FROZEN: {
		struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };

		p = per_cpu(ksoftirqd, hotcpu);
		per_cpu(ksoftirqd, hotcpu) = NULL;
		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
		kthread_stop(p);
		takeover_tasklets(hotcpu);
		break;
	}
#endif /* CONFIG_HOTPLUG_CPU */
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata cpu_nfb = {
	.notifier_call = cpu_callback
};

static __init int spawn_ksoftirqd(void)
{
	void *cpu = (void *)(long)smp_processor_id();
	int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);

	BUG_ON(err == NOTIFY_BAD);
	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
	register_cpu_notifier(&cpu_nfb);
	return 0;
}
early_initcall(spawn_ksoftirqd);
#ifdef CONFIG_SMP
/*
 * Call a function on all processors
 */
int on_each_cpu(void (*func) (void *info), void *info, int wait)
{
	int ret = 0;

	preempt_disable();
	ret = smp_call_function(func, info, wait);
	local_irq_disable();
	func(info);
	local_irq_enable();
	preempt_enable();
	return ret;
}
EXPORT_SYMBOL(on_each_cpu);
#endif
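/*
 * Illustrative sketch (not part of this file): a typical on_each_cpu()
 * call.  The callback runs with interrupts disabled on every online CPU,
 * including the caller's, so it must be short and must not sleep.
 * example_flush_local() and example_flush() are hypothetical.
 */
#if 0	/* example only */
static void example_flush_local(void *info)
{
	/* runs on each CPU with irqs off; touch only per-CPU state here */
}

static void example_flush(void)
{
	/* wait == 1: return only after every CPU has run the callback */
	on_each_cpu(example_flush_local, NULL, 1);
}
#endif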