"Tree RCU": scalable classic RCU implementation
[linux-2.6/mini2440.git] / kernel / softirq.c
blob80d323e6f61a352a2a25a7e8df82d337df995464
1 /*
2 * linux/kernel/softirq.c
4 * Copyright (C) 1992 Linus Torvalds
6 * Distribute under GPLv2.
8 * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
10 * Remote softirq infrastructure is by Jens Axboe.
13 #include <linux/module.h>
14 #include <linux/kernel_stat.h>
15 #include <linux/interrupt.h>
16 #include <linux/init.h>
17 #include <linux/mm.h>
18 #include <linux/notifier.h>
19 #include <linux/percpu.h>
20 #include <linux/cpu.h>
21 #include <linux/freezer.h>
22 #include <linux/kthread.h>
23 #include <linux/rcupdate.h>
24 #include <linux/smp.h>
25 #include <linux/tick.h>
27 #include <asm/irq.h>
/*
   - No shared variables; all the data are CPU local.
   - If a softirq needs serialization, let it serialize itself
     by its own spinlocks.
   - Even if a softirq is serialized, only the local cpu is marked for
     execution. Hence, we get a sort of weak cpu binding.
     It is still not clear, though, whether this results in better
     locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: serialized wrt themselves.
 */
46 #ifndef __ARCH_IRQ_STAT
47 irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
48 EXPORT_SYMBOL(irq_stat);
49 #endif
51 static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
53 static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
56 * we cannot loop indefinitely here to avoid userspace starvation,
57 * but we also don't want to introduce a worst case 1/HZ latency
58 * to the pending events, so lets the scheduler to balance
59 * the softirq load for us.
61 static inline void wakeup_softirqd(void)
63 /* Interrupts are disabled: no need to stop preemption */
64 struct task_struct *tsk = __get_cpu_var(ksoftirqd);
66 if (tsk && tsk->state != TASK_RUNNING)
67 wake_up_process(tsk);
71 * This one is for softirq.c-internal use,
72 * where hardirqs are disabled legitimately:
74 #ifdef CONFIG_TRACE_IRQFLAGS
75 static void __local_bh_disable(unsigned long ip)
77 unsigned long flags;
79 WARN_ON_ONCE(in_irq());
81 raw_local_irq_save(flags);
82 add_preempt_count(SOFTIRQ_OFFSET);
84 * Were softirqs turned off above:
86 if (softirq_count() == SOFTIRQ_OFFSET)
87 trace_softirqs_off(ip);
88 raw_local_irq_restore(flags);
90 #else /* !CONFIG_TRACE_IRQFLAGS */
91 static inline void __local_bh_disable(unsigned long ip)
93 add_preempt_count(SOFTIRQ_OFFSET);
94 barrier();
96 #endif /* CONFIG_TRACE_IRQFLAGS */
/* Disable softirq processing, recording our caller's address for tracing. */
void local_bh_disable(void)
{
	unsigned long caller = (unsigned long)__builtin_return_address(0);

	__local_bh_disable(caller);
}

EXPORT_SYMBOL(local_bh_disable);
105 void __local_bh_enable(void)
107 WARN_ON_ONCE(in_irq());
110 * softirqs should never be enabled by __local_bh_enable(),
111 * it always nests inside local_bh_enable() sections:
113 WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);
115 sub_preempt_count(SOFTIRQ_OFFSET);
117 EXPORT_SYMBOL_GPL(__local_bh_enable);
120 * Special-case - softirqs can safely be enabled in
121 * cond_resched_softirq(), or by __do_softirq(),
122 * without processing still-pending softirqs:
124 void _local_bh_enable(void)
126 WARN_ON_ONCE(in_irq());
127 WARN_ON_ONCE(!irqs_disabled());
129 if (softirq_count() == SOFTIRQ_OFFSET)
130 trace_softirqs_on((unsigned long)__builtin_return_address(0));
131 sub_preempt_count(SOFTIRQ_OFFSET);
134 EXPORT_SYMBOL(_local_bh_enable);
136 static inline void _local_bh_enable_ip(unsigned long ip)
138 WARN_ON_ONCE(in_irq() || irqs_disabled());
139 #ifdef CONFIG_TRACE_IRQFLAGS
140 local_irq_disable();
141 #endif
143 * Are softirqs going to be turned on now:
145 if (softirq_count() == SOFTIRQ_OFFSET)
146 trace_softirqs_on(ip);
148 * Keep preemption disabled until we are done with
149 * softirq processing:
151 sub_preempt_count(SOFTIRQ_OFFSET - 1);
153 if (unlikely(!in_interrupt() && local_softirq_pending()))
154 do_softirq();
156 dec_preempt_count();
157 #ifdef CONFIG_TRACE_IRQFLAGS
158 local_irq_enable();
159 #endif
160 preempt_check_resched();
/* Re-enable softirq processing, using our caller's address for tracing. */
void local_bh_enable(void)
{
	unsigned long caller = (unsigned long)__builtin_return_address(0);

	_local_bh_enable_ip(caller);
}
EXPORT_SYMBOL(local_bh_enable);
/*
 * Same as local_bh_enable(), except that the caller supplies the
 * address @ip to be reported to the tracer.
 */
void local_bh_enable_ip(unsigned long ip)
{
	_local_bh_enable_ip(ip);
}
EXPORT_SYMBOL(local_bh_enable_ip);
176 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
177 * and we fall back to softirqd after that.
179 * This number has been established via experimentation.
180 * The two things to balance is latency against fairness -
181 * we want to handle softirqs as soon as possible, but they
182 * should not be able to lock up the box.
184 #define MAX_SOFTIRQ_RESTART 10
186 asmlinkage void __do_softirq(void)
188 struct softirq_action *h;
189 __u32 pending;
190 int max_restart = MAX_SOFTIRQ_RESTART;
191 int cpu;
193 pending = local_softirq_pending();
194 account_system_vtime(current);
196 __local_bh_disable((unsigned long)__builtin_return_address(0));
197 trace_softirq_enter();
199 cpu = smp_processor_id();
200 restart:
201 /* Reset the pending bitmask before enabling irqs */
202 set_softirq_pending(0);
204 local_irq_enable();
206 h = softirq_vec;
208 do {
209 if (pending & 1) {
210 int prev_count = preempt_count();
212 h->action(h);
214 if (unlikely(prev_count != preempt_count())) {
215 printk(KERN_ERR "huh, entered softirq %td %p"
216 "with preempt_count %08x,"
217 " exited with %08x?\n", h - softirq_vec,
218 h->action, prev_count, preempt_count());
219 preempt_count() = prev_count;
222 rcu_bh_qsctr_inc(cpu);
224 h++;
225 pending >>= 1;
226 } while (pending);
228 local_irq_disable();
230 pending = local_softirq_pending();
231 if (pending && --max_restart)
232 goto restart;
234 if (pending)
235 wakeup_softirqd();
237 trace_softirq_exit();
239 account_system_vtime(current);
240 _local_bh_enable();
243 #ifndef __ARCH_HAS_DO_SOFTIRQ
245 asmlinkage void do_softirq(void)
247 __u32 pending;
248 unsigned long flags;
250 if (in_interrupt())
251 return;
253 local_irq_save(flags);
255 pending = local_softirq_pending();
257 if (pending)
258 __do_softirq();
260 local_irq_restore(flags);
263 #endif
/*
 * Enter an interrupt context.
 *
 * If the interrupt arrived on an idle cpu and is not nested, the tick
 * code is notified via tick_check_idle() once the context is entered.
 */
void irq_enter(void)
{
	int cpu = smp_processor_id();
	int idle_entry;

	rcu_irq_enter();
	/* Must be sampled before __irq_enter() runs. */
	idle_entry = idle_cpu(cpu) && !in_interrupt();
	__irq_enter();
	if (idle_entry)
		tick_check_idle(cpu);
}
280 #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
281 # define invoke_softirq() __do_softirq()
282 #else
283 # define invoke_softirq() do_softirq()
284 #endif
287 * Exit an interrupt context. Process softirqs if needed and possible:
289 void irq_exit(void)
291 account_system_vtime(current);
292 trace_hardirq_exit();
293 sub_preempt_count(IRQ_EXIT_OFFSET);
294 if (!in_interrupt() && local_softirq_pending())
295 invoke_softirq();
297 #ifdef CONFIG_NO_HZ
298 /* Make sure that timer wheel updates are propagated */
299 rcu_irq_exit();
300 if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
301 tick_nohz_stop_sched_tick(0);
302 #endif
303 preempt_enable_no_resched();
/*
 * Mark softirq @nr pending on this cpu.
 *
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * In an interrupt or softirq (which also covers softirq-disabled
	 * code) there is nothing more to do: the softirq runs when we
	 * return from the irq or softirq.
	 */
	if (in_interrupt())
		return;

	/* Otherwise wake ksoftirqd so the softirq gets scheduled soon. */
	wakeup_softirqd();
}
/*
 * Mark softirq @nr pending on this cpu; safe to call with interrupts
 * enabled or disabled, as the caller's state is saved and restored.
 */
void raise_softirq(unsigned int nr)
{
	unsigned long irq_state;

	local_irq_save(irq_state);
	raise_softirq_irqoff(nr);
	local_irq_restore(irq_state);
}
335 void open_softirq(int nr, void (*action)(struct softirq_action *))
337 softirq_vec[nr].action = action;
340 /* Tasklets */
341 struct tasklet_head
343 struct tasklet_struct *head;
344 struct tasklet_struct **tail;
347 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
348 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
350 void __tasklet_schedule(struct tasklet_struct *t)
352 unsigned long flags;
354 local_irq_save(flags);
355 t->next = NULL;
356 *__get_cpu_var(tasklet_vec).tail = t;
357 __get_cpu_var(tasklet_vec).tail = &(t->next);
358 raise_softirq_irqoff(TASKLET_SOFTIRQ);
359 local_irq_restore(flags);
362 EXPORT_SYMBOL(__tasklet_schedule);
364 void __tasklet_hi_schedule(struct tasklet_struct *t)
366 unsigned long flags;
368 local_irq_save(flags);
369 t->next = NULL;
370 *__get_cpu_var(tasklet_hi_vec).tail = t;
371 __get_cpu_var(tasklet_hi_vec).tail = &(t->next);
372 raise_softirq_irqoff(HI_SOFTIRQ);
373 local_irq_restore(flags);
376 EXPORT_SYMBOL(__tasklet_hi_schedule);
378 static void tasklet_action(struct softirq_action *a)
380 struct tasklet_struct *list;
382 local_irq_disable();
383 list = __get_cpu_var(tasklet_vec).head;
384 __get_cpu_var(tasklet_vec).head = NULL;
385 __get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head;
386 local_irq_enable();
388 while (list) {
389 struct tasklet_struct *t = list;
391 list = list->next;
393 if (tasklet_trylock(t)) {
394 if (!atomic_read(&t->count)) {
395 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
396 BUG();
397 t->func(t->data);
398 tasklet_unlock(t);
399 continue;
401 tasklet_unlock(t);
404 local_irq_disable();
405 t->next = NULL;
406 *__get_cpu_var(tasklet_vec).tail = t;
407 __get_cpu_var(tasklet_vec).tail = &(t->next);
408 __raise_softirq_irqoff(TASKLET_SOFTIRQ);
409 local_irq_enable();
413 static void tasklet_hi_action(struct softirq_action *a)
415 struct tasklet_struct *list;
417 local_irq_disable();
418 list = __get_cpu_var(tasklet_hi_vec).head;
419 __get_cpu_var(tasklet_hi_vec).head = NULL;
420 __get_cpu_var(tasklet_hi_vec).tail = &__get_cpu_var(tasklet_hi_vec).head;
421 local_irq_enable();
423 while (list) {
424 struct tasklet_struct *t = list;
426 list = list->next;
428 if (tasklet_trylock(t)) {
429 if (!atomic_read(&t->count)) {
430 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
431 BUG();
432 t->func(t->data);
433 tasklet_unlock(t);
434 continue;
436 tasklet_unlock(t);
439 local_irq_disable();
440 t->next = NULL;
441 *__get_cpu_var(tasklet_hi_vec).tail = t;
442 __get_cpu_var(tasklet_hi_vec).tail = &(t->next);
443 __raise_softirq_irqoff(HI_SOFTIRQ);
444 local_irq_enable();
449 void tasklet_init(struct tasklet_struct *t,
450 void (*func)(unsigned long), unsigned long data)
452 t->next = NULL;
453 t->state = 0;
454 atomic_set(&t->count, 0);
455 t->func = func;
456 t->data = data;
459 EXPORT_SYMBOL(tasklet_init);
461 void tasklet_kill(struct tasklet_struct *t)
463 if (in_interrupt())
464 printk("Attempt to kill tasklet from interrupt\n");
466 while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
468 yield();
469 while (test_bit(TASKLET_STATE_SCHED, &t->state));
471 tasklet_unlock_wait(t);
472 clear_bit(TASKLET_STATE_SCHED, &t->state);
475 EXPORT_SYMBOL(tasklet_kill);
477 DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
478 EXPORT_PER_CPU_SYMBOL(softirq_work_list);
480 static void __local_trigger(struct call_single_data *cp, int softirq)
482 struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]);
484 list_add_tail(&cp->list, head);
486 /* Trigger the softirq only if the list was previously empty. */
487 if (head->next == &cp->list)
488 raise_softirq_irqoff(softirq);
#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
/*
 * Runs on the target cpu: queue the work item passed in @data locally,
 * for the softirq number stored in its ->priv field.
 */
static void remote_softirq_receive(void *data)
{
	struct call_single_data *cp = data;
	unsigned long irq_state;
	int softirq = cp->priv;

	local_irq_save(irq_state);
	__local_trigger(cp, softirq);
	local_irq_restore(irq_state);
}

/*
 * Try to hand @cp to @cpu for @softirq.
 *
 * Returns 0 when the request was sent, 1 when @cpu is not online and the
 * caller has to queue the work itself.
 */
static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	if (!cpu_online(cpu))
		return 1;

	cp->func = remote_softirq_receive;
	cp->info = cp;
	cp->flags = 0;
	cp->priv = softirq;

	__smp_call_function_single(cpu, cp);
	return 0;
}
#else /* CONFIG_USE_GENERIC_SMP_HELPERS */
/* No generic SMP helpers: always report failure so the work stays local. */
static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	return 1;
}
#endif
/**
 * __send_remote_softirq - try to schedule softirq work on a remote cpu
 * @cp: private SMP call function data area
 * @cpu: the remote cpu
 * @this_cpu: the currently executing cpu
 * @softirq: the softirq for the work
 *
 * Attempt to schedule softirq work on a remote cpu.  If this cannot be
 * done, the work is instead queued up on the local cpu.
 *
 * Interrupts must be disabled.
 */
void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq)
{
	/* Done as soon as a genuinely remote cpu accepted the work. */
	if (cpu != this_cpu && !__try_remote_softirq(cp, cpu, softirq))
		return;

	__local_trigger(cp, softirq);
}
EXPORT_SYMBOL(__send_remote_softirq);
/**
 * send_remote_softirq - try to schedule softirq work on a remote cpu
 * @cp: private SMP call function data area
 * @cpu: the remote cpu
 * @softirq: the softirq for the work
 *
 * Like __send_remote_softirq except that disabling interrupts and
 * computing the current cpu is done for the caller.
 */
void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	unsigned long irq_state;

	local_irq_save(irq_state);
	__send_remote_softirq(cp, cpu, smp_processor_id(), softirq);
	local_irq_restore(irq_state);
}
EXPORT_SYMBOL(send_remote_softirq);
565 static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self,
566 unsigned long action, void *hcpu)
569 * If a CPU goes away, splice its entries to the current CPU
570 * and trigger a run of the softirq
572 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
573 int cpu = (unsigned long) hcpu;
574 int i;
576 local_irq_disable();
577 for (i = 0; i < NR_SOFTIRQS; i++) {
578 struct list_head *head = &per_cpu(softirq_work_list[i], cpu);
579 struct list_head *local_head;
581 if (list_empty(head))
582 continue;
584 local_head = &__get_cpu_var(softirq_work_list[i]);
585 list_splice_init(head, local_head);
586 raise_softirq_irqoff(i);
588 local_irq_enable();
591 return NOTIFY_OK;
594 static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = {
595 .notifier_call = remote_softirq_cpu_notify,
598 void __init softirq_init(void)
600 int cpu;
602 for_each_possible_cpu(cpu) {
603 int i;
605 per_cpu(tasklet_vec, cpu).tail =
606 &per_cpu(tasklet_vec, cpu).head;
607 per_cpu(tasklet_hi_vec, cpu).tail =
608 &per_cpu(tasklet_hi_vec, cpu).head;
609 for (i = 0; i < NR_SOFTIRQS; i++)
610 INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu));
613 register_hotcpu_notifier(&remote_softirq_cpu_notifier);
615 open_softirq(TASKLET_SOFTIRQ, tasklet_action);
616 open_softirq(HI_SOFTIRQ, tasklet_hi_action);
619 static int ksoftirqd(void * __bind_cpu)
621 set_current_state(TASK_INTERRUPTIBLE);
623 while (!kthread_should_stop()) {
624 preempt_disable();
625 if (!local_softirq_pending()) {
626 preempt_enable_no_resched();
627 schedule();
628 preempt_disable();
631 __set_current_state(TASK_RUNNING);
633 while (local_softirq_pending()) {
634 /* Preempt disable stops cpu going offline.
635 If already offline, we'll be on wrong CPU:
636 don't process */
637 if (cpu_is_offline((long)__bind_cpu))
638 goto wait_to_die;
639 do_softirq();
640 preempt_enable_no_resched();
641 cond_resched();
642 preempt_disable();
644 preempt_enable();
645 set_current_state(TASK_INTERRUPTIBLE);
647 __set_current_state(TASK_RUNNING);
648 return 0;
650 wait_to_die:
651 preempt_enable();
652 /* Wait for kthread_stop */
653 set_current_state(TASK_INTERRUPTIBLE);
654 while (!kthread_should_stop()) {
655 schedule();
656 set_current_state(TASK_INTERRUPTIBLE);
658 __set_current_state(TASK_RUNNING);
659 return 0;
662 #ifdef CONFIG_HOTPLUG_CPU
664 * tasklet_kill_immediate is called to remove a tasklet which can already be
665 * scheduled for execution on @cpu.
667 * Unlike tasklet_kill, this function removes the tasklet
668 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
670 * When this function is called, @cpu must be in the CPU_DEAD state.
672 void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
674 struct tasklet_struct **i;
676 BUG_ON(cpu_online(cpu));
677 BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
679 if (!test_bit(TASKLET_STATE_SCHED, &t->state))
680 return;
682 /* CPU is dead, so no lock needed. */
683 for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
684 if (*i == t) {
685 *i = t->next;
686 /* If this was the tail element, move the tail ptr */
687 if (*i == NULL)
688 per_cpu(tasklet_vec, cpu).tail = i;
689 return;
692 BUG();
695 static void takeover_tasklets(unsigned int cpu)
697 /* CPU is dead, so no lock needed. */
698 local_irq_disable();
700 /* Find end, append list for that CPU. */
701 if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
702 *(__get_cpu_var(tasklet_vec).tail) = per_cpu(tasklet_vec, cpu).head;
703 __get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail;
704 per_cpu(tasklet_vec, cpu).head = NULL;
705 per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
707 raise_softirq_irqoff(TASKLET_SOFTIRQ);
709 if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
710 *__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head;
711 __get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail;
712 per_cpu(tasklet_hi_vec, cpu).head = NULL;
713 per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
715 raise_softirq_irqoff(HI_SOFTIRQ);
717 local_irq_enable();
719 #endif /* CONFIG_HOTPLUG_CPU */
721 static int __cpuinit cpu_callback(struct notifier_block *nfb,
722 unsigned long action,
723 void *hcpu)
725 int hotcpu = (unsigned long)hcpu;
726 struct task_struct *p;
728 switch (action) {
729 case CPU_UP_PREPARE:
730 case CPU_UP_PREPARE_FROZEN:
731 p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
732 if (IS_ERR(p)) {
733 printk("ksoftirqd for %i failed\n", hotcpu);
734 return NOTIFY_BAD;
736 kthread_bind(p, hotcpu);
737 per_cpu(ksoftirqd, hotcpu) = p;
738 break;
739 case CPU_ONLINE:
740 case CPU_ONLINE_FROZEN:
741 wake_up_process(per_cpu(ksoftirqd, hotcpu));
742 break;
743 #ifdef CONFIG_HOTPLUG_CPU
744 case CPU_UP_CANCELED:
745 case CPU_UP_CANCELED_FROZEN:
746 if (!per_cpu(ksoftirqd, hotcpu))
747 break;
748 /* Unbind so it can run. Fall thru. */
749 kthread_bind(per_cpu(ksoftirqd, hotcpu),
750 any_online_cpu(cpu_online_map));
751 case CPU_DEAD:
752 case CPU_DEAD_FROZEN: {
753 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
755 p = per_cpu(ksoftirqd, hotcpu);
756 per_cpu(ksoftirqd, hotcpu) = NULL;
757 sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
758 kthread_stop(p);
759 takeover_tasklets(hotcpu);
760 break;
762 #endif /* CONFIG_HOTPLUG_CPU */
764 return NOTIFY_OK;
767 static struct notifier_block __cpuinitdata cpu_nfb = {
768 .notifier_call = cpu_callback
771 static __init int spawn_ksoftirqd(void)
773 void *cpu = (void *)(long)smp_processor_id();
774 int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
776 BUG_ON(err == NOTIFY_BAD);
777 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
778 register_cpu_notifier(&cpu_nfb);
779 return 0;
781 early_initcall(spawn_ksoftirqd);
#ifdef CONFIG_SMP
/*
 * Call a function on all processors.
 *
 * @func: function to run on every cpu, including the calling one
 * @info: opaque argument passed to @func
 * @wait: forwarded unchanged to smp_call_function()
 *
 * Returns whatever smp_call_function() returned.
 *
 * The local invocation runs with interrupts disabled.  The caller's
 * interrupt state is saved and restored around it, so calling this with
 * interrupts already off no longer re-enables them on return.
 */
int on_each_cpu(void (*func) (void *info), void *info, int wait)
{
	unsigned long flags;
	int ret = 0;

	preempt_disable();
	ret = smp_call_function(func, info, wait);
	local_irq_save(flags);
	func(info);
	local_irq_restore(flags);
	preempt_enable();
	return ret;
}
EXPORT_SYMBOL(on_each_cpu);
#endif