2 * linux/kernel/softirq.c
4 * Copyright (C) 1992 Linus Torvalds
6 * Distribute under GPLv2.
8 * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
10 * Remote softirq infrastructure is by Jens Axboe.
13 #include <linux/module.h>
14 #include <linux/kernel_stat.h>
15 #include <linux/interrupt.h>
16 #include <linux/init.h>
18 #include <linux/notifier.h>
19 #include <linux/percpu.h>
20 #include <linux/cpu.h>
21 #include <linux/freezer.h>
22 #include <linux/kthread.h>
23 #include <linux/rcupdate.h>
24 #include <linux/ftrace.h>
25 #include <linux/smp.h>
26 #include <linux/tick.h>
/*
   - No shared variables, all the data are CPU local.
   - If a softirq needs serialization, let it serialize itself.
   - Even if a softirq is serialized, only the local cpu is marked for
     execution. Hence, we get a sort of weak cpu binding, though it is
     still not clear whether this results in better locality.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: serialized with respect to themselves.
 */
47 #ifndef __ARCH_IRQ_STAT
48 irq_cpustat_t irq_stat
[NR_CPUS
] ____cacheline_aligned
;
49 EXPORT_SYMBOL(irq_stat
);
52 static struct softirq_action softirq_vec
[NR_SOFTIRQS
] __cacheline_aligned_in_smp
;
54 static DEFINE_PER_CPU(struct task_struct
*, ksoftirqd
);
56 char *softirq_to_name
[NR_SOFTIRQS
] = {
57 "HI_SOFTIRQ", "TIMER_SOFTIRQ", "NET_TX_SOFTIRQ", "NET_RX_SOFTIRQ",
58 "BLOCK_SOFTIRQ", "TASKLET_SOFTIRQ", "SCHED_SOFTIRQ", "HRTIMER_SOFTIRQ",
63 * we cannot loop indefinitely here to avoid userspace starvation,
64 * but we also don't want to introduce a worst case 1/HZ latency
65 * to the pending events, so lets the scheduler to balance
66 * the softirq load for us.
68 static inline void wakeup_softirqd(void)
70 /* Interrupts are disabled: no need to stop preemption */
71 struct task_struct
*tsk
= __get_cpu_var(ksoftirqd
);
73 if (tsk
&& tsk
->state
!= TASK_RUNNING
)
78 * This one is for softirq.c-internal use,
79 * where hardirqs are disabled legitimately:
81 #ifdef CONFIG_TRACE_IRQFLAGS
82 static void __local_bh_disable(unsigned long ip
)
86 WARN_ON_ONCE(in_irq());
88 raw_local_irq_save(flags
);
90 * The preempt tracer hooks into add_preempt_count and will break
91 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
92 * is set and before current->softirq_enabled is cleared.
93 * We must manually increment preempt_count here and manually
94 * call the trace_preempt_off later.
96 preempt_count() += SOFTIRQ_OFFSET
;
98 * Were softirqs turned off above:
100 if (softirq_count() == SOFTIRQ_OFFSET
)
101 trace_softirqs_off(ip
);
102 raw_local_irq_restore(flags
);
104 if (preempt_count() == SOFTIRQ_OFFSET
)
105 trace_preempt_off(CALLER_ADDR0
, get_parent_ip(CALLER_ADDR1
));
107 #else /* !CONFIG_TRACE_IRQFLAGS */
108 static inline void __local_bh_disable(unsigned long ip
)
110 add_preempt_count(SOFTIRQ_OFFSET
);
113 #endif /* CONFIG_TRACE_IRQFLAGS */
/*
 * Disable softirq processing on the local CPU. The caller's return
 * address is passed down as the location for __local_bh_disable().
 */
void local_bh_disable(void)
{
	__local_bh_disable((unsigned long)__builtin_return_address(0));
}

EXPORT_SYMBOL(local_bh_disable);
123 * Special-case - softirqs can safely be enabled in
124 * cond_resched_softirq(), or by __do_softirq(),
125 * without processing still-pending softirqs:
127 void _local_bh_enable(void)
129 WARN_ON_ONCE(in_irq());
130 WARN_ON_ONCE(!irqs_disabled());
132 if (softirq_count() == SOFTIRQ_OFFSET
)
133 trace_softirqs_on((unsigned long)__builtin_return_address(0));
134 sub_preempt_count(SOFTIRQ_OFFSET
);
137 EXPORT_SYMBOL(_local_bh_enable
);
139 static inline void _local_bh_enable_ip(unsigned long ip
)
141 WARN_ON_ONCE(in_irq() || irqs_disabled());
142 #ifdef CONFIG_TRACE_IRQFLAGS
146 * Are softirqs going to be turned on now:
148 if (softirq_count() == SOFTIRQ_OFFSET
)
149 trace_softirqs_on(ip
);
151 * Keep preemption disabled until we are done with
152 * softirq processing:
154 sub_preempt_count(SOFTIRQ_OFFSET
- 1);
156 if (unlikely(!in_interrupt() && local_softirq_pending()))
160 #ifdef CONFIG_TRACE_IRQFLAGS
163 preempt_check_resched();
/*
 * Enable softirqs on the local CPU, using the caller's return address
 * as the location passed to _local_bh_enable_ip().
 */
void local_bh_enable(void)
{
	_local_bh_enable_ip((unsigned long)__builtin_return_address(0));
}
EXPORT_SYMBOL(local_bh_enable);
/*
 * Same as local_bh_enable(), but the caller supplies the address @ip
 * that is passed to _local_bh_enable_ip().
 */
void local_bh_enable_ip(unsigned long ip)
{
	_local_bh_enable_ip(ip);
}
EXPORT_SYMBOL(local_bh_enable_ip);
179 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
180 * and we fall back to softirqd after that.
182 * This number has been established via experimentation.
183 * The two things to balance is latency against fairness -
184 * we want to handle softirqs as soon as possible, but they
185 * should not be able to lock up the box.
187 #define MAX_SOFTIRQ_RESTART 10
189 asmlinkage
void __do_softirq(void)
191 struct softirq_action
*h
;
193 int max_restart
= MAX_SOFTIRQ_RESTART
;
196 pending
= local_softirq_pending();
197 account_system_vtime(current
);
199 __local_bh_disable((unsigned long)__builtin_return_address(0));
200 lockdep_softirq_enter();
202 cpu
= smp_processor_id();
204 /* Reset the pending bitmask before enabling irqs */
205 set_softirq_pending(0);
213 int prev_count
= preempt_count();
217 if (unlikely(prev_count
!= preempt_count())) {
218 printk(KERN_ERR
"huh, entered softirq %td %s %p"
219 "with preempt_count %08x,"
220 " exited with %08x?\n", h
- softirq_vec
,
221 softirq_to_name
[h
- softirq_vec
],
222 h
->action
, prev_count
, preempt_count());
223 preempt_count() = prev_count
;
226 rcu_bh_qsctr_inc(cpu
);
234 pending
= local_softirq_pending();
235 if (pending
&& --max_restart
)
241 lockdep_softirq_exit();
243 account_system_vtime(current
);
247 #ifndef __ARCH_HAS_DO_SOFTIRQ
249 asmlinkage
void do_softirq(void)
257 local_irq_save(flags
);
259 pending
= local_softirq_pending();
264 local_irq_restore(flags
);
270 * Enter an interrupt context.
274 int cpu
= smp_processor_id();
277 if (idle_cpu(cpu
) && !in_interrupt()) {
279 tick_check_idle(cpu
);
/*
 * Architectures that define __ARCH_IRQ_EXIT_IRQS_DISABLED call
 * __do_softirq() directly; all others go through do_softirq().
 */
#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
# define invoke_softirq()	__do_softirq()
#else
# define invoke_softirq()	do_softirq()
#endif
291 * Exit an interrupt context. Process softirqs if needed and possible:
295 account_system_vtime(current
);
296 trace_hardirq_exit();
297 sub_preempt_count(IRQ_EXIT_OFFSET
);
298 if (!in_interrupt() && local_softirq_pending())
302 /* Make sure that timer wheel updates are propagated */
304 if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
305 tick_nohz_stop_sched_tick(0);
307 preempt_enable_no_resched();
/*
 * Mark softirq @nr as pending on this CPU.
 *
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * If we're in an interrupt or softirq, we're done
	 * (this also catches softirq-disabled code). We will
	 * actually run the softirq once we return from
	 * the irq or softirq.
	 *
	 * Otherwise we wake up ksoftirqd to make sure we
	 * schedule the softirq soon.
	 */
	if (!in_interrupt())
		wakeup_softirqd();
}
/*
 * Mark softirq @nr as pending. Local interrupts are saved and disabled
 * around raise_softirq_irqoff() and restored afterwards.
 */
void raise_softirq(unsigned int nr)
{
	unsigned long flags;

	local_irq_save(flags);
	raise_softirq_irqoff(nr);
	local_irq_restore(flags);
}
339 void open_softirq(int nr
, void (*action
)(struct softirq_action
*))
341 softirq_vec
[nr
].action
= action
;
347 struct tasklet_struct
*head
;
348 struct tasklet_struct
**tail
;
351 static DEFINE_PER_CPU(struct tasklet_head
, tasklet_vec
);
352 static DEFINE_PER_CPU(struct tasklet_head
, tasklet_hi_vec
);
354 void __tasklet_schedule(struct tasklet_struct
*t
)
358 local_irq_save(flags
);
360 *__get_cpu_var(tasklet_vec
).tail
= t
;
361 __get_cpu_var(tasklet_vec
).tail
= &(t
->next
);
362 raise_softirq_irqoff(TASKLET_SOFTIRQ
);
363 local_irq_restore(flags
);
366 EXPORT_SYMBOL(__tasklet_schedule
);
368 void __tasklet_hi_schedule(struct tasklet_struct
*t
)
372 local_irq_save(flags
);
374 *__get_cpu_var(tasklet_hi_vec
).tail
= t
;
375 __get_cpu_var(tasklet_hi_vec
).tail
= &(t
->next
);
376 raise_softirq_irqoff(HI_SOFTIRQ
);
377 local_irq_restore(flags
);
380 EXPORT_SYMBOL(__tasklet_hi_schedule
);
382 static void tasklet_action(struct softirq_action
*a
)
384 struct tasklet_struct
*list
;
387 list
= __get_cpu_var(tasklet_vec
).head
;
388 __get_cpu_var(tasklet_vec
).head
= NULL
;
389 __get_cpu_var(tasklet_vec
).tail
= &__get_cpu_var(tasklet_vec
).head
;
393 struct tasklet_struct
*t
= list
;
397 if (tasklet_trylock(t
)) {
398 if (!atomic_read(&t
->count
)) {
399 if (!test_and_clear_bit(TASKLET_STATE_SCHED
, &t
->state
))
410 *__get_cpu_var(tasklet_vec
).tail
= t
;
411 __get_cpu_var(tasklet_vec
).tail
= &(t
->next
);
412 __raise_softirq_irqoff(TASKLET_SOFTIRQ
);
417 static void tasklet_hi_action(struct softirq_action
*a
)
419 struct tasklet_struct
*list
;
422 list
= __get_cpu_var(tasklet_hi_vec
).head
;
423 __get_cpu_var(tasklet_hi_vec
).head
= NULL
;
424 __get_cpu_var(tasklet_hi_vec
).tail
= &__get_cpu_var(tasklet_hi_vec
).head
;
428 struct tasklet_struct
*t
= list
;
432 if (tasklet_trylock(t
)) {
433 if (!atomic_read(&t
->count
)) {
434 if (!test_and_clear_bit(TASKLET_STATE_SCHED
, &t
->state
))
445 *__get_cpu_var(tasklet_hi_vec
).tail
= t
;
446 __get_cpu_var(tasklet_hi_vec
).tail
= &(t
->next
);
447 __raise_softirq_irqoff(HI_SOFTIRQ
);
453 void tasklet_init(struct tasklet_struct
*t
,
454 void (*func
)(unsigned long), unsigned long data
)
458 atomic_set(&t
->count
, 0);
463 EXPORT_SYMBOL(tasklet_init
);
465 void tasklet_kill(struct tasklet_struct
*t
)
468 printk("Attempt to kill tasklet from interrupt\n");
470 while (test_and_set_bit(TASKLET_STATE_SCHED
, &t
->state
)) {
473 while (test_bit(TASKLET_STATE_SCHED
, &t
->state
));
475 tasklet_unlock_wait(t
);
476 clear_bit(TASKLET_STATE_SCHED
, &t
->state
);
479 EXPORT_SYMBOL(tasklet_kill
);
481 DEFINE_PER_CPU(struct list_head
[NR_SOFTIRQS
], softirq_work_list
);
482 EXPORT_PER_CPU_SYMBOL(softirq_work_list
);
484 static void __local_trigger(struct call_single_data
*cp
, int softirq
)
486 struct list_head
*head
= &__get_cpu_var(softirq_work_list
[softirq
]);
488 list_add_tail(&cp
->list
, head
);
490 /* Trigger the softirq only if the list was previously empty. */
491 if (head
->next
== &cp
->list
)
492 raise_softirq_irqoff(softirq
);
495 #ifdef CONFIG_USE_GENERIC_SMP_HELPERS
496 static void remote_softirq_receive(void *data
)
498 struct call_single_data
*cp
= data
;
504 local_irq_save(flags
);
505 __local_trigger(cp
, softirq
);
506 local_irq_restore(flags
);
509 static int __try_remote_softirq(struct call_single_data
*cp
, int cpu
, int softirq
)
511 if (cpu_online(cpu
)) {
512 cp
->func
= remote_softirq_receive
;
517 __smp_call_function_single(cpu
, cp
);
522 #else /* CONFIG_USE_GENERIC_SMP_HELPERS */
523 static int __try_remote_softirq(struct call_single_data
*cp
, int cpu
, int softirq
)
/**
 * __send_remote_softirq - try to schedule softirq work on a remote cpu
 * @cp: private SMP call function data area
 * @cpu: the remote cpu
 * @this_cpu: the currently executing cpu
 * @softirq: the softirq for the work
 *
 * Attempt to schedule softirq work on a remote cpu.  If this cannot be
 * done, the work is instead queued up on the local cpu.
 *
 * Interrupts must be disabled.
 */
void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq)
{
	/* A non-zero result from __try_remote_softirq() means "queue locally". */
	if (cpu == this_cpu || __try_remote_softirq(cp, cpu, softirq))
		__local_trigger(cp, softirq);
}
EXPORT_SYMBOL(__send_remote_softirq);
/**
 * send_remote_softirq - try to schedule softirq work on a remote cpu
 * @cp: private SMP call function data area
 * @cpu: the remote cpu
 * @softirq: the softirq for the work
 *
 * Like __send_remote_softirq except that disabling interrupts and
 * computing the current cpu is done for the caller.
 */
void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	unsigned long flags;
	int this_cpu;

	local_irq_save(flags);
	this_cpu = smp_processor_id();
	__send_remote_softirq(cp, cpu, this_cpu, softirq);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(send_remote_softirq);
569 static int __cpuinit
remote_softirq_cpu_notify(struct notifier_block
*self
,
570 unsigned long action
, void *hcpu
)
573 * If a CPU goes away, splice its entries to the current CPU
574 * and trigger a run of the softirq
576 if (action
== CPU_DEAD
|| action
== CPU_DEAD_FROZEN
) {
577 int cpu
= (unsigned long) hcpu
;
581 for (i
= 0; i
< NR_SOFTIRQS
; i
++) {
582 struct list_head
*head
= &per_cpu(softirq_work_list
[i
], cpu
);
583 struct list_head
*local_head
;
585 if (list_empty(head
))
588 local_head
= &__get_cpu_var(softirq_work_list
[i
]);
589 list_splice_init(head
, local_head
);
590 raise_softirq_irqoff(i
);
598 static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier
= {
599 .notifier_call
= remote_softirq_cpu_notify
,
602 void __init
softirq_init(void)
606 for_each_possible_cpu(cpu
) {
609 per_cpu(tasklet_vec
, cpu
).tail
=
610 &per_cpu(tasklet_vec
, cpu
).head
;
611 per_cpu(tasklet_hi_vec
, cpu
).tail
=
612 &per_cpu(tasklet_hi_vec
, cpu
).head
;
613 for (i
= 0; i
< NR_SOFTIRQS
; i
++)
614 INIT_LIST_HEAD(&per_cpu(softirq_work_list
[i
], cpu
));
617 register_hotcpu_notifier(&remote_softirq_cpu_notifier
);
619 open_softirq(TASKLET_SOFTIRQ
, tasklet_action
);
620 open_softirq(HI_SOFTIRQ
, tasklet_hi_action
);
623 static int ksoftirqd(void * __bind_cpu
)
625 set_current_state(TASK_INTERRUPTIBLE
);
627 while (!kthread_should_stop()) {
629 if (!local_softirq_pending()) {
630 preempt_enable_no_resched();
635 __set_current_state(TASK_RUNNING
);
637 while (local_softirq_pending()) {
638 /* Preempt disable stops cpu going offline.
639 If already offline, we'll be on wrong CPU:
641 if (cpu_is_offline((long)__bind_cpu
))
644 preempt_enable_no_resched();
647 rcu_qsctr_inc((long)__bind_cpu
);
650 set_current_state(TASK_INTERRUPTIBLE
);
652 __set_current_state(TASK_RUNNING
);
657 /* Wait for kthread_stop */
658 set_current_state(TASK_INTERRUPTIBLE
);
659 while (!kthread_should_stop()) {
661 set_current_state(TASK_INTERRUPTIBLE
);
663 __set_current_state(TASK_RUNNING
);
667 #ifdef CONFIG_HOTPLUG_CPU
669 * tasklet_kill_immediate is called to remove a tasklet which can already be
670 * scheduled for execution on @cpu.
672 * Unlike tasklet_kill, this function removes the tasklet
673 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
675 * When this function is called, @cpu must be in the CPU_DEAD state.
677 void tasklet_kill_immediate(struct tasklet_struct
*t
, unsigned int cpu
)
679 struct tasklet_struct
**i
;
681 BUG_ON(cpu_online(cpu
));
682 BUG_ON(test_bit(TASKLET_STATE_RUN
, &t
->state
));
684 if (!test_bit(TASKLET_STATE_SCHED
, &t
->state
))
687 /* CPU is dead, so no lock needed. */
688 for (i
= &per_cpu(tasklet_vec
, cpu
).head
; *i
; i
= &(*i
)->next
) {
691 /* If this was the tail element, move the tail ptr */
693 per_cpu(tasklet_vec
, cpu
).tail
= i
;
700 static void takeover_tasklets(unsigned int cpu
)
702 /* CPU is dead, so no lock needed. */
705 /* Find end, append list for that CPU. */
706 if (&per_cpu(tasklet_vec
, cpu
).head
!= per_cpu(tasklet_vec
, cpu
).tail
) {
707 *(__get_cpu_var(tasklet_vec
).tail
) = per_cpu(tasklet_vec
, cpu
).head
;
708 __get_cpu_var(tasklet_vec
).tail
= per_cpu(tasklet_vec
, cpu
).tail
;
709 per_cpu(tasklet_vec
, cpu
).head
= NULL
;
710 per_cpu(tasklet_vec
, cpu
).tail
= &per_cpu(tasklet_vec
, cpu
).head
;
712 raise_softirq_irqoff(TASKLET_SOFTIRQ
);
714 if (&per_cpu(tasklet_hi_vec
, cpu
).head
!= per_cpu(tasklet_hi_vec
, cpu
).tail
) {
715 *__get_cpu_var(tasklet_hi_vec
).tail
= per_cpu(tasklet_hi_vec
, cpu
).head
;
716 __get_cpu_var(tasklet_hi_vec
).tail
= per_cpu(tasklet_hi_vec
, cpu
).tail
;
717 per_cpu(tasklet_hi_vec
, cpu
).head
= NULL
;
718 per_cpu(tasklet_hi_vec
, cpu
).tail
= &per_cpu(tasklet_hi_vec
, cpu
).head
;
720 raise_softirq_irqoff(HI_SOFTIRQ
);
724 #endif /* CONFIG_HOTPLUG_CPU */
726 static int __cpuinit
cpu_callback(struct notifier_block
*nfb
,
727 unsigned long action
,
730 int hotcpu
= (unsigned long)hcpu
;
731 struct task_struct
*p
;
735 case CPU_UP_PREPARE_FROZEN
:
736 p
= kthread_create(ksoftirqd
, hcpu
, "ksoftirqd/%d", hotcpu
);
738 printk("ksoftirqd for %i failed\n", hotcpu
);
741 kthread_bind(p
, hotcpu
);
742 per_cpu(ksoftirqd
, hotcpu
) = p
;
745 case CPU_ONLINE_FROZEN
:
746 wake_up_process(per_cpu(ksoftirqd
, hotcpu
));
748 #ifdef CONFIG_HOTPLUG_CPU
749 case CPU_UP_CANCELED
:
750 case CPU_UP_CANCELED_FROZEN
:
751 if (!per_cpu(ksoftirqd
, hotcpu
))
753 /* Unbind so it can run. Fall thru. */
754 kthread_bind(per_cpu(ksoftirqd
, hotcpu
),
755 cpumask_any(cpu_online_mask
));
757 case CPU_DEAD_FROZEN
: {
758 struct sched_param param
= { .sched_priority
= MAX_RT_PRIO
-1 };
760 p
= per_cpu(ksoftirqd
, hotcpu
);
761 per_cpu(ksoftirqd
, hotcpu
) = NULL
;
762 sched_setscheduler_nocheck(p
, SCHED_FIFO
, ¶m
);
764 takeover_tasklets(hotcpu
);
767 #endif /* CONFIG_HOTPLUG_CPU */
772 static struct notifier_block __cpuinitdata cpu_nfb
= {
773 .notifier_call
= cpu_callback
776 static __init
int spawn_ksoftirqd(void)
778 void *cpu
= (void *)(long)smp_processor_id();
779 int err
= cpu_callback(&cpu_nfb
, CPU_UP_PREPARE
, cpu
);
781 BUG_ON(err
== NOTIFY_BAD
);
782 cpu_callback(&cpu_nfb
, CPU_ONLINE
, cpu
);
783 register_cpu_notifier(&cpu_nfb
);
786 early_initcall(spawn_ksoftirqd
);
790 * Call a function on all processors
792 int on_each_cpu(void (*func
) (void *info
), void *info
, int wait
)
797 ret
= smp_call_function(func
, info
, wait
);
804 EXPORT_SYMBOL(on_each_cpu
);
808 * [ These __weak aliases are kept in a separate compilation unit, so that
809 * GCC does not inline them incorrectly. ]
812 int __init __weak
early_irq_init(void)
817 int __init __weak
arch_probe_nr_irqs(void)
822 int __init __weak
arch_early_irq_init(void)
827 int __weak
arch_init_chip_data(struct irq_desc
*desc
, int cpu
)