/*
 *	linux/kernel/softirq.c
 *
 *	Copyright (C) 1992 Linus Torvalds
 *
 *	Distribute under GPLv2.
 *
 *	Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 *
 *	Remote softirq infrastructure is by Jens Axboe.
 */
#include <linux/module.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <linux/ftrace.h>
#include <linux/smp.h>
#include <linux/tick.h>

#define CREATE_TRACE_POINTS
#include <trace/events/irq.h>
/*
   - No shared variables, all the data are CPU local.
   - If a softirq needs serialization, let it serialize itself
     by its own spinlocks.
   - Even if softirq is serialized, only the local cpu is marked for
     execution. Hence, we get something sort of weak cpu binding.
     Though it is still not clear whether it will result in better
     locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: serialized wrt itself.
 */
#ifndef __ARCH_IRQ_STAT
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif

static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;

static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
char *softirq_to_name[NR_SOFTIRQS] = {
	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK",
	"TASKLET", "SCHED", "HRTIMER", "RCU"
};
/*
 * We cannot loop indefinitely here to avoid userspace starvation,
 * but we also don't want to introduce a worst case 1/HZ latency
 * to the pending events, so let the scheduler balance
 * the softirq load for us.
 */
void wakeup_softirqd(void)
{
	/* Interrupts are disabled: no need to stop preemption */
	struct task_struct *tsk = __get_cpu_var(ksoftirqd);

	if (tsk && tsk->state != TASK_RUNNING)
		wake_up_process(tsk);
}
/*
 * This one is for softirq.c-internal use,
 * where hardirqs are disabled legitimately:
 */
#ifdef CONFIG_TRACE_IRQFLAGS
static void __local_bh_disable(unsigned long ip)
{
	unsigned long flags;

	WARN_ON_ONCE(in_irq());

	raw_local_irq_save(flags);
	/*
	 * The preempt tracer hooks into add_preempt_count and will break
	 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
	 * is set and before current->softirq_enabled is cleared.
	 * We must manually increment preempt_count here and manually
	 * call the trace_preempt_off later.
	 */
	preempt_count() += SOFTIRQ_OFFSET;
	/*
	 * Were softirqs turned off above:
	 */
	if (softirq_count() == SOFTIRQ_OFFSET)
		trace_softirqs_off(ip);
	raw_local_irq_restore(flags);

	if (preempt_count() == SOFTIRQ_OFFSET)
		trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
}
#else /* !CONFIG_TRACE_IRQFLAGS */
static inline void __local_bh_disable(unsigned long ip)
{
	add_preempt_count(SOFTIRQ_OFFSET);
	barrier();
}
#endif /* CONFIG_TRACE_IRQFLAGS */

void local_bh_disable(void)
{
	__local_bh_disable((unsigned long)__builtin_return_address(0));
}

EXPORT_SYMBOL(local_bh_disable);
/*
 * Special-case - softirqs can safely be enabled in
 * cond_resched_softirq(), or by __do_softirq(),
 * without processing still-pending softirqs:
 */
void _local_bh_enable(void)
{
	WARN_ON_ONCE(in_irq());
	WARN_ON_ONCE(!irqs_disabled());

	if (softirq_count() == SOFTIRQ_OFFSET)
		trace_softirqs_on((unsigned long)__builtin_return_address(0));
	sub_preempt_count(SOFTIRQ_OFFSET);
}

EXPORT_SYMBOL(_local_bh_enable);
static inline void _local_bh_enable_ip(unsigned long ip)
{
	WARN_ON_ONCE(in_irq() || irqs_disabled());
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_disable();
#endif
	/*
	 * Are softirqs going to be turned on now:
	 */
	if (softirq_count() == SOFTIRQ_OFFSET)
		trace_softirqs_on(ip);
	/*
	 * Keep preemption disabled until we are done with
	 * softirq processing:
	 */
	sub_preempt_count(SOFTIRQ_OFFSET - 1);

	if (unlikely(!in_interrupt() && local_softirq_pending()))
		do_softirq();

	dec_preempt_count();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_enable();
#endif
	preempt_check_resched();
}

void local_bh_enable(void)
{
	_local_bh_enable_ip((unsigned long)__builtin_return_address(0));
}
EXPORT_SYMBOL(local_bh_enable);

void local_bh_enable_ip(unsigned long ip)
{
	_local_bh_enable_ip(ip);
}
EXPORT_SYMBOL(local_bh_enable_ip);
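
/*
 * Illustrative sketch (not part of this file): a typical caller of the
 * local_bh_*() API above, protecting per-CPU data that is also touched
 * from softirq context.  The symbols example_counter and
 * example_bump_counter are hypothetical.
 */
#if 0
static DEFINE_PER_CPU(unsigned long, example_counter);

static void example_bump_counter(void)
{
	/* Keep softirqs (and thus preemption) off this CPU while we
	 * touch the per-CPU counter. */
	local_bh_disable();
	__get_cpu_var(example_counter)++;
	local_bh_enable();
}
#endif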
/*
 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
 * and we fall back to softirqd after that.
 *
 * This number has been established via experimentation.
 * The two things to balance are latency against fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_RESTART 10
asmlinkage void __do_softirq(void)
{
	struct softirq_action *h;
	__u32 pending;
	int max_restart = MAX_SOFTIRQ_RESTART;
	int cpu;

	pending = local_softirq_pending();
	account_system_vtime(current);

	__local_bh_disable((unsigned long)__builtin_return_address(0));
	lockdep_softirq_enter();

	cpu = smp_processor_id();
restart:
	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(0);

	local_irq_enable();

	h = softirq_vec;

	do {
		if (pending & 1) {
			int prev_count = preempt_count();

			trace_softirq_entry(h, softirq_vec);
			h->action(h);
			trace_softirq_exit(h, softirq_vec);
			if (unlikely(prev_count != preempt_count())) {
				printk(KERN_ERR "huh, entered softirq %td %s %p"
				       " with preempt_count %08x,"
				       " exited with %08x?\n", h - softirq_vec,
				       softirq_to_name[h - softirq_vec],
				       h->action, prev_count, preempt_count());
				preempt_count() = prev_count;
			}

			rcu_bh_qsctr_inc(cpu);
		}
		h++;
		pending >>= 1;
	} while (pending);

	local_irq_disable();

	pending = local_softirq_pending();
	if (pending && --max_restart)
		goto restart;

	if (pending)
		wakeup_softirqd();

	lockdep_softirq_exit();

	account_system_vtime(current);
	_local_bh_enable();
}
#ifndef __ARCH_HAS_DO_SOFTIRQ

asmlinkage void do_softirq(void)
{
	__u32 pending;
	unsigned long flags;

	if (in_interrupt())
		return;

	local_irq_save(flags);
	pending = local_softirq_pending();
	if (pending)
		__do_softirq();
	local_irq_restore(flags);
}

#endif
/*
 * Enter an interrupt context.
 */
void irq_enter(void)
{
	int cpu = smp_processor_id();

	rcu_irq_enter();
	if (idle_cpu(cpu) && !in_interrupt()) {
		__irq_enter();
		tick_check_idle(cpu);
	} else
		__irq_enter();
}

#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
# define invoke_softirq()	__do_softirq()
#else
# define invoke_softirq()	do_softirq()
#endif
/*
 * Exit an interrupt context. Process softirqs if needed and possible:
 */
void irq_exit(void)
{
	account_system_vtime(current);
	trace_hardirq_exit();
	sub_preempt_count(IRQ_EXIT_OFFSET);
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();

#ifdef CONFIG_NO_HZ
	/* Make sure that timer wheel updates are propagated */
	rcu_irq_exit();
	if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
		tick_nohz_stop_sched_tick(0);
#endif
	preempt_enable_no_resched();
}
/*
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * If we're in an interrupt or softirq, we're done
	 * (this also catches softirq-disabled code). We will
	 * actually run the softirq once we return from
	 * the irq or softirq.
	 *
	 * Otherwise we wake up ksoftirqd to make sure we
	 * schedule the softirq soon.
	 */
	if (!in_interrupt())
		wakeup_softirqd();
}
void raise_softirq(unsigned int nr)
{
	unsigned long flags;

	local_irq_save(flags);
	raise_softirq_irqoff(nr);
	local_irq_restore(flags);
}
void open_softirq(int nr, void (*action)(struct softirq_action *))
{
	softirq_vec[nr].action = action;
}
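
/*
 * Illustrative sketch (not part of this file): how a subsystem wires a
 * handler into the softirq machinery with open_softirq() and marks it
 * pending with raise_softirq().  EXAMPLE_SOFTIRQ and the functions below
 * are hypothetical; a real softirq needs its own entry in the softirq
 * enum in <linux/interrupt.h>.
 */
#if 0
static void example_softirq_action(struct softirq_action *h)
{
	/* Runs in softirq context on the CPU that raised it, with
	 * hardirqs enabled. */
}

static int __init example_softirq_setup(void)
{
	open_softirq(EXAMPLE_SOFTIRQ, example_softirq_action);
	return 0;
}

static irqreturn_t example_irq_handler(int irq, void *dev_id)
{
	/* Hardirq context: just mark the softirq pending; it runs on
	 * irq_exit() or in ksoftirqd. */
	raise_softirq(EXAMPLE_SOFTIRQ);
	return IRQ_HANDLED;
}
#endif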
/*
 * Tasklets
 */
struct tasklet_head
{
	struct tasklet_struct *head;
	struct tasklet_struct **tail;
};

static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
void __tasklet_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	t->next = NULL;
	*__get_cpu_var(tasklet_vec).tail = t;
	__get_cpu_var(tasklet_vec).tail = &(t->next);
	raise_softirq_irqoff(TASKLET_SOFTIRQ);
	local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_schedule);
void __tasklet_hi_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	t->next = NULL;
	*__get_cpu_var(tasklet_hi_vec).tail = t;
	__get_cpu_var(tasklet_hi_vec).tail = &(t->next);
	raise_softirq_irqoff(HI_SOFTIRQ);
	local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_hi_schedule);
static void tasklet_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = __get_cpu_var(tasklet_vec).head;
	__get_cpu_var(tasklet_vec).head = NULL;
	__get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head;
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
		t->next = NULL;
		*__get_cpu_var(tasklet_vec).tail = t;
		__get_cpu_var(tasklet_vec).tail = &(t->next);
		__raise_softirq_irqoff(TASKLET_SOFTIRQ);
		local_irq_enable();
	}
}
static void tasklet_hi_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = __get_cpu_var(tasklet_hi_vec).head;
	__get_cpu_var(tasklet_hi_vec).head = NULL;
	__get_cpu_var(tasklet_hi_vec).tail = &__get_cpu_var(tasklet_hi_vec).head;
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
		t->next = NULL;
		*__get_cpu_var(tasklet_hi_vec).tail = t;
		__get_cpu_var(tasklet_hi_vec).tail = &(t->next);
		__raise_softirq_irqoff(HI_SOFTIRQ);
		local_irq_enable();
	}
}
void tasklet_init(struct tasklet_struct *t,
		  void (*func)(unsigned long), unsigned long data)
{
	t->next = NULL;
	t->state = 0;
	atomic_set(&t->count, 0);
	t->func = func;
	t->data = data;
}

EXPORT_SYMBOL(tasklet_init);
void tasklet_kill(struct tasklet_struct *t)
{
	if (in_interrupt())
		printk("Attempt to kill tasklet from interrupt\n");

	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
		do {
			yield();
		} while (test_bit(TASKLET_STATE_SCHED, &t->state));
	}
	tasklet_unlock_wait(t);
	clear_bit(TASKLET_STATE_SCHED, &t->state);
}

EXPORT_SYMBOL(tasklet_kill);
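
/*
 * Illustrative sketch (not part of this file): the usual driver-side use
 * of the tasklet API implemented above (tasklet_init/tasklet_schedule/
 * tasklet_kill).  struct example_dev and the example_* functions are
 * hypothetical.
 */
#if 0
struct example_dev {
	struct tasklet_struct rx_tasklet;
	/* ... device state ... */
};

static void example_rx_work(unsigned long data)
{
	struct example_dev *dev = (struct example_dev *)data;

	/* Bottom-half work: softirq context, serialized wrt itself. */
}

static void example_dev_setup(struct example_dev *dev)
{
	tasklet_init(&dev->rx_tasklet, example_rx_work, (unsigned long)dev);
}

static irqreturn_t example_dev_irq(int irq, void *dev_id)
{
	struct example_dev *dev = dev_id;

	/* Defer the heavy lifting out of hardirq context. */
	tasklet_schedule(&dev->rx_tasklet);
	return IRQ_HANDLED;
}

static void example_dev_teardown(struct example_dev *dev)
{
	/* Must not be called from interrupt context; may sleep via yield(). */
	tasklet_kill(&dev->rx_tasklet);
}
#endif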
DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
EXPORT_PER_CPU_SYMBOL(softirq_work_list);
static void __local_trigger(struct call_single_data *cp, int softirq)
{
	struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]);

	list_add_tail(&cp->list, head);

	/* Trigger the softirq only if the list was previously empty. */
	if (head->next == &cp->list)
		raise_softirq_irqoff(softirq);
}
#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
static void remote_softirq_receive(void *data)
{
	struct call_single_data *cp = data;
	unsigned long flags;
	int softirq;

	softirq = cp->priv;

	local_irq_save(flags);
	__local_trigger(cp, softirq);
	local_irq_restore(flags);
}

static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	if (cpu_online(cpu)) {
		cp->func = remote_softirq_receive;
		cp->info = cp;
		cp->flags = 0;
		cp->priv = softirq;

		__smp_call_function_single(cpu, cp, 0);
		return 0;
	}

	return 1;
}
#else /* CONFIG_USE_GENERIC_SMP_HELPERS */
static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	return 1;
}
#endif /* CONFIG_USE_GENERIC_SMP_HELPERS */
/**
 * __send_remote_softirq - try to schedule softirq work on a remote cpu
 * @cp: private SMP call function data area
 * @cpu: the remote cpu
 * @this_cpu: the currently executing cpu
 * @softirq: the softirq for the work
 *
 * Attempt to schedule softirq work on a remote cpu.  If this cannot be
 * done, the work is instead queued up on the local cpu.
 *
 * Interrupts must be disabled.
 */
void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq)
{
	if (cpu == this_cpu || __try_remote_softirq(cp, cpu, softirq))
		__local_trigger(cp, softirq);
}
EXPORT_SYMBOL(__send_remote_softirq);
/**
 * send_remote_softirq - try to schedule softirq work on a remote cpu
 * @cp: private SMP call function data area
 * @cpu: the remote cpu
 * @softirq: the softirq for the work
 *
 * Like __send_remote_softirq except that disabling interrupts and
 * computing the current cpu is done for the caller.
 */
void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	unsigned long flags;
	int this_cpu;

	local_irq_save(flags);
	this_cpu = smp_processor_id();
	__send_remote_softirq(cp, cpu, this_cpu, softirq);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(send_remote_softirq);
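
/*
 * Illustrative sketch (not part of this file): completing work on the CPU
 * that submitted it, which is the kind of pattern the remote softirq
 * helpers above support (e.g. block I/O completion).  struct example_req,
 * example_complete() and EXAMPLE_SOFTIRQ are hypothetical; the handler
 * registered for EXAMPLE_SOFTIRQ would walk
 * __get_cpu_var(softirq_work_list[EXAMPLE_SOFTIRQ]).
 */
#if 0
struct example_req {
	struct call_single_data csd;	/* must stay valid until the softirq runs */
	int submit_cpu;			/* CPU that issued the request */
	/* ... */
};

static void example_complete(struct example_req *req)
{
	/* Queues req->csd on submit_cpu's softirq_work_list and raises
	 * EXAMPLE_SOFTIRQ there; falls back to the local CPU if that CPU
	 * is offline or is the current one. */
	send_remote_softirq(&req->csd, req->submit_cpu, EXAMPLE_SOFTIRQ);
}
#endif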
static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self,
					       unsigned long action, void *hcpu)
{
	/*
	 * If a CPU goes away, splice its entries to the current CPU
	 * and trigger a run of the softirq
	 */
	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
		int cpu = (unsigned long) hcpu;
		int i;

		local_irq_disable();
		for (i = 0; i < NR_SOFTIRQS; i++) {
			struct list_head *head = &per_cpu(softirq_work_list[i], cpu);
			struct list_head *local_head;

			if (list_empty(head))
				continue;

			local_head = &__get_cpu_var(softirq_work_list[i]);
			list_splice_init(head, local_head);
			raise_softirq_irqoff(i);
		}
		local_irq_enable();
	}

	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = {
	.notifier_call	= remote_softirq_cpu_notify,
};
void __init softirq_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		int i;

		per_cpu(tasklet_vec, cpu).tail =
			&per_cpu(tasklet_vec, cpu).head;
		per_cpu(tasklet_hi_vec, cpu).tail =
			&per_cpu(tasklet_hi_vec, cpu).head;
		for (i = 0; i < NR_SOFTIRQS; i++)
			INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu));
	}

	register_hotcpu_notifier(&remote_softirq_cpu_notifier);

	open_softirq(TASKLET_SOFTIRQ, tasklet_action);
	open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}
static int ksoftirqd(void * __bind_cpu)
{
	set_current_state(TASK_INTERRUPTIBLE);

	while (!kthread_should_stop()) {
		preempt_disable();
		if (!local_softirq_pending()) {
			preempt_enable_no_resched();
			schedule();
			preempt_disable();
		}

		__set_current_state(TASK_RUNNING);

		while (local_softirq_pending()) {
			/* Preempt disable stops cpu going offline.
			   If already offline, we'll be on wrong CPU:
			   don't process */
			if (cpu_is_offline((long)__bind_cpu))
				goto wait_to_die;
			do_softirq();
			preempt_enable_no_resched();
			cond_resched();
			preempt_disable();
			rcu_qsctr_inc((long)__bind_cpu);
		}
		preempt_enable();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;

wait_to_die:
	preempt_enable();
	/* Wait for kthread_stop */
	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
		schedule();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}
#ifdef CONFIG_HOTPLUG_CPU
/*
 * tasklet_kill_immediate is called to remove a tasklet which can already be
 * scheduled for execution on @cpu.
 *
 * Unlike tasklet_kill, this function removes the tasklet
 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
 *
 * When this function is called, @cpu must be in the CPU_DEAD state.
 */
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
	struct tasklet_struct **i;

	BUG_ON(cpu_online(cpu));
	BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));

	if (!test_bit(TASKLET_STATE_SCHED, &t->state))
		return;

	/* CPU is dead, so no lock needed. */
	for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
		if (*i == t) {
			*i = t->next;
			/* If this was the tail element, move the tail ptr */
			if (*i == NULL)
				per_cpu(tasklet_vec, cpu).tail = i;
			return;
		}
	}
	BUG();
}
static void takeover_tasklets(unsigned int cpu)
{
	/* CPU is dead, so no lock needed. */
	local_irq_disable();

	/* Find end, append list for that CPU. */
	if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
		*(__get_cpu_var(tasklet_vec).tail) = per_cpu(tasklet_vec, cpu).head;
		__get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail;
		per_cpu(tasklet_vec, cpu).head = NULL;
		per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
	}
	raise_softirq_irqoff(TASKLET_SOFTIRQ);

	if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
		*__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head;
		__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail;
		per_cpu(tasklet_hi_vec, cpu).head = NULL;
		per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
	}
	raise_softirq_irqoff(HI_SOFTIRQ);

	local_irq_enable();
}
#endif /* CONFIG_HOTPLUG_CPU */
static int __cpuinit cpu_callback(struct notifier_block *nfb,
				  unsigned long action,
				  void *hcpu)
{
	int hotcpu = (unsigned long)hcpu;
	struct task_struct *p;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
		if (IS_ERR(p)) {
			printk("ksoftirqd for %i failed\n", hotcpu);
			return NOTIFY_BAD;
		}
		kthread_bind(p, hotcpu);
		per_cpu(ksoftirqd, hotcpu) = p;
		break;
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		wake_up_process(per_cpu(ksoftirqd, hotcpu));
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
		if (!per_cpu(ksoftirqd, hotcpu))
			break;
		/* Unbind so it can run.  Fall thru. */
		kthread_bind(per_cpu(ksoftirqd, hotcpu),
			     cpumask_any(cpu_online_mask));
	case CPU_DEAD:
	case CPU_DEAD_FROZEN: {
		struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };

		p = per_cpu(ksoftirqd, hotcpu);
		per_cpu(ksoftirqd, hotcpu) = NULL;
		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
		kthread_stop(p);
		takeover_tasklets(hotcpu);
		break;
	}
#endif /* CONFIG_HOTPLUG_CPU */
	}
	return NOTIFY_OK;
}
static struct notifier_block __cpuinitdata cpu_nfb = {
	.notifier_call = cpu_callback
};
static __init int spawn_ksoftirqd(void)
{
	void *cpu = (void *)(long)smp_processor_id();
	int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);

	BUG_ON(err == NOTIFY_BAD);
	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
	register_cpu_notifier(&cpu_nfb);
	return 0;
}
early_initcall(spawn_ksoftirqd);
#ifdef CONFIG_SMP
/*
 * Call a function on all processors
 */
int on_each_cpu(void (*func) (void *info), void *info, int wait)
{
	int ret = 0;

	preempt_disable();
	ret = smp_call_function(func, info, wait);
	local_irq_disable();
	func(info);
	local_irq_enable();
	preempt_enable();
	return ret;
}
EXPORT_SYMBOL(on_each_cpu);
#endif
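
/*
 * Illustrative sketch (not part of this file): running a function on every
 * online CPU and waiting for completion via on_each_cpu().
 * example_sync_state() and example_sync_all() are hypothetical.
 */
#if 0
static void example_sync_state(void *info)
{
	/* Runs with interrupts disabled on each CPU, including the caller's. */
}

static void example_sync_all(void)
{
	/* Must not be called from interrupt context; wait == 1 blocks until
	 * all CPUs have run the function. */
	on_each_cpu(example_sync_state, NULL, 1);
}
#endif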
/*
 * [ These __weak aliases are kept in a separate compilation unit, so that
 *   GCC does not inline them incorrectly. ]
 */

int __init __weak early_irq_init(void)
{
	return 0;
}

int __init __weak arch_probe_nr_irqs(void)
{
	return 0;
}

int __init __weak arch_early_irq_init(void)
{
	return 0;
}

int __weak arch_init_chip_data(struct irq_desc *desc, int node)
{
	return 0;
}