[PATCH] new scheme to preempt swap token
[linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git] / kernel / workqueue.c
blob8d1e7cb8a51a85327bc84a61705a7fe91881228e
/*
 * linux/kernel/workqueue.c
 *
 * Generic mechanism for defining kernel helper threads for running
 * arbitrary tasks in process context.
 *
 * Started by Ingo Molnar, Copyright (C) 2002
 *
 * Derived from the taskqueue/keventd code by:
 *
 *   David Woodhouse <dwmw2@infradead.org>
 *   Andrew Morton <andrewm@uow.edu.au>
 *   Kai Petzke <wpp@marie.physik.tu-berlin.de>
 *   Theodore Ts'o <tytso@mit.edu>
 *
 * Made to use alloc_percpu by Christoph Lameter <clameter@sgi.com>.
 */
19 #include <linux/module.h>
20 #include <linux/kernel.h>
21 #include <linux/sched.h>
22 #include <linux/init.h>
23 #include <linux/signal.h>
24 #include <linux/completion.h>
25 #include <linux/workqueue.h>
26 #include <linux/slab.h>
27 #include <linux/cpu.h>
28 #include <linux/notifier.h>
29 #include <linux/kthread.h>
30 #include <linux/hardirq.h>
31 #include <linux/mempolicy.h>
34 * The per-CPU workqueue (if single thread, we always use the first
35 * possible cpu).
37 * The sequence counters are for flush_scheduled_work(). It wants to wait
38 * until all currently-scheduled works are completed, but it doesn't
39 * want to be livelocked by new, incoming ones. So it waits until
40 * remove_sequence is >= the insert_sequence which pertained when
41 * flush_scheduled_work() was called.
43 struct cpu_workqueue_struct {
45 spinlock_t lock;
47 long remove_sequence; /* Least-recently added (next to run) */
48 long insert_sequence; /* Next to add */
50 struct list_head worklist;
51 wait_queue_head_t more_work;
52 wait_queue_head_t work_done;
54 struct workqueue_struct *wq;
55 struct task_struct *thread;
57 int run_depth; /* Detect run_workqueue() recursion depth */
58 } ____cacheline_aligned;
61 * The externally visible workqueue abstraction is an array of
62 * per-CPU workqueues:
64 struct workqueue_struct {
65 struct cpu_workqueue_struct *cpu_wq;
66 const char *name;
67 struct list_head list; /* Empty if single thread */
/*
 * All the per-cpu workqueues on the system, for hotplug cpu to add/remove
 * threads to each one as cpus come/go.  workqueue_mutex is held while the
 * list is modified or walked.
 */
static DEFINE_MUTEX(workqueue_mutex);
static LIST_HEAD(workqueues);

/* CPU whose per-cpu slot is used by single-threaded workqueues. */
static int singlethread_cpu;
77 /* If it's single threaded, it isn't in the list of workqueues. */
78 static inline int is_single_threaded(struct workqueue_struct *wq)
80 return list_empty(&wq->list);
83 static inline void set_wq_data(struct work_struct *work, void *wq)
85 unsigned long new, old, res;
87 /* assume the pending flag is already set and that the task has already
88 * been queued on this workqueue */
89 new = (unsigned long) wq | (1UL << WORK_STRUCT_PENDING);
90 res = work->management;
91 if (res != new) {
92 do {
93 old = res;
94 new = (unsigned long) wq;
95 new |= (old & WORK_STRUCT_FLAG_MASK);
96 res = cmpxchg(&work->management, old, new);
97 } while (res != old);
101 static inline void *get_wq_data(struct work_struct *work)
103 return (void *) (work->management & WORK_STRUCT_WQ_DATA_MASK);
106 /* Preempt must be disabled. */
107 static void __queue_work(struct cpu_workqueue_struct *cwq,
108 struct work_struct *work)
110 unsigned long flags;
112 spin_lock_irqsave(&cwq->lock, flags);
113 set_wq_data(work, cwq);
114 list_add_tail(&work->entry, &cwq->worklist);
115 cwq->insert_sequence++;
116 wake_up(&cwq->more_work);
117 spin_unlock_irqrestore(&cwq->lock, flags);
121 * queue_work - queue work on a workqueue
122 * @wq: workqueue to use
123 * @work: work to queue
125 * Returns 0 if @work was already on a queue, non-zero otherwise.
127 * We queue the work to the CPU it was submitted, but there is no
128 * guarantee that it will be processed by that CPU.
130 int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work)
132 int ret = 0, cpu = get_cpu();
134 if (!test_and_set_bit(WORK_STRUCT_PENDING, &work->management)) {
135 if (unlikely(is_single_threaded(wq)))
136 cpu = singlethread_cpu;
137 BUG_ON(!list_empty(&work->entry));
138 __queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
139 ret = 1;
141 put_cpu();
142 return ret;
144 EXPORT_SYMBOL_GPL(queue_work);
146 static void delayed_work_timer_fn(unsigned long __data)
148 struct delayed_work *dwork = (struct delayed_work *)__data;
149 struct workqueue_struct *wq = get_wq_data(&dwork->work);
150 int cpu = smp_processor_id();
152 if (unlikely(is_single_threaded(wq)))
153 cpu = singlethread_cpu;
155 __queue_work(per_cpu_ptr(wq->cpu_wq, cpu), &dwork->work);
159 * queue_delayed_work - queue work on a workqueue after delay
160 * @wq: workqueue to use
161 * @work: delayable work to queue
162 * @delay: number of jiffies to wait before queueing
164 * Returns 0 if @work was already on a queue, non-zero otherwise.
166 int fastcall queue_delayed_work(struct workqueue_struct *wq,
167 struct delayed_work *dwork, unsigned long delay)
169 int ret = 0;
170 struct timer_list *timer = &dwork->timer;
171 struct work_struct *work = &dwork->work;
173 if (delay == 0)
174 return queue_work(wq, work);
176 if (!test_and_set_bit(WORK_STRUCT_PENDING, &work->management)) {
177 BUG_ON(timer_pending(timer));
178 BUG_ON(!list_empty(&work->entry));
180 /* This stores wq for the moment, for the timer_fn */
181 set_wq_data(work, wq);
182 timer->expires = jiffies + delay;
183 timer->data = (unsigned long)dwork;
184 timer->function = delayed_work_timer_fn;
185 add_timer(timer);
186 ret = 1;
188 return ret;
190 EXPORT_SYMBOL_GPL(queue_delayed_work);
193 * queue_delayed_work_on - queue work on specific CPU after delay
194 * @cpu: CPU number to execute work on
195 * @wq: workqueue to use
196 * @work: work to queue
197 * @delay: number of jiffies to wait before queueing
199 * Returns 0 if @work was already on a queue, non-zero otherwise.
201 int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
202 struct delayed_work *dwork, unsigned long delay)
204 int ret = 0;
205 struct timer_list *timer = &dwork->timer;
206 struct work_struct *work = &dwork->work;
208 if (!test_and_set_bit(WORK_STRUCT_PENDING, &work->management)) {
209 BUG_ON(timer_pending(timer));
210 BUG_ON(!list_empty(&work->entry));
212 /* This stores wq for the moment, for the timer_fn */
213 set_wq_data(work, wq);
214 timer->expires = jiffies + delay;
215 timer->data = (unsigned long)dwork;
216 timer->function = delayed_work_timer_fn;
217 add_timer_on(timer, cpu);
218 ret = 1;
220 return ret;
222 EXPORT_SYMBOL_GPL(queue_delayed_work_on);
224 static void run_workqueue(struct cpu_workqueue_struct *cwq)
226 unsigned long flags;
229 * Keep taking off work from the queue until
230 * done.
232 spin_lock_irqsave(&cwq->lock, flags);
233 cwq->run_depth++;
234 if (cwq->run_depth > 3) {
235 /* morton gets to eat his hat */
236 printk("%s: recursion depth exceeded: %d\n",
237 __FUNCTION__, cwq->run_depth);
238 dump_stack();
240 while (!list_empty(&cwq->worklist)) {
241 struct work_struct *work = list_entry(cwq->worklist.next,
242 struct work_struct, entry);
243 work_func_t f = work->func;
245 list_del_init(cwq->worklist.next);
246 spin_unlock_irqrestore(&cwq->lock, flags);
248 BUG_ON(get_wq_data(work) != cwq);
249 if (!test_bit(WORK_STRUCT_NOAUTOREL, &work->management))
250 work_release(work);
251 f(work);
253 spin_lock_irqsave(&cwq->lock, flags);
254 cwq->remove_sequence++;
255 wake_up(&cwq->work_done);
257 cwq->run_depth--;
258 spin_unlock_irqrestore(&cwq->lock, flags);
261 static int worker_thread(void *__cwq)
263 struct cpu_workqueue_struct *cwq = __cwq;
264 DECLARE_WAITQUEUE(wait, current);
265 struct k_sigaction sa;
266 sigset_t blocked;
268 current->flags |= PF_NOFREEZE;
270 set_user_nice(current, -5);
272 /* Block and flush all signals */
273 sigfillset(&blocked);
274 sigprocmask(SIG_BLOCK, &blocked, NULL);
275 flush_signals(current);
278 * We inherited MPOL_INTERLEAVE from the booting kernel.
279 * Set MPOL_DEFAULT to insure node local allocations.
281 numa_default_policy();
283 /* SIG_IGN makes children autoreap: see do_notify_parent(). */
284 sa.sa.sa_handler = SIG_IGN;
285 sa.sa.sa_flags = 0;
286 siginitset(&sa.sa.sa_mask, sigmask(SIGCHLD));
287 do_sigaction(SIGCHLD, &sa, (struct k_sigaction *)0);
289 set_current_state(TASK_INTERRUPTIBLE);
290 while (!kthread_should_stop()) {
291 add_wait_queue(&cwq->more_work, &wait);
292 if (list_empty(&cwq->worklist))
293 schedule();
294 else
295 __set_current_state(TASK_RUNNING);
296 remove_wait_queue(&cwq->more_work, &wait);
298 if (!list_empty(&cwq->worklist))
299 run_workqueue(cwq);
300 set_current_state(TASK_INTERRUPTIBLE);
302 __set_current_state(TASK_RUNNING);
303 return 0;
306 static void flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
308 if (cwq->thread == current) {
310 * Probably keventd trying to flush its own queue. So simply run
311 * it by hand rather than deadlocking.
313 run_workqueue(cwq);
314 } else {
315 DEFINE_WAIT(wait);
316 long sequence_needed;
318 spin_lock_irq(&cwq->lock);
319 sequence_needed = cwq->insert_sequence;
321 while (sequence_needed - cwq->remove_sequence > 0) {
322 prepare_to_wait(&cwq->work_done, &wait,
323 TASK_UNINTERRUPTIBLE);
324 spin_unlock_irq(&cwq->lock);
325 schedule();
326 spin_lock_irq(&cwq->lock);
328 finish_wait(&cwq->work_done, &wait);
329 spin_unlock_irq(&cwq->lock);
334 * flush_workqueue - ensure that any scheduled work has run to completion.
335 * @wq: workqueue to flush
337 * Forces execution of the workqueue and blocks until its completion.
338 * This is typically used in driver shutdown handlers.
340 * This function will sample each workqueue's current insert_sequence number and
341 * will sleep until the head sequence is greater than or equal to that. This
342 * means that we sleep until all works which were queued on entry have been
343 * handled, but we are not livelocked by new incoming ones.
345 * This function used to run the workqueues itself. Now we just wait for the
346 * helper threads to do it.
348 void fastcall flush_workqueue(struct workqueue_struct *wq)
350 might_sleep();
352 if (is_single_threaded(wq)) {
353 /* Always use first cpu's area. */
354 flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, singlethread_cpu));
355 } else {
356 int cpu;
358 mutex_lock(&workqueue_mutex);
359 for_each_online_cpu(cpu)
360 flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
361 mutex_unlock(&workqueue_mutex);
364 EXPORT_SYMBOL_GPL(flush_workqueue);
366 static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq,
367 int cpu)
369 struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu);
370 struct task_struct *p;
372 spin_lock_init(&cwq->lock);
373 cwq->wq = wq;
374 cwq->thread = NULL;
375 cwq->insert_sequence = 0;
376 cwq->remove_sequence = 0;
377 INIT_LIST_HEAD(&cwq->worklist);
378 init_waitqueue_head(&cwq->more_work);
379 init_waitqueue_head(&cwq->work_done);
381 if (is_single_threaded(wq))
382 p = kthread_create(worker_thread, cwq, "%s", wq->name);
383 else
384 p = kthread_create(worker_thread, cwq, "%s/%d", wq->name, cpu);
385 if (IS_ERR(p))
386 return NULL;
387 cwq->thread = p;
388 return p;
391 struct workqueue_struct *__create_workqueue(const char *name,
392 int singlethread)
394 int cpu, destroy = 0;
395 struct workqueue_struct *wq;
396 struct task_struct *p;
398 wq = kzalloc(sizeof(*wq), GFP_KERNEL);
399 if (!wq)
400 return NULL;
402 wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct);
403 if (!wq->cpu_wq) {
404 kfree(wq);
405 return NULL;
408 wq->name = name;
409 mutex_lock(&workqueue_mutex);
410 if (singlethread) {
411 INIT_LIST_HEAD(&wq->list);
412 p = create_workqueue_thread(wq, singlethread_cpu);
413 if (!p)
414 destroy = 1;
415 else
416 wake_up_process(p);
417 } else {
418 list_add(&wq->list, &workqueues);
419 for_each_online_cpu(cpu) {
420 p = create_workqueue_thread(wq, cpu);
421 if (p) {
422 kthread_bind(p, cpu);
423 wake_up_process(p);
424 } else
425 destroy = 1;
428 mutex_unlock(&workqueue_mutex);
431 * Was there any error during startup? If yes then clean up:
433 if (destroy) {
434 destroy_workqueue(wq);
435 wq = NULL;
437 return wq;
439 EXPORT_SYMBOL_GPL(__create_workqueue);
441 static void cleanup_workqueue_thread(struct workqueue_struct *wq, int cpu)
443 struct cpu_workqueue_struct *cwq;
444 unsigned long flags;
445 struct task_struct *p;
447 cwq = per_cpu_ptr(wq->cpu_wq, cpu);
448 spin_lock_irqsave(&cwq->lock, flags);
449 p = cwq->thread;
450 cwq->thread = NULL;
451 spin_unlock_irqrestore(&cwq->lock, flags);
452 if (p)
453 kthread_stop(p);
457 * destroy_workqueue - safely terminate a workqueue
458 * @wq: target workqueue
460 * Safely destroy a workqueue. All work currently pending will be done first.
462 void destroy_workqueue(struct workqueue_struct *wq)
464 int cpu;
466 flush_workqueue(wq);
468 /* We don't need the distraction of CPUs appearing and vanishing. */
469 mutex_lock(&workqueue_mutex);
470 if (is_single_threaded(wq))
471 cleanup_workqueue_thread(wq, singlethread_cpu);
472 else {
473 for_each_online_cpu(cpu)
474 cleanup_workqueue_thread(wq, cpu);
475 list_del(&wq->list);
477 mutex_unlock(&workqueue_mutex);
478 free_percpu(wq->cpu_wq);
479 kfree(wq);
481 EXPORT_SYMBOL_GPL(destroy_workqueue);
/* The kernel-global "events" workqueue, created by init_workqueues(). */
static struct workqueue_struct *keventd_wq;
486 * schedule_work - put work task in global workqueue
487 * @work: job to be done
489 * This puts a job in the kernel-global workqueue.
491 int fastcall schedule_work(struct work_struct *work)
493 return queue_work(keventd_wq, work);
495 EXPORT_SYMBOL(schedule_work);
498 * schedule_delayed_work - put work task in global workqueue after delay
499 * @dwork: job to be done
500 * @delay: number of jiffies to wait or 0 for immediate execution
502 * After waiting for a given time this puts a job in the kernel-global
503 * workqueue.
505 int fastcall schedule_delayed_work(struct delayed_work *dwork, unsigned long delay)
507 return queue_delayed_work(keventd_wq, dwork, delay);
509 EXPORT_SYMBOL(schedule_delayed_work);
512 * schedule_delayed_work_on - queue work in global workqueue on CPU after delay
513 * @cpu: cpu to use
514 * @dwork: job to be done
515 * @delay: number of jiffies to wait
517 * After waiting for a given time this puts a job in the kernel-global
518 * workqueue on the specified CPU.
520 int schedule_delayed_work_on(int cpu,
521 struct delayed_work *dwork, unsigned long delay)
523 return queue_delayed_work_on(cpu, keventd_wq, dwork, delay);
525 EXPORT_SYMBOL(schedule_delayed_work_on);
528 * schedule_on_each_cpu - call a function on each online CPU from keventd
529 * @func: the function to call
531 * Returns zero on success.
532 * Returns -ve errno on failure.
534 * Appears to be racy against CPU hotplug.
536 * schedule_on_each_cpu() is very slow.
538 int schedule_on_each_cpu(work_func_t func)
540 int cpu;
541 struct work_struct *works;
543 works = alloc_percpu(struct work_struct);
544 if (!works)
545 return -ENOMEM;
547 mutex_lock(&workqueue_mutex);
548 for_each_online_cpu(cpu) {
549 INIT_WORK(per_cpu_ptr(works, cpu), func);
550 __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu),
551 per_cpu_ptr(works, cpu));
553 mutex_unlock(&workqueue_mutex);
554 flush_workqueue(keventd_wq);
555 free_percpu(works);
556 return 0;
559 void flush_scheduled_work(void)
561 flush_workqueue(keventd_wq);
563 EXPORT_SYMBOL(flush_scheduled_work);
/**
 * cancel_rearming_delayed_workqueue - reliably kill off a delayed
 *			work whose handler rearms the delayed work.
 * @wq:   the controlling workqueue structure
 * @dwork: the delayed work struct
 *
 * Alternates between trying to cancel the delayed work and flushing @wq
 * until a cancel attempt succeeds.
 */
void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq,
				       struct delayed_work *dwork)
{
	while (!cancel_delayed_work(dwork))
		flush_workqueue(wq);
}
EXPORT_SYMBOL(cancel_rearming_delayed_workqueue);
580 * cancel_rearming_delayed_work - reliably kill off a delayed keventd
581 * work whose handler rearms the delayed work.
582 * @dwork: the delayed work struct
584 void cancel_rearming_delayed_work(struct delayed_work *dwork)
586 cancel_rearming_delayed_workqueue(keventd_wq, dwork);
588 EXPORT_SYMBOL(cancel_rearming_delayed_work);
591 * execute_in_process_context - reliably execute the routine with user context
592 * @fn: the function to execute
593 * @ew: guaranteed storage for the execute work structure (must
594 * be available when the work executes)
596 * Executes the function immediately if process context is available,
597 * otherwise schedules the function for delayed execution.
599 * Returns: 0 - function was executed
600 * 1 - function was scheduled for execution
602 int execute_in_process_context(work_func_t fn, struct execute_work *ew)
604 if (!in_interrupt()) {
605 fn(&ew->work);
606 return 0;
609 INIT_WORK(&ew->work, fn);
610 schedule_work(&ew->work);
612 return 1;
614 EXPORT_SYMBOL_GPL(execute_in_process_context);
616 int keventd_up(void)
618 return keventd_wq != NULL;
621 int current_is_keventd(void)
623 struct cpu_workqueue_struct *cwq;
624 int cpu = smp_processor_id(); /* preempt-safe: keventd is per-cpu */
625 int ret = 0;
627 BUG_ON(!keventd_wq);
629 cwq = per_cpu_ptr(keventd_wq->cpu_wq, cpu);
630 if (current == cwq->thread)
631 ret = 1;
633 return ret;
#ifdef CONFIG_HOTPLUG_CPU
/*
 * Take the work from this (downed) CPU: detach everything still queued on
 * @cpu's list and requeue it on the CPU we are currently running on.
 */
static void take_over_work(struct workqueue_struct *wq, unsigned int cpu)
{
	struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu);
	struct list_head list;
	struct work_struct *work;

	spin_lock_irq(&cwq->lock);
	/* Move the whole pending list onto a private head in one step. */
	list_replace_init(&cwq->worklist, &list);

	while (!list_empty(&list)) {
		printk("Taking work for %s\n", wq->name);
		work = list_entry(list.next,struct work_struct,entry);
		list_del(&work->entry);
		__queue_work(per_cpu_ptr(wq->cpu_wq, smp_processor_id()), work);
	}
	spin_unlock_irq(&cwq->lock);
}

/*
 * CPU hotplug notifier: creates, starts, or tears down the per-CPU worker
 * threads of every multi-threaded workqueue as CPUs come and go.
 *
 * workqueue_mutex is taken in the *_PREPARE actions and released in the
 * matching completion/failure action, so it is held across notifier calls.
 *
 * We're holding the cpucontrol mutex here
 */
static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
				  unsigned long action,
				  void *hcpu)
{
	unsigned int hotcpu = (unsigned long)hcpu;
	struct workqueue_struct *wq;

	switch (action) {
	case CPU_UP_PREPARE:
		mutex_lock(&workqueue_mutex);
		/* Create a new workqueue thread for it. */
		list_for_each_entry(wq, &workqueues, list) {
			if (!create_workqueue_thread(wq, hotcpu)) {
				printk("workqueue for %i failed\n", hotcpu);
				/*
				 * NOTE(review): returns with workqueue_mutex
				 * still held; presumably the hotplug core then
				 * delivers CPU_UP_CANCELED, which unlocks it --
				 * confirm against the notifier caller.
				 */
				return NOTIFY_BAD;
			}
		}
		break;

	case CPU_ONLINE:
		/* Kick off worker threads. */
		list_for_each_entry(wq, &workqueues, list) {
			struct cpu_workqueue_struct *cwq;

			cwq = per_cpu_ptr(wq->cpu_wq, hotcpu);
			kthread_bind(cwq->thread, hotcpu);
			wake_up_process(cwq->thread);
		}
		mutex_unlock(&workqueue_mutex);
		break;

	case CPU_UP_CANCELED:
		list_for_each_entry(wq, &workqueues, list) {
			/* Thread creation may have failed for this wq. */
			if (!per_cpu_ptr(wq->cpu_wq, hotcpu)->thread)
				continue;
			/* Unbind so it can run. */
			kthread_bind(per_cpu_ptr(wq->cpu_wq, hotcpu)->thread,
				     any_online_cpu(cpu_online_map));
			cleanup_workqueue_thread(wq, hotcpu);
		}
		mutex_unlock(&workqueue_mutex);
		break;

	case CPU_DOWN_PREPARE:
		mutex_lock(&workqueue_mutex);
		break;

	case CPU_DOWN_FAILED:
		mutex_unlock(&workqueue_mutex);
		break;

	case CPU_DEAD:
		/* Stop all the dead CPU's workers first, then migrate work. */
		list_for_each_entry(wq, &workqueues, list)
			cleanup_workqueue_thread(wq, hotcpu);
		list_for_each_entry(wq, &workqueues, list)
			take_over_work(wq, hotcpu);
		mutex_unlock(&workqueue_mutex);
		break;
	}

	return NOTIFY_OK;
}
#endif
722 void init_workqueues(void)
724 singlethread_cpu = first_cpu(cpu_possible_map);
725 hotcpu_notifier(workqueue_cpu_callback, 0);
726 keventd_wq = create_workqueue("events");
727 BUG_ON(!keventd_wq);