alpha: defconfig: Cleanup from old Kconfig options
[linux-2.6/btrfs-unstable.git] / kernel / kmod.c
blob2f37acde640b6c6e2ce73933a52dbad65f014cf3
/*
	kmod, the new module loader (replaces kerneld)
	Kirk Petersen

	Reorganized not to be a daemon by Adam Richter, with guidance
	from Greg Zornetzer.

	Modified to avoid chroot and file sharing problems.
	Mikael Pettersson

	Limit the concurrent number of kmod modprobes to catch loops from
	"modprobe needs a service that is in a module".
	Keith Owens <kaos@ocs.com.au> December 1999

	Unblock all signals when we exec a usermode process.
	Shuu Yamaguchi <shuu@wondernetworkresources.com> December 2000

	call_usermodehelper wait flag, and remove exec_usermodehelper.
	Rusty Russell <rusty@rustcorp.com.au>  Jan 2003
*/
21 #include <linux/module.h>
22 #include <linux/sched.h>
23 #include <linux/sched/task.h>
24 #include <linux/binfmts.h>
25 #include <linux/syscalls.h>
26 #include <linux/unistd.h>
27 #include <linux/kmod.h>
28 #include <linux/slab.h>
29 #include <linux/completion.h>
30 #include <linux/cred.h>
31 #include <linux/file.h>
32 #include <linux/fdtable.h>
33 #include <linux/workqueue.h>
34 #include <linux/security.h>
35 #include <linux/mount.h>
36 #include <linux/kernel.h>
37 #include <linux/init.h>
38 #include <linux/resource.h>
39 #include <linux/notifier.h>
40 #include <linux/suspend.h>
41 #include <linux/rwsem.h>
42 #include <linux/ptrace.h>
43 #include <linux/async.h>
44 #include <linux/uaccess.h>
46 #include <trace/events/module.h>
48 #define CAP_BSET (void *)1
49 #define CAP_PI (void *)2
51 static kernel_cap_t usermodehelper_bset = CAP_FULL_SET;
52 static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET;
53 static DEFINE_SPINLOCK(umh_sysctl_lock);
54 static DECLARE_RWSEM(umhelper_sem);
56 #ifdef CONFIG_MODULES
58 * Assuming:
60 * threads = div64_u64((u64) totalram_pages * (u64) PAGE_SIZE,
61 * (u64) THREAD_SIZE * 8UL);
63 * If you need less than 50 threads would mean we're dealing with systems
64 * smaller than 3200 pages. This assuems you are capable of having ~13M memory,
65 * and this would only be an be an upper limit, after which the OOM killer
66 * would take effect. Systems like these are very unlikely if modules are
67 * enabled.
69 #define MAX_KMOD_CONCURRENT 50
70 static atomic_t kmod_concurrent_max = ATOMIC_INIT(MAX_KMOD_CONCURRENT);
71 static DECLARE_WAIT_QUEUE_HEAD(kmod_wq);
74 * This is a restriction on having *all* MAX_KMOD_CONCURRENT threads
75 * running at the same time without returning. When this happens we
76 * believe you've somehow ended up with a recursive module dependency
77 * creating a loop.
79 * We have no option but to fail.
81 * Userspace should proactively try to detect and prevent these.
83 #define MAX_KMOD_ALL_BUSY_TIMEOUT 5
86 modprobe_path is set via /proc/sys.
88 char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe";
90 static void free_modprobe_argv(struct subprocess_info *info)
92 kfree(info->argv[3]); /* check call_modprobe() */
93 kfree(info->argv);
96 static int call_modprobe(char *module_name, int wait)
98 struct subprocess_info *info;
99 static char *envp[] = {
100 "HOME=/",
101 "TERM=linux",
102 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
103 NULL
106 char **argv = kmalloc(sizeof(char *[5]), GFP_KERNEL);
107 if (!argv)
108 goto out;
110 module_name = kstrdup(module_name, GFP_KERNEL);
111 if (!module_name)
112 goto free_argv;
114 argv[0] = modprobe_path;
115 argv[1] = "-q";
116 argv[2] = "--";
117 argv[3] = module_name; /* check free_modprobe_argv() */
118 argv[4] = NULL;
120 info = call_usermodehelper_setup(modprobe_path, argv, envp, GFP_KERNEL,
121 NULL, free_modprobe_argv, NULL);
122 if (!info)
123 goto free_module_name;
125 return call_usermodehelper_exec(info, wait | UMH_KILLABLE);
127 free_module_name:
128 kfree(module_name);
129 free_argv:
130 kfree(argv);
131 out:
132 return -ENOMEM;
136 * __request_module - try to load a kernel module
137 * @wait: wait (or not) for the operation to complete
138 * @fmt: printf style format string for the name of the module
139 * @...: arguments as specified in the format string
141 * Load a module using the user mode module loader. The function returns
142 * zero on success or a negative errno code or positive exit code from
143 * "modprobe" on failure. Note that a successful module load does not mean
144 * the module did not then unload and exit on an error of its own. Callers
145 * must check that the service they requested is now available not blindly
146 * invoke it.
148 * If module auto-loading support is disabled then this function
149 * becomes a no-operation.
151 int __request_module(bool wait, const char *fmt, ...)
153 va_list args;
154 char module_name[MODULE_NAME_LEN];
155 int ret;
158 * We don't allow synchronous module loading from async. Module
159 * init may invoke async_synchronize_full() which will end up
160 * waiting for this task which already is waiting for the module
161 * loading to complete, leading to a deadlock.
163 WARN_ON_ONCE(wait && current_is_async());
165 if (!modprobe_path[0])
166 return 0;
168 va_start(args, fmt);
169 ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
170 va_end(args);
171 if (ret >= MODULE_NAME_LEN)
172 return -ENAMETOOLONG;
174 ret = security_kernel_module_request(module_name);
175 if (ret)
176 return ret;
178 if (atomic_dec_if_positive(&kmod_concurrent_max) < 0) {
179 pr_warn_ratelimited("request_module: kmod_concurrent_max (%u) close to 0 (max_modprobes: %u), for module %s, throttling...",
180 atomic_read(&kmod_concurrent_max),
181 MAX_KMOD_CONCURRENT, module_name);
182 ret = wait_event_killable_timeout(kmod_wq,
183 atomic_dec_if_positive(&kmod_concurrent_max) >= 0,
184 MAX_KMOD_ALL_BUSY_TIMEOUT * HZ);
185 if (!ret) {
186 pr_warn_ratelimited("request_module: modprobe %s cannot be processed, kmod busy with %d threads for more than %d seconds now",
187 module_name, MAX_KMOD_CONCURRENT, MAX_KMOD_ALL_BUSY_TIMEOUT);
188 return -ETIME;
189 } else if (ret == -ERESTARTSYS) {
190 pr_warn_ratelimited("request_module: sigkill sent for modprobe %s, giving up", module_name);
191 return ret;
195 trace_module_request(module_name, wait, _RET_IP_);
197 ret = call_modprobe(module_name, wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC);
199 atomic_inc(&kmod_concurrent_max);
200 wake_up(&kmod_wq);
202 return ret;
204 EXPORT_SYMBOL(__request_module);
206 #endif /* CONFIG_MODULES */
208 static void call_usermodehelper_freeinfo(struct subprocess_info *info)
210 if (info->cleanup)
211 (*info->cleanup)(info);
212 kfree(info);
215 static void umh_complete(struct subprocess_info *sub_info)
217 struct completion *comp = xchg(&sub_info->complete, NULL);
219 * See call_usermodehelper_exec(). If xchg() returns NULL
220 * we own sub_info, the UMH_KILLABLE caller has gone away
221 * or the caller used UMH_NO_WAIT.
223 if (comp)
224 complete(comp);
225 else
226 call_usermodehelper_freeinfo(sub_info);
230 * This is the task which runs the usermode application
232 static int call_usermodehelper_exec_async(void *data)
234 struct subprocess_info *sub_info = data;
235 struct cred *new;
236 int retval;
238 spin_lock_irq(&current->sighand->siglock);
239 flush_signal_handlers(current, 1);
240 spin_unlock_irq(&current->sighand->siglock);
243 * Our parent (unbound workqueue) runs with elevated scheduling
244 * priority. Avoid propagating that into the userspace child.
246 set_user_nice(current, 0);
248 retval = -ENOMEM;
249 new = prepare_kernel_cred(current);
250 if (!new)
251 goto out;
253 spin_lock(&umh_sysctl_lock);
254 new->cap_bset = cap_intersect(usermodehelper_bset, new->cap_bset);
255 new->cap_inheritable = cap_intersect(usermodehelper_inheritable,
256 new->cap_inheritable);
257 spin_unlock(&umh_sysctl_lock);
259 if (sub_info->init) {
260 retval = sub_info->init(sub_info, new);
261 if (retval) {
262 abort_creds(new);
263 goto out;
267 commit_creds(new);
269 retval = do_execve(getname_kernel(sub_info->path),
270 (const char __user *const __user *)sub_info->argv,
271 (const char __user *const __user *)sub_info->envp);
272 out:
273 sub_info->retval = retval;
275 * call_usermodehelper_exec_sync() will call umh_complete
276 * if UHM_WAIT_PROC.
278 if (!(sub_info->wait & UMH_WAIT_PROC))
279 umh_complete(sub_info);
280 if (!retval)
281 return 0;
282 do_exit(0);
285 /* Handles UMH_WAIT_PROC. */
286 static void call_usermodehelper_exec_sync(struct subprocess_info *sub_info)
288 pid_t pid;
290 /* If SIGCLD is ignored sys_wait4 won't populate the status. */
291 kernel_sigaction(SIGCHLD, SIG_DFL);
292 pid = kernel_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD);
293 if (pid < 0) {
294 sub_info->retval = pid;
295 } else {
296 int ret = -ECHILD;
298 * Normally it is bogus to call wait4() from in-kernel because
299 * wait4() wants to write the exit code to a userspace address.
300 * But call_usermodehelper_exec_sync() always runs as kernel
301 * thread (workqueue) and put_user() to a kernel address works
302 * OK for kernel threads, due to their having an mm_segment_t
303 * which spans the entire address space.
305 * Thus the __user pointer cast is valid here.
307 sys_wait4(pid, (int __user *)&ret, 0, NULL);
310 * If ret is 0, either call_usermodehelper_exec_async failed and
311 * the real error code is already in sub_info->retval or
312 * sub_info->retval is 0 anyway, so don't mess with it then.
314 if (ret)
315 sub_info->retval = ret;
318 /* Restore default kernel sig handler */
319 kernel_sigaction(SIGCHLD, SIG_IGN);
321 umh_complete(sub_info);
325 * We need to create the usermodehelper kernel thread from a task that is affine
326 * to an optimized set of CPUs (or nohz housekeeping ones) such that they
327 * inherit a widest affinity irrespective of call_usermodehelper() callers with
328 * possibly reduced affinity (eg: per-cpu workqueues). We don't want
329 * usermodehelper targets to contend a busy CPU.
331 * Unbound workqueues provide such wide affinity and allow to block on
332 * UMH_WAIT_PROC requests without blocking pending request (up to some limit).
334 * Besides, workqueues provide the privilege level that caller might not have
335 * to perform the usermodehelper request.
338 static void call_usermodehelper_exec_work(struct work_struct *work)
340 struct subprocess_info *sub_info =
341 container_of(work, struct subprocess_info, work);
343 if (sub_info->wait & UMH_WAIT_PROC) {
344 call_usermodehelper_exec_sync(sub_info);
345 } else {
346 pid_t pid;
348 * Use CLONE_PARENT to reparent it to kthreadd; we do not
349 * want to pollute current->children, and we need a parent
350 * that always ignores SIGCHLD to ensure auto-reaping.
352 pid = kernel_thread(call_usermodehelper_exec_async, sub_info,
353 CLONE_PARENT | SIGCHLD);
354 if (pid < 0) {
355 sub_info->retval = pid;
356 umh_complete(sub_info);
362 * If set, call_usermodehelper_exec() will exit immediately returning -EBUSY
363 * (used for preventing user land processes from being created after the user
364 * land has been frozen during a system-wide hibernation or suspend operation).
365 * Should always be manipulated under umhelper_sem acquired for write.
367 static enum umh_disable_depth usermodehelper_disabled = UMH_DISABLED;
369 /* Number of helpers running */
370 static atomic_t running_helpers = ATOMIC_INIT(0);
373 * Wait queue head used by usermodehelper_disable() to wait for all running
374 * helpers to finish.
376 static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq);
379 * Used by usermodehelper_read_lock_wait() to wait for usermodehelper_disabled
380 * to become 'false'.
382 static DECLARE_WAIT_QUEUE_HEAD(usermodehelper_disabled_waitq);
385 * Time to wait for running_helpers to become zero before the setting of
386 * usermodehelper_disabled in usermodehelper_disable() fails
388 #define RUNNING_HELPERS_TIMEOUT (5 * HZ)
390 int usermodehelper_read_trylock(void)
392 DEFINE_WAIT(wait);
393 int ret = 0;
395 down_read(&umhelper_sem);
396 for (;;) {
397 prepare_to_wait(&usermodehelper_disabled_waitq, &wait,
398 TASK_INTERRUPTIBLE);
399 if (!usermodehelper_disabled)
400 break;
402 if (usermodehelper_disabled == UMH_DISABLED)
403 ret = -EAGAIN;
405 up_read(&umhelper_sem);
407 if (ret)
408 break;
410 schedule();
411 try_to_freeze();
413 down_read(&umhelper_sem);
415 finish_wait(&usermodehelper_disabled_waitq, &wait);
416 return ret;
418 EXPORT_SYMBOL_GPL(usermodehelper_read_trylock);
420 long usermodehelper_read_lock_wait(long timeout)
422 DEFINE_WAIT(wait);
424 if (timeout < 0)
425 return -EINVAL;
427 down_read(&umhelper_sem);
428 for (;;) {
429 prepare_to_wait(&usermodehelper_disabled_waitq, &wait,
430 TASK_UNINTERRUPTIBLE);
431 if (!usermodehelper_disabled)
432 break;
434 up_read(&umhelper_sem);
436 timeout = schedule_timeout(timeout);
437 if (!timeout)
438 break;
440 down_read(&umhelper_sem);
442 finish_wait(&usermodehelper_disabled_waitq, &wait);
443 return timeout;
445 EXPORT_SYMBOL_GPL(usermodehelper_read_lock_wait);
447 void usermodehelper_read_unlock(void)
449 up_read(&umhelper_sem);
451 EXPORT_SYMBOL_GPL(usermodehelper_read_unlock);
454 * __usermodehelper_set_disable_depth - Modify usermodehelper_disabled.
455 * @depth: New value to assign to usermodehelper_disabled.
457 * Change the value of usermodehelper_disabled (under umhelper_sem locked for
458 * writing) and wakeup tasks waiting for it to change.
460 void __usermodehelper_set_disable_depth(enum umh_disable_depth depth)
462 down_write(&umhelper_sem);
463 usermodehelper_disabled = depth;
464 wake_up(&usermodehelper_disabled_waitq);
465 up_write(&umhelper_sem);
469 * __usermodehelper_disable - Prevent new helpers from being started.
470 * @depth: New value to assign to usermodehelper_disabled.
472 * Set usermodehelper_disabled to @depth and wait for running helpers to exit.
474 int __usermodehelper_disable(enum umh_disable_depth depth)
476 long retval;
478 if (!depth)
479 return -EINVAL;
481 down_write(&umhelper_sem);
482 usermodehelper_disabled = depth;
483 up_write(&umhelper_sem);
486 * From now on call_usermodehelper_exec() won't start any new
487 * helpers, so it is sufficient if running_helpers turns out to
488 * be zero at one point (it may be increased later, but that
489 * doesn't matter).
491 retval = wait_event_timeout(running_helpers_waitq,
492 atomic_read(&running_helpers) == 0,
493 RUNNING_HELPERS_TIMEOUT);
494 if (retval)
495 return 0;
497 __usermodehelper_set_disable_depth(UMH_ENABLED);
498 return -EAGAIN;
501 static void helper_lock(void)
503 atomic_inc(&running_helpers);
504 smp_mb__after_atomic();
507 static void helper_unlock(void)
509 if (atomic_dec_and_test(&running_helpers))
510 wake_up(&running_helpers_waitq);
514 * call_usermodehelper_setup - prepare to call a usermode helper
515 * @path: path to usermode executable
516 * @argv: arg vector for process
517 * @envp: environment for process
518 * @gfp_mask: gfp mask for memory allocation
519 * @cleanup: a cleanup function
520 * @init: an init function
521 * @data: arbitrary context sensitive data
523 * Returns either %NULL on allocation failure, or a subprocess_info
524 * structure. This should be passed to call_usermodehelper_exec to
525 * exec the process and free the structure.
527 * The init function is used to customize the helper process prior to
528 * exec. A non-zero return code causes the process to error out, exit,
529 * and return the failure to the calling process
531 * The cleanup function is just before ethe subprocess_info is about to
532 * be freed. This can be used for freeing the argv and envp. The
533 * Function must be runnable in either a process context or the
534 * context in which call_usermodehelper_exec is called.
536 struct subprocess_info *call_usermodehelper_setup(const char *path, char **argv,
537 char **envp, gfp_t gfp_mask,
538 int (*init)(struct subprocess_info *info, struct cred *new),
539 void (*cleanup)(struct subprocess_info *info),
540 void *data)
542 struct subprocess_info *sub_info;
543 sub_info = kzalloc(sizeof(struct subprocess_info), gfp_mask);
544 if (!sub_info)
545 goto out;
547 INIT_WORK(&sub_info->work, call_usermodehelper_exec_work);
549 #ifdef CONFIG_STATIC_USERMODEHELPER
550 sub_info->path = CONFIG_STATIC_USERMODEHELPER_PATH;
551 #else
552 sub_info->path = path;
553 #endif
554 sub_info->argv = argv;
555 sub_info->envp = envp;
557 sub_info->cleanup = cleanup;
558 sub_info->init = init;
559 sub_info->data = data;
560 out:
561 return sub_info;
563 EXPORT_SYMBOL(call_usermodehelper_setup);
566 * call_usermodehelper_exec - start a usermode application
567 * @sub_info: information about the subprocessa
568 * @wait: wait for the application to finish and return status.
569 * when UMH_NO_WAIT don't wait at all, but you get no useful error back
570 * when the program couldn't be exec'ed. This makes it safe to call
571 * from interrupt context.
573 * Runs a user-space application. The application is started
574 * asynchronously if wait is not set, and runs as a child of system workqueues.
575 * (ie. it runs with full root capabilities and optimized affinity).
577 int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
579 DECLARE_COMPLETION_ONSTACK(done);
580 int retval = 0;
582 if (!sub_info->path) {
583 call_usermodehelper_freeinfo(sub_info);
584 return -EINVAL;
586 helper_lock();
587 if (usermodehelper_disabled) {
588 retval = -EBUSY;
589 goto out;
593 * If there is no binary for us to call, then just return and get out of
594 * here. This allows us to set STATIC_USERMODEHELPER_PATH to "" and
595 * disable all call_usermodehelper() calls.
597 if (strlen(sub_info->path) == 0)
598 goto out;
601 * Set the completion pointer only if there is a waiter.
602 * This makes it possible to use umh_complete to free
603 * the data structure in case of UMH_NO_WAIT.
605 sub_info->complete = (wait == UMH_NO_WAIT) ? NULL : &done;
606 sub_info->wait = wait;
608 queue_work(system_unbound_wq, &sub_info->work);
609 if (wait == UMH_NO_WAIT) /* task has freed sub_info */
610 goto unlock;
612 if (wait & UMH_KILLABLE) {
613 retval = wait_for_completion_killable(&done);
614 if (!retval)
615 goto wait_done;
617 /* umh_complete() will see NULL and free sub_info */
618 if (xchg(&sub_info->complete, NULL))
619 goto unlock;
620 /* fallthrough, umh_complete() was already called */
623 wait_for_completion(&done);
624 wait_done:
625 retval = sub_info->retval;
626 out:
627 call_usermodehelper_freeinfo(sub_info);
628 unlock:
629 helper_unlock();
630 return retval;
632 EXPORT_SYMBOL(call_usermodehelper_exec);
635 * call_usermodehelper() - prepare and start a usermode application
636 * @path: path to usermode executable
637 * @argv: arg vector for process
638 * @envp: environment for process
639 * @wait: wait for the application to finish and return status.
640 * when UMH_NO_WAIT don't wait at all, but you get no useful error back
641 * when the program couldn't be exec'ed. This makes it safe to call
642 * from interrupt context.
644 * This function is the equivalent to use call_usermodehelper_setup() and
645 * call_usermodehelper_exec().
647 int call_usermodehelper(const char *path, char **argv, char **envp, int wait)
649 struct subprocess_info *info;
650 gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
652 info = call_usermodehelper_setup(path, argv, envp, gfp_mask,
653 NULL, NULL, NULL);
654 if (info == NULL)
655 return -ENOMEM;
657 return call_usermodehelper_exec(info, wait);
659 EXPORT_SYMBOL(call_usermodehelper);
661 static int proc_cap_handler(struct ctl_table *table, int write,
662 void __user *buffer, size_t *lenp, loff_t *ppos)
664 struct ctl_table t;
665 unsigned long cap_array[_KERNEL_CAPABILITY_U32S];
666 kernel_cap_t new_cap;
667 int err, i;
669 if (write && (!capable(CAP_SETPCAP) ||
670 !capable(CAP_SYS_MODULE)))
671 return -EPERM;
674 * convert from the global kernel_cap_t to the ulong array to print to
675 * userspace if this is a read.
677 spin_lock(&umh_sysctl_lock);
678 for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++) {
679 if (table->data == CAP_BSET)
680 cap_array[i] = usermodehelper_bset.cap[i];
681 else if (table->data == CAP_PI)
682 cap_array[i] = usermodehelper_inheritable.cap[i];
683 else
684 BUG();
686 spin_unlock(&umh_sysctl_lock);
688 t = *table;
689 t.data = &cap_array;
692 * actually read or write and array of ulongs from userspace. Remember
693 * these are least significant 32 bits first
695 err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
696 if (err < 0)
697 return err;
700 * convert from the sysctl array of ulongs to the kernel_cap_t
701 * internal representation
703 for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++)
704 new_cap.cap[i] = cap_array[i];
707 * Drop everything not in the new_cap (but don't add things)
709 spin_lock(&umh_sysctl_lock);
710 if (write) {
711 if (table->data == CAP_BSET)
712 usermodehelper_bset = cap_intersect(usermodehelper_bset, new_cap);
713 if (table->data == CAP_PI)
714 usermodehelper_inheritable = cap_intersect(usermodehelper_inheritable, new_cap);
716 spin_unlock(&umh_sysctl_lock);
718 return 0;
721 struct ctl_table usermodehelper_table[] = {
723 .procname = "bset",
724 .data = CAP_BSET,
725 .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long),
726 .mode = 0600,
727 .proc_handler = proc_cap_handler,
730 .procname = "inheritable",
731 .data = CAP_PI,
732 .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long),
733 .mode = 0600,
734 .proc_handler = proc_cap_handler,