2 * sysctl.c: General linux system control interface
4 * Begun 24 March 1995, Stephen Tweedie
5 * Added /proc support, Dec 1995
6 * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7 * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8 * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9 * Dynamic registration fixes, Stephen Tweedie.
10 * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11 * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris Horn.
13 * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14 * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15 * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill Wendling.
17 * The list_for_each() macro wasn't appropriate for the sysctl loop.
18 * Removed it and replaced it with older style, 03/23/00, Bill Wendling
21 #include <linux/module.h>
23 #include <linux/swap.h>
24 #include <linux/slab.h>
25 #include <linux/sysctl.h>
26 #include <linux/proc_fs.h>
27 #include <linux/security.h>
28 #include <linux/ctype.h>
29 #include <linux/kmemcheck.h>
31 #include <linux/init.h>
32 #include <linux/kernel.h>
33 #include <linux/kobject.h>
34 #include <linux/net.h>
35 #include <linux/sysrq.h>
36 #include <linux/highuid.h>
37 #include <linux/writeback.h>
38 #include <linux/ratelimit.h>
39 #include <linux/hugetlb.h>
40 #include <linux/initrd.h>
41 #include <linux/key.h>
42 #include <linux/times.h>
43 #include <linux/limits.h>
44 #include <linux/dcache.h>
45 #include <linux/syscalls.h>
46 #include <linux/vmstat.h>
47 #include <linux/nfs_fs.h>
48 #include <linux/acpi.h>
49 #include <linux/reboot.h>
50 #include <linux/ftrace.h>
51 #include <linux/slow-work.h>
52 #include <linux/perf_event.h>
53 #include <linux/kprobes.h>
55 #include <asm/uaccess.h>
56 #include <asm/processor.h>
60 #include <asm/stacktrace.h>
65 #if defined(CONFIG_SYSCTL)
67 /* External variables not in a header file. */
69 extern int print_fatal_signals
;
70 extern int sysctl_overcommit_memory
;
71 extern int sysctl_overcommit_ratio
;
72 extern int sysctl_panic_on_oom
;
73 extern int sysctl_oom_kill_allocating_task
;
74 extern int sysctl_oom_dump_tasks
;
75 extern int max_threads
;
76 extern int core_uses_pid
;
77 extern int suid_dumpable
;
78 extern char core_pattern
[];
79 extern unsigned int core_pipe_limit
;
81 extern int min_free_kbytes
;
82 extern int pid_max_min
, pid_max_max
;
83 extern int sysctl_drop_caches
;
84 extern int percpu_pagelist_fraction
;
85 extern int compat_log
;
86 extern int latencytop_enabled
;
87 extern int sysctl_nr_open_min
, sysctl_nr_open_max
;
89 extern int sysctl_nr_trim_pages
;
91 #ifdef CONFIG_RCU_TORTURE_TEST
92 extern int rcutorture_runnable
;
93 #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
95 extern int blk_iopoll_enabled
;
98 /* Constants used for minimum and maximum */
99 #ifdef CONFIG_DETECT_SOFTLOCKUP
100 static int sixty
= 60;
101 static int neg_one
= -1;
105 static int __maybe_unused one
= 1;
106 static int __maybe_unused two
= 2;
107 static unsigned long one_ul
= 1;
108 static int one_hundred
= 100;
110 static int ten_thousand
= 10000;
113 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
114 static unsigned long dirty_bytes_min
= 2 * PAGE_SIZE
;
116 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
117 static int maxolduid
= 65535;
118 static int minolduid
;
119 static int min_percpu_pagelist_fract
= 8;
121 static int ngroups_max
= NGROUPS_MAX
;
123 #ifdef CONFIG_MODULES
124 extern char modprobe_path
[];
125 extern int modules_disabled
;
127 #ifdef CONFIG_CHR_DEV_SG
128 extern int sg_big_buff
;
132 #include <asm/system.h>
135 #ifdef CONFIG_SPARC64
136 extern int sysctl_tsb_ratio
;
140 extern int pwrsw_enabled
;
141 extern int unaligned_enabled
;
145 #ifdef CONFIG_MATHEMU
146 extern int sysctl_ieee_emulation_warnings
;
148 extern int sysctl_userprocess_debug
;
149 extern int spin_retry
;
152 #ifdef CONFIG_BSD_PROCESS_ACCT
153 extern int acct_parm
[];
157 extern int no_unaligned_warning
;
158 extern int unaligned_dump_stack
;
161 extern struct ratelimit_state printk_ratelimit_state
;
163 #ifdef CONFIG_RT_MUTEXES
164 extern int max_lock_depth
;
167 #ifdef CONFIG_PROC_SYSCTL
168 static int proc_do_cad_pid(struct ctl_table
*table
, int write
,
169 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
);
170 static int proc_taint(struct ctl_table
*table
, int write
,
171 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
);
174 static struct ctl_table root_table
[];
175 static struct ctl_table_root sysctl_table_root
;
/*
 * root_table_header: statically initialised header for root_table.
 * Its ctl_entry list node is initialised against
 * sysctl_table_root.default_set.list, and both .root and .set refer back
 * into sysctl_table_root (the root itself and its default_set).
 */
176 static struct ctl_table_header root_table_header
= {
178 .ctl_table
= root_table
,
179 .ctl_entry
= LIST_HEAD_INIT(sysctl_table_root
.default_set
.list
),
180 .root
= &sysctl_table_root
,
181 .set
= &sysctl_table_root
.default_set
,
/*
 * sysctl_table_root: the built-in root.
 * root_list is initialised to reference itself, while default_set.list is
 * initialised against root_table_header.ctl_entry -- the counterpart of the
 * initialiser used for root_table_header.ctl_entry, so the two list nodes
 * reference each other.
 */
183 static struct ctl_table_root sysctl_table_root
= {
184 .root_list
= LIST_HEAD_INIT(sysctl_table_root
.root_list
),
185 .default_set
.list
= LIST_HEAD_INIT(root_table_header
.ctl_entry
),
188 static struct ctl_table kern_table
[];
189 static struct ctl_table vm_table
[];
190 static struct ctl_table fs_table
[];
191 static struct ctl_table debug_table
[];
192 static struct ctl_table dev_table
[];
193 extern struct ctl_table random_table
[];
194 #ifdef CONFIG_INOTIFY_USER
195 extern struct ctl_table inotify_table
[];
198 extern struct ctl_table epoll_table
[];
201 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
202 int sysctl_legacy_va_layout
;
205 extern int prove_locking
;
206 extern int lock_stat
;
208 /* The default sysctl tables: */
210 static struct ctl_table root_table
[] = {
212 .procname
= "kernel",
229 .child
= debug_table
,
237 * NOTE: do not add new entries to this table unless you have read
238 * Documentation/sysctl/ctl_unnumbered.txt
243 #ifdef CONFIG_SCHED_DEBUG
244 static int min_sched_granularity_ns
= 100000; /* 100 usecs */
245 static int max_sched_granularity_ns
= NSEC_PER_SEC
; /* 1 second */
246 static int min_wakeup_granularity_ns
; /* 0 usecs */
247 static int max_wakeup_granularity_ns
= NSEC_PER_SEC
; /* 1 second */
248 static int min_sched_tunable_scaling
= SCHED_TUNABLESCALING_NONE
;
249 static int max_sched_tunable_scaling
= SCHED_TUNABLESCALING_END
-1;
250 static int min_sched_shares_ratelimit
= 100000; /* 100 usec */
251 static int max_sched_shares_ratelimit
= NSEC_PER_SEC
; /* 1 second */
254 static struct ctl_table kern_table
[] = {
256 .procname
= "sched_child_runs_first",
257 .data
= &sysctl_sched_child_runs_first
,
258 .maxlen
= sizeof(unsigned int),
260 .proc_handler
= proc_dointvec
,
262 #ifdef CONFIG_SCHED_DEBUG
264 .procname
= "sched_min_granularity_ns",
265 .data
= &sysctl_sched_min_granularity
,
266 .maxlen
= sizeof(unsigned int),
268 .proc_handler
= sched_proc_update_handler
,
269 .extra1
= &min_sched_granularity_ns
,
270 .extra2
= &max_sched_granularity_ns
,
273 .procname
= "sched_latency_ns",
274 .data
= &sysctl_sched_latency
,
275 .maxlen
= sizeof(unsigned int),
277 .proc_handler
= sched_proc_update_handler
,
278 .extra1
= &min_sched_granularity_ns
,
279 .extra2
= &max_sched_granularity_ns
,
282 .procname
= "sched_wakeup_granularity_ns",
283 .data
= &sysctl_sched_wakeup_granularity
,
284 .maxlen
= sizeof(unsigned int),
286 .proc_handler
= sched_proc_update_handler
,
287 .extra1
= &min_wakeup_granularity_ns
,
288 .extra2
= &max_wakeup_granularity_ns
,
291 .procname
= "sched_shares_ratelimit",
292 .data
= &sysctl_sched_shares_ratelimit
,
293 .maxlen
= sizeof(unsigned int),
295 .proc_handler
= sched_proc_update_handler
,
296 .extra1
= &min_sched_shares_ratelimit
,
297 .extra2
= &max_sched_shares_ratelimit
,
300 .procname
= "sched_tunable_scaling",
301 .data
= &sysctl_sched_tunable_scaling
,
302 .maxlen
= sizeof(enum sched_tunable_scaling
),
304 .proc_handler
= sched_proc_update_handler
,
305 .extra1
= &min_sched_tunable_scaling
,
306 .extra2
= &max_sched_tunable_scaling
,
309 .procname
= "sched_shares_thresh",
310 .data
= &sysctl_sched_shares_thresh
,
311 .maxlen
= sizeof(unsigned int),
313 .proc_handler
= proc_dointvec_minmax
,
317 .procname
= "sched_migration_cost",
318 .data
= &sysctl_sched_migration_cost
,
319 .maxlen
= sizeof(unsigned int),
321 .proc_handler
= proc_dointvec
,
324 .procname
= "sched_nr_migrate",
325 .data
= &sysctl_sched_nr_migrate
,
326 .maxlen
= sizeof(unsigned int),
328 .proc_handler
= proc_dointvec
,
331 .procname
= "sched_time_avg",
332 .data
= &sysctl_sched_time_avg
,
333 .maxlen
= sizeof(unsigned int),
335 .proc_handler
= proc_dointvec
,
338 .procname
= "timer_migration",
339 .data
= &sysctl_timer_migration
,
340 .maxlen
= sizeof(unsigned int),
342 .proc_handler
= proc_dointvec_minmax
,
348 .procname
= "sched_rt_period_us",
349 .data
= &sysctl_sched_rt_period
,
350 .maxlen
= sizeof(unsigned int),
352 .proc_handler
= sched_rt_handler
,
355 .procname
= "sched_rt_runtime_us",
356 .data
= &sysctl_sched_rt_runtime
,
357 .maxlen
= sizeof(int),
359 .proc_handler
= sched_rt_handler
,
362 .procname
= "sched_compat_yield",
363 .data
= &sysctl_sched_compat_yield
,
364 .maxlen
= sizeof(unsigned int),
366 .proc_handler
= proc_dointvec
,
368 #ifdef CONFIG_PROVE_LOCKING
370 .procname
= "prove_locking",
371 .data
= &prove_locking
,
372 .maxlen
= sizeof(int),
374 .proc_handler
= proc_dointvec
,
377 #ifdef CONFIG_LOCK_STAT
379 .procname
= "lock_stat",
381 .maxlen
= sizeof(int),
383 .proc_handler
= proc_dointvec
,
388 .data
= &panic_timeout
,
389 .maxlen
= sizeof(int),
391 .proc_handler
= proc_dointvec
,
394 .procname
= "core_uses_pid",
395 .data
= &core_uses_pid
,
396 .maxlen
= sizeof(int),
398 .proc_handler
= proc_dointvec
,
401 .procname
= "core_pattern",
402 .data
= core_pattern
,
403 .maxlen
= CORENAME_MAX_SIZE
,
405 .proc_handler
= proc_dostring
,
408 .procname
= "core_pipe_limit",
409 .data
= &core_pipe_limit
,
410 .maxlen
= sizeof(unsigned int),
412 .proc_handler
= proc_dointvec
,
414 #ifdef CONFIG_PROC_SYSCTL
416 .procname
= "tainted",
417 .maxlen
= sizeof(long),
419 .proc_handler
= proc_taint
,
422 #ifdef CONFIG_LATENCYTOP
424 .procname
= "latencytop",
425 .data
= &latencytop_enabled
,
426 .maxlen
= sizeof(int),
428 .proc_handler
= proc_dointvec
,
431 #ifdef CONFIG_BLK_DEV_INITRD
433 .procname
= "real-root-dev",
434 .data
= &real_root_dev
,
435 .maxlen
= sizeof(int),
437 .proc_handler
= proc_dointvec
,
441 .procname
= "print-fatal-signals",
442 .data
= &print_fatal_signals
,
443 .maxlen
= sizeof(int),
445 .proc_handler
= proc_dointvec
,
449 .procname
= "reboot-cmd",
450 .data
= reboot_command
,
453 .proc_handler
= proc_dostring
,
456 .procname
= "stop-a",
457 .data
= &stop_a_enabled
,
458 .maxlen
= sizeof (int),
460 .proc_handler
= proc_dointvec
,
463 .procname
= "scons-poweroff",
464 .data
= &scons_pwroff
,
465 .maxlen
= sizeof (int),
467 .proc_handler
= proc_dointvec
,
470 #ifdef CONFIG_SPARC64
472 .procname
= "tsb-ratio",
473 .data
= &sysctl_tsb_ratio
,
474 .maxlen
= sizeof (int),
476 .proc_handler
= proc_dointvec
,
481 .procname
= "soft-power",
482 .data
= &pwrsw_enabled
,
483 .maxlen
= sizeof (int),
485 .proc_handler
= proc_dointvec
,
488 .procname
= "unaligned-trap",
489 .data
= &unaligned_enabled
,
490 .maxlen
= sizeof (int),
492 .proc_handler
= proc_dointvec
,
496 .procname
= "ctrl-alt-del",
498 .maxlen
= sizeof(int),
500 .proc_handler
= proc_dointvec
,
502 #ifdef CONFIG_FUNCTION_TRACER
504 .procname
= "ftrace_enabled",
505 .data
= &ftrace_enabled
,
506 .maxlen
= sizeof(int),
508 .proc_handler
= ftrace_enable_sysctl
,
511 #ifdef CONFIG_STACK_TRACER
513 .procname
= "stack_tracer_enabled",
514 .data
= &stack_tracer_enabled
,
515 .maxlen
= sizeof(int),
517 .proc_handler
= stack_trace_sysctl
,
520 #ifdef CONFIG_TRACING
522 .procname
= "ftrace_dump_on_oops",
523 .data
= &ftrace_dump_on_oops
,
524 .maxlen
= sizeof(int),
526 .proc_handler
= proc_dointvec
,
529 #ifdef CONFIG_MODULES
531 .procname
= "modprobe",
532 .data
= &modprobe_path
,
533 .maxlen
= KMOD_PATH_LEN
,
535 .proc_handler
= proc_dostring
,
538 .procname
= "modules_disabled",
539 .data
= &modules_disabled
,
540 .maxlen
= sizeof(int),
542 /* only handle a transition from default "0" to "1" */
543 .proc_handler
= proc_dointvec_minmax
,
548 #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
550 .procname
= "hotplug",
551 .data
= &uevent_helper
,
552 .maxlen
= UEVENT_HELPER_PATH_LEN
,
554 .proc_handler
= proc_dostring
,
557 #ifdef CONFIG_CHR_DEV_SG
559 .procname
= "sg-big-buff",
560 .data
= &sg_big_buff
,
561 .maxlen
= sizeof (int),
563 .proc_handler
= proc_dointvec
,
566 #ifdef CONFIG_BSD_PROCESS_ACCT
570 .maxlen
= 3*sizeof(int),
572 .proc_handler
= proc_dointvec
,
575 #ifdef CONFIG_MAGIC_SYSRQ
578 .data
= &__sysrq_enabled
,
579 .maxlen
= sizeof (int),
581 .proc_handler
= proc_dointvec
,
584 #ifdef CONFIG_PROC_SYSCTL
586 .procname
= "cad_pid",
588 .maxlen
= sizeof (int),
590 .proc_handler
= proc_do_cad_pid
,
594 .procname
= "threads-max",
595 .data
= &max_threads
,
596 .maxlen
= sizeof(int),
598 .proc_handler
= proc_dointvec
,
601 .procname
= "random",
603 .child
= random_table
,
606 .procname
= "overflowuid",
607 .data
= &overflowuid
,
608 .maxlen
= sizeof(int),
610 .proc_handler
= proc_dointvec_minmax
,
611 .extra1
= &minolduid
,
612 .extra2
= &maxolduid
,
615 .procname
= "overflowgid",
616 .data
= &overflowgid
,
617 .maxlen
= sizeof(int),
619 .proc_handler
= proc_dointvec_minmax
,
620 .extra1
= &minolduid
,
621 .extra2
= &maxolduid
,
624 #ifdef CONFIG_MATHEMU
626 .procname
= "ieee_emulation_warnings",
627 .data
= &sysctl_ieee_emulation_warnings
,
628 .maxlen
= sizeof(int),
630 .proc_handler
= proc_dointvec
,
634 .procname
= "userprocess_debug",
635 .data
= &sysctl_userprocess_debug
,
636 .maxlen
= sizeof(int),
638 .proc_handler
= proc_dointvec
,
642 .procname
= "pid_max",
644 .maxlen
= sizeof (int),
646 .proc_handler
= proc_dointvec_minmax
,
647 .extra1
= &pid_max_min
,
648 .extra2
= &pid_max_max
,
651 .procname
= "panic_on_oops",
652 .data
= &panic_on_oops
,
653 .maxlen
= sizeof(int),
655 .proc_handler
= proc_dointvec
,
657 #if defined CONFIG_PRINTK
659 .procname
= "printk",
660 .data
= &console_loglevel
,
661 .maxlen
= 4*sizeof(int),
663 .proc_handler
= proc_dointvec
,
666 .procname
= "printk_ratelimit",
667 .data
= &printk_ratelimit_state
.interval
,
668 .maxlen
= sizeof(int),
670 .proc_handler
= proc_dointvec_jiffies
,
673 .procname
= "printk_ratelimit_burst",
674 .data
= &printk_ratelimit_state
.burst
,
675 .maxlen
= sizeof(int),
677 .proc_handler
= proc_dointvec
,
680 .procname
= "printk_delay",
681 .data
= &printk_delay_msec
,
682 .maxlen
= sizeof(int),
684 .proc_handler
= proc_dointvec_minmax
,
686 .extra2
= &ten_thousand
,
690 .procname
= "ngroups_max",
691 .data
= &ngroups_max
,
692 .maxlen
= sizeof (int),
694 .proc_handler
= proc_dointvec
,
696 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
698 .procname
= "unknown_nmi_panic",
699 .data
= &unknown_nmi_panic
,
700 .maxlen
= sizeof (int),
702 .proc_handler
= proc_dointvec
,
705 .procname
= "nmi_watchdog",
706 .data
= &nmi_watchdog_enabled
,
707 .maxlen
= sizeof (int),
709 .proc_handler
= proc_nmi_enabled
,
712 #if defined(CONFIG_X86)
714 .procname
= "panic_on_unrecovered_nmi",
715 .data
= &panic_on_unrecovered_nmi
,
716 .maxlen
= sizeof(int),
718 .proc_handler
= proc_dointvec
,
721 .procname
= "panic_on_io_nmi",
722 .data
= &panic_on_io_nmi
,
723 .maxlen
= sizeof(int),
725 .proc_handler
= proc_dointvec
,
728 .procname
= "bootloader_type",
729 .data
= &bootloader_type
,
730 .maxlen
= sizeof (int),
732 .proc_handler
= proc_dointvec
,
735 .procname
= "bootloader_version",
736 .data
= &bootloader_version
,
737 .maxlen
= sizeof (int),
739 .proc_handler
= proc_dointvec
,
742 .procname
= "kstack_depth_to_print",
743 .data
= &kstack_depth_to_print
,
744 .maxlen
= sizeof(int),
746 .proc_handler
= proc_dointvec
,
749 .procname
= "io_delay_type",
750 .data
= &io_delay_type
,
751 .maxlen
= sizeof(int),
753 .proc_handler
= proc_dointvec
,
756 #if defined(CONFIG_MMU)
758 .procname
= "randomize_va_space",
759 .data
= &randomize_va_space
,
760 .maxlen
= sizeof(int),
762 .proc_handler
= proc_dointvec
,
765 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
767 .procname
= "spin_retry",
769 .maxlen
= sizeof (int),
771 .proc_handler
= proc_dointvec
,
774 #if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
776 .procname
= "acpi_video_flags",
777 .data
= &acpi_realmode_flags
,
778 .maxlen
= sizeof (unsigned long),
780 .proc_handler
= proc_doulongvec_minmax
,
785 .procname
= "ignore-unaligned-usertrap",
786 .data
= &no_unaligned_warning
,
787 .maxlen
= sizeof (int),
789 .proc_handler
= proc_dointvec
,
792 .procname
= "unaligned-dump-stack",
793 .data
= &unaligned_dump_stack
,
794 .maxlen
= sizeof (int),
796 .proc_handler
= proc_dointvec
,
799 #ifdef CONFIG_DETECT_SOFTLOCKUP
801 .procname
= "softlockup_panic",
802 .data
= &softlockup_panic
,
803 .maxlen
= sizeof(int),
805 .proc_handler
= proc_dointvec_minmax
,
810 .procname
= "softlockup_thresh",
811 .data
= &softlockup_thresh
,
812 .maxlen
= sizeof(int),
814 .proc_handler
= proc_dosoftlockup_thresh
,
819 #ifdef CONFIG_DETECT_HUNG_TASK
821 .procname
= "hung_task_panic",
822 .data
= &sysctl_hung_task_panic
,
823 .maxlen
= sizeof(int),
825 .proc_handler
= proc_dointvec_minmax
,
830 .procname
= "hung_task_check_count",
831 .data
= &sysctl_hung_task_check_count
,
832 .maxlen
= sizeof(unsigned long),
834 .proc_handler
= proc_doulongvec_minmax
,
837 .procname
= "hung_task_timeout_secs",
838 .data
= &sysctl_hung_task_timeout_secs
,
839 .maxlen
= sizeof(unsigned long),
841 .proc_handler
= proc_dohung_task_timeout_secs
,
844 .procname
= "hung_task_warnings",
845 .data
= &sysctl_hung_task_warnings
,
846 .maxlen
= sizeof(unsigned long),
848 .proc_handler
= proc_doulongvec_minmax
,
853 .procname
= "compat-log",
855 .maxlen
= sizeof (int),
857 .proc_handler
= proc_dointvec
,
860 #ifdef CONFIG_RT_MUTEXES
862 .procname
= "max_lock_depth",
863 .data
= &max_lock_depth
,
864 .maxlen
= sizeof(int),
866 .proc_handler
= proc_dointvec
,
870 .procname
= "poweroff_cmd",
871 .data
= &poweroff_cmd
,
872 .maxlen
= POWEROFF_CMD_PATH_LEN
,
874 .proc_handler
= proc_dostring
,
880 .child
= key_sysctls
,
883 #ifdef CONFIG_RCU_TORTURE_TEST
885 .procname
= "rcutorture_runnable",
886 .data
= &rcutorture_runnable
,
887 .maxlen
= sizeof(int),
889 .proc_handler
= proc_dointvec
,
892 #ifdef CONFIG_SLOW_WORK
894 .procname
= "slow-work",
896 .child
= slow_work_sysctls
,
899 #ifdef CONFIG_PERF_EVENTS
901 .procname
= "perf_event_paranoid",
902 .data
= &sysctl_perf_event_paranoid
,
903 .maxlen
= sizeof(sysctl_perf_event_paranoid
),
905 .proc_handler
= proc_dointvec
,
908 .procname
= "perf_event_mlock_kb",
909 .data
= &sysctl_perf_event_mlock
,
910 .maxlen
= sizeof(sysctl_perf_event_mlock
),
912 .proc_handler
= proc_dointvec
,
915 .procname
= "perf_event_max_sample_rate",
916 .data
= &sysctl_perf_event_sample_rate
,
917 .maxlen
= sizeof(sysctl_perf_event_sample_rate
),
919 .proc_handler
= proc_dointvec
,
922 #ifdef CONFIG_KMEMCHECK
924 .procname
= "kmemcheck",
925 .data
= &kmemcheck_enabled
,
926 .maxlen
= sizeof(int),
928 .proc_handler
= proc_dointvec
,
933 .procname
= "blk_iopoll",
934 .data
= &blk_iopoll_enabled
,
935 .maxlen
= sizeof(int),
937 .proc_handler
= proc_dointvec
,
941 * NOTE: do not add new entries to this table unless you have read
942 * Documentation/sysctl/ctl_unnumbered.txt
947 static struct ctl_table vm_table
[] = {
949 .procname
= "overcommit_memory",
950 .data
= &sysctl_overcommit_memory
,
951 .maxlen
= sizeof(sysctl_overcommit_memory
),
953 .proc_handler
= proc_dointvec
,
956 .procname
= "panic_on_oom",
957 .data
= &sysctl_panic_on_oom
,
958 .maxlen
= sizeof(sysctl_panic_on_oom
),
960 .proc_handler
= proc_dointvec
,
963 .procname
= "oom_kill_allocating_task",
964 .data
= &sysctl_oom_kill_allocating_task
,
965 .maxlen
= sizeof(sysctl_oom_kill_allocating_task
),
967 .proc_handler
= proc_dointvec
,
970 .procname
= "oom_dump_tasks",
971 .data
= &sysctl_oom_dump_tasks
,
972 .maxlen
= sizeof(sysctl_oom_dump_tasks
),
974 .proc_handler
= proc_dointvec
,
977 .procname
= "overcommit_ratio",
978 .data
= &sysctl_overcommit_ratio
,
979 .maxlen
= sizeof(sysctl_overcommit_ratio
),
981 .proc_handler
= proc_dointvec
,
984 .procname
= "page-cluster",
985 .data
= &page_cluster
,
986 .maxlen
= sizeof(int),
988 .proc_handler
= proc_dointvec
,
991 .procname
= "dirty_background_ratio",
992 .data
= &dirty_background_ratio
,
993 .maxlen
= sizeof(dirty_background_ratio
),
995 .proc_handler
= dirty_background_ratio_handler
,
997 .extra2
= &one_hundred
,
1000 .procname
= "dirty_background_bytes",
1001 .data
= &dirty_background_bytes
,
1002 .maxlen
= sizeof(dirty_background_bytes
),
1004 .proc_handler
= dirty_background_bytes_handler
,
1008 .procname
= "dirty_ratio",
1009 .data
= &vm_dirty_ratio
,
1010 .maxlen
= sizeof(vm_dirty_ratio
),
1012 .proc_handler
= dirty_ratio_handler
,
1014 .extra2
= &one_hundred
,
1017 .procname
= "dirty_bytes",
1018 .data
= &vm_dirty_bytes
,
1019 .maxlen
= sizeof(vm_dirty_bytes
),
1021 .proc_handler
= dirty_bytes_handler
,
1022 .extra1
= &dirty_bytes_min
,
1025 .procname
= "dirty_writeback_centisecs",
1026 .data
= &dirty_writeback_interval
,
1027 .maxlen
= sizeof(dirty_writeback_interval
),
1029 .proc_handler
= dirty_writeback_centisecs_handler
,
1032 .procname
= "dirty_expire_centisecs",
1033 .data
= &dirty_expire_interval
,
1034 .maxlen
= sizeof(dirty_expire_interval
),
1036 .proc_handler
= proc_dointvec
,
1039 .procname
= "nr_pdflush_threads",
1040 .data
= &nr_pdflush_threads
,
1041 .maxlen
= sizeof nr_pdflush_threads
,
1042 .mode
= 0444 /* read-only*/,
1043 .proc_handler
= proc_dointvec
,
1046 .procname
= "swappiness",
1047 .data
= &vm_swappiness
,
1048 .maxlen
= sizeof(vm_swappiness
),
1050 .proc_handler
= proc_dointvec_minmax
,
1052 .extra2
= &one_hundred
,
1054 #ifdef CONFIG_HUGETLB_PAGE
1056 .procname
= "nr_hugepages",
1058 .maxlen
= sizeof(unsigned long),
1060 .proc_handler
= hugetlb_sysctl_handler
,
1061 .extra1
= (void *)&hugetlb_zero
,
1062 .extra2
= (void *)&hugetlb_infinity
,
1066 .procname
= "nr_hugepages_mempolicy",
1068 .maxlen
= sizeof(unsigned long),
1070 .proc_handler
= &hugetlb_mempolicy_sysctl_handler
,
1071 .extra1
= (void *)&hugetlb_zero
,
1072 .extra2
= (void *)&hugetlb_infinity
,
1076 .procname
= "hugetlb_shm_group",
1077 .data
= &sysctl_hugetlb_shm_group
,
1078 .maxlen
= sizeof(gid_t
),
1080 .proc_handler
= proc_dointvec
,
1083 .procname
= "hugepages_treat_as_movable",
1084 .data
= &hugepages_treat_as_movable
,
1085 .maxlen
= sizeof(int),
1087 .proc_handler
= hugetlb_treat_movable_handler
,
1090 .procname
= "nr_overcommit_hugepages",
1092 .maxlen
= sizeof(unsigned long),
1094 .proc_handler
= hugetlb_overcommit_handler
,
1095 .extra1
= (void *)&hugetlb_zero
,
1096 .extra2
= (void *)&hugetlb_infinity
,
1100 .procname
= "lowmem_reserve_ratio",
1101 .data
= &sysctl_lowmem_reserve_ratio
,
1102 .maxlen
= sizeof(sysctl_lowmem_reserve_ratio
),
1104 .proc_handler
= lowmem_reserve_ratio_sysctl_handler
,
1107 .procname
= "drop_caches",
1108 .data
= &sysctl_drop_caches
,
1109 .maxlen
= sizeof(int),
1111 .proc_handler
= drop_caches_sysctl_handler
,
1114 .procname
= "min_free_kbytes",
1115 .data
= &min_free_kbytes
,
1116 .maxlen
= sizeof(min_free_kbytes
),
1118 .proc_handler
= min_free_kbytes_sysctl_handler
,
1122 .procname
= "percpu_pagelist_fraction",
1123 .data
= &percpu_pagelist_fraction
,
1124 .maxlen
= sizeof(percpu_pagelist_fraction
),
1126 .proc_handler
= percpu_pagelist_fraction_sysctl_handler
,
1127 .extra1
= &min_percpu_pagelist_fract
,
1131 .procname
= "max_map_count",
1132 .data
= &sysctl_max_map_count
,
1133 .maxlen
= sizeof(sysctl_max_map_count
),
1135 .proc_handler
= proc_dointvec_minmax
,
1140 .procname
= "nr_trim_pages",
1141 .data
= &sysctl_nr_trim_pages
,
1142 .maxlen
= sizeof(sysctl_nr_trim_pages
),
1144 .proc_handler
= proc_dointvec_minmax
,
1149 .procname
= "laptop_mode",
1150 .data
= &laptop_mode
,
1151 .maxlen
= sizeof(laptop_mode
),
1153 .proc_handler
= proc_dointvec_jiffies
,
1156 .procname
= "block_dump",
1157 .data
= &block_dump
,
1158 .maxlen
= sizeof(block_dump
),
1160 .proc_handler
= proc_dointvec
,
1164 .procname
= "vfs_cache_pressure",
1165 .data
= &sysctl_vfs_cache_pressure
,
1166 .maxlen
= sizeof(sysctl_vfs_cache_pressure
),
1168 .proc_handler
= proc_dointvec
,
1171 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1173 .procname
= "legacy_va_layout",
1174 .data
= &sysctl_legacy_va_layout
,
1175 .maxlen
= sizeof(sysctl_legacy_va_layout
),
1177 .proc_handler
= proc_dointvec
,
1183 .procname
= "zone_reclaim_mode",
1184 .data
= &zone_reclaim_mode
,
1185 .maxlen
= sizeof(zone_reclaim_mode
),
1187 .proc_handler
= proc_dointvec
,
1191 .procname
= "min_unmapped_ratio",
1192 .data
= &sysctl_min_unmapped_ratio
,
1193 .maxlen
= sizeof(sysctl_min_unmapped_ratio
),
1195 .proc_handler
= sysctl_min_unmapped_ratio_sysctl_handler
,
1197 .extra2
= &one_hundred
,
1200 .procname
= "min_slab_ratio",
1201 .data
= &sysctl_min_slab_ratio
,
1202 .maxlen
= sizeof(sysctl_min_slab_ratio
),
1204 .proc_handler
= sysctl_min_slab_ratio_sysctl_handler
,
1206 .extra2
= &one_hundred
,
1211 .procname
= "stat_interval",
1212 .data
= &sysctl_stat_interval
,
1213 .maxlen
= sizeof(sysctl_stat_interval
),
1215 .proc_handler
= proc_dointvec_jiffies
,
1220 .procname
= "mmap_min_addr",
1221 .data
= &dac_mmap_min_addr
,
1222 .maxlen
= sizeof(unsigned long),
1224 .proc_handler
= mmap_min_addr_handler
,
1229 .procname
= "numa_zonelist_order",
1230 .data
= &numa_zonelist_order
,
1231 .maxlen
= NUMA_ZONELIST_ORDER_LEN
,
1233 .proc_handler
= numa_zonelist_order_handler
,
1236 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1237 (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1239 .procname
= "vdso_enabled",
1240 .data
= &vdso_enabled
,
1241 .maxlen
= sizeof(vdso_enabled
),
1243 .proc_handler
= proc_dointvec
,
1247 #ifdef CONFIG_HIGHMEM
1249 .procname
= "highmem_is_dirtyable",
1250 .data
= &vm_highmem_is_dirtyable
,
1251 .maxlen
= sizeof(vm_highmem_is_dirtyable
),
1253 .proc_handler
= proc_dointvec_minmax
,
1259 .procname
= "scan_unevictable_pages",
1260 .data
= &scan_unevictable_pages
,
1261 .maxlen
= sizeof(scan_unevictable_pages
),
1263 .proc_handler
= scan_unevictable_handler
,
1265 #ifdef CONFIG_MEMORY_FAILURE
1267 .procname
= "memory_failure_early_kill",
1268 .data
= &sysctl_memory_failure_early_kill
,
1269 .maxlen
= sizeof(sysctl_memory_failure_early_kill
),
1271 .proc_handler
= proc_dointvec_minmax
,
1276 .procname
= "memory_failure_recovery",
1277 .data
= &sysctl_memory_failure_recovery
,
1278 .maxlen
= sizeof(sysctl_memory_failure_recovery
),
1280 .proc_handler
= proc_dointvec_minmax
,
1287 * NOTE: do not add new entries to this table unless you have read
1288 * Documentation/sysctl/ctl_unnumbered.txt
1293 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1294 static struct ctl_table binfmt_misc_table
[] = {
1299 static struct ctl_table fs_table
[] = {
1301 .procname
= "inode-nr",
1302 .data
= &inodes_stat
,
1303 .maxlen
= 2*sizeof(int),
1305 .proc_handler
= proc_dointvec
,
1308 .procname
= "inode-state",
1309 .data
= &inodes_stat
,
1310 .maxlen
= 7*sizeof(int),
1312 .proc_handler
= proc_dointvec
,
1315 .procname
= "file-nr",
1316 .data
= &files_stat
,
1317 .maxlen
= 3*sizeof(int),
1319 .proc_handler
= proc_nr_files
,
1322 .procname
= "file-max",
1323 .data
= &files_stat
.max_files
,
1324 .maxlen
= sizeof(int),
1326 .proc_handler
= proc_dointvec
,
1329 .procname
= "nr_open",
1330 .data
= &sysctl_nr_open
,
1331 .maxlen
= sizeof(int),
1333 .proc_handler
= proc_dointvec_minmax
,
1334 .extra1
= &sysctl_nr_open_min
,
1335 .extra2
= &sysctl_nr_open_max
,
1338 .procname
= "dentry-state",
1339 .data
= &dentry_stat
,
1340 .maxlen
= 6*sizeof(int),
1342 .proc_handler
= proc_dointvec
,
1345 .procname
= "overflowuid",
1346 .data
= &fs_overflowuid
,
1347 .maxlen
= sizeof(int),
1349 .proc_handler
= proc_dointvec_minmax
,
1350 .extra1
= &minolduid
,
1351 .extra2
= &maxolduid
,
1354 .procname
= "overflowgid",
1355 .data
= &fs_overflowgid
,
1356 .maxlen
= sizeof(int),
1358 .proc_handler
= proc_dointvec_minmax
,
1359 .extra1
= &minolduid
,
1360 .extra2
= &maxolduid
,
1362 #ifdef CONFIG_FILE_LOCKING
1364 .procname
= "leases-enable",
1365 .data
= &leases_enable
,
1366 .maxlen
= sizeof(int),
1368 .proc_handler
= proc_dointvec
,
1371 #ifdef CONFIG_DNOTIFY
1373 .procname
= "dir-notify-enable",
1374 .data
= &dir_notify_enable
,
1375 .maxlen
= sizeof(int),
1377 .proc_handler
= proc_dointvec
,
1381 #ifdef CONFIG_FILE_LOCKING
1383 .procname
= "lease-break-time",
1384 .data
= &lease_break_time
,
1385 .maxlen
= sizeof(int),
1387 .proc_handler
= proc_dointvec
,
1392 .procname
= "aio-nr",
1394 .maxlen
= sizeof(aio_nr
),
1396 .proc_handler
= proc_doulongvec_minmax
,
1399 .procname
= "aio-max-nr",
1400 .data
= &aio_max_nr
,
1401 .maxlen
= sizeof(aio_max_nr
),
1403 .proc_handler
= proc_doulongvec_minmax
,
1405 #endif /* CONFIG_AIO */
1406 #ifdef CONFIG_INOTIFY_USER
1408 .procname
= "inotify",
1410 .child
= inotify_table
,
1415 .procname
= "epoll",
1417 .child
= epoll_table
,
1422 .procname
= "suid_dumpable",
1423 .data
= &suid_dumpable
,
1424 .maxlen
= sizeof(int),
1426 .proc_handler
= proc_dointvec_minmax
,
1430 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1432 .procname
= "binfmt_misc",
1434 .child
= binfmt_misc_table
,
1438 * NOTE: do not add new entries to this table unless you have read
1439 * Documentation/sysctl/ctl_unnumbered.txt
1444 static struct ctl_table debug_table
[] = {
1445 #if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC)
1447 .procname
= "exception-trace",
1448 .data
= &show_unhandled_signals
,
1449 .maxlen
= sizeof(int),
1451 .proc_handler
= proc_dointvec
1454 #if defined(CONFIG_OPTPROBES)
1456 .procname
= "kprobes-optimization",
1457 .data
= &sysctl_kprobes_optimization
,
1458 .maxlen
= sizeof(int),
1460 .proc_handler
= proc_kprobes_optimization_handler
,
1468 static struct ctl_table dev_table
[] = {
1472 static DEFINE_SPINLOCK(sysctl_lock
);
1474 /* called under sysctl_lock */
/*
 * use_table(): the only statement visible in this extract tests
 * p->unregistering (hinted as unlikely).  sysctl_head_grab() in this file
 * treats a zero return from this function as "header cannot be used".
 * NOTE(review): the statements on either side of the test, including the
 * return values, are not visible in this extract.
 */
1475 static int use_table(struct ctl_table_header
*p
)
1477 if (unlikely(p
->unregistering
))
1483 /* called under sysctl_lock */
/*
 * unuse_table(): when p->unregistering is set, signal it with complete().
 * start_unregistering() is the function in this file that points that
 * field at a completion and then waits on it.
 * NOTE(review): any condition that guards this test is not visible in this
 * extract.
 */
1484 static void unuse_table(struct ctl_table_header
*p
)
1487 if (unlikely(p
->unregistering
))
1488 complete(p
->unregistering
);
1491 /* called under sysctl_lock, will reacquire if has to wait */
/*
 * start_unregistering(): mark header @p as going away and unlink it.
 *
 * If p->used is non-zero, a completion living on this function's stack is
 * published through p->unregistering, sysctl_lock is released for the
 * duration of wait_for_completion(), and the lock is taken again afterwards.
 * p->unregistering is also assigned ERR_PTR(-EINVAL).
 * NOTE(review): presumably that second assignment is the p->used == 0 path,
 * but the line joining the two branches is missing from this extract.
 * Finally the header is taken off its list with list_del_init().
 */
1492 static void start_unregistering(struct ctl_table_header
*p
)
1495 * if p->used is 0, nobody will ever touch that entry again;
1496 * we'll eliminate all paths to it before dropping sysctl_lock
1498 if (unlikely(p
->used
)) {
/* completion is local to this call; p->unregistering points at it */
1499 struct completion wait
;
1500 init_completion(&wait
);
1501 p
->unregistering
= &wait
;
/* the spinlock is dropped across the wait and re-acquired after it */
1502 spin_unlock(&sysctl_lock
);
1503 wait_for_completion(&wait
);
1504 spin_lock(&sysctl_lock
);
1506 /* anything non-NULL; we'll never dereference it */
1507 p
->unregistering
= ERR_PTR(-EINVAL
);
1510 * do not remove from the list until nobody holds it; walking the
1511 * list in do_sysctl() relies on that.
1513 list_del_init(&p
->ctl_entry
);
/*
 * sysctl_head_get(): takes and releases sysctl_lock around its body.
 * NOTE(review): the statement executed on @head while the lock is held is
 * not visible in this extract.
 */
1516 void sysctl_head_get(struct ctl_table_header
*head
)
1518 spin_lock(&sysctl_lock
);
1520 spin_unlock(&sysctl_lock
);
/*
 * sysctl_head_put(): takes and releases sysctl_lock around its body.
 * NOTE(review): the statements executed on @head while the lock is held
 * are not visible in this extract.
 */
1523 void sysctl_head_put(struct ctl_table_header
*head
)
1525 spin_lock(&sysctl_lock
);
1528 spin_unlock(&sysctl_lock
);
/*
 * sysctl_head_grab(): under sysctl_lock, offer @head to use_table(); when
 * that returns zero, @head is replaced by ERR_PTR(-ENOENT).
 * NOTE(review): the return statement and any check made on @head before
 * the lock is taken are not visible in this extract.
 */
1531 struct ctl_table_header
*sysctl_head_grab(struct ctl_table_header
*head
)
1535 spin_lock(&sysctl_lock
);
1536 if (!use_table(head
))
1537 head
= ERR_PTR(-ENOENT
);
1538 spin_unlock(&sysctl_lock
);
/*
 * sysctl_head_finish(): takes and releases sysctl_lock around its body.
 * NOTE(review): the statement executed on @head while the lock is held,
 * and any early-exit test before it, are not visible in this extract.
 */
1542 void sysctl_head_finish(struct ctl_table_header
*head
)
1546 spin_lock(&sysctl_lock
);
1548 spin_unlock(&sysctl_lock
);
/*
 * lookup_header_set(): choose the ctl_table_set to use for @root.
 * The local 'set' starts as &root->default_set; the visible call to
 * root->lookup(root, namespaces) then overwrites that choice.
 * NOTE(review): whether the call is guarded by a NULL test on root->lookup,
 * and the return statement, cannot be seen in this extract.
 */
1551 static struct ctl_table_set
*
1552 lookup_header_set(struct ctl_table_root
*root
, struct nsproxy
*namespaces
)
1554 struct ctl_table_set
*set
= &root
->default_set
;
1556 set
= root
->lookup(root
, namespaces
);
/*
 * lookup_header_list(): obtain the table set for (@root, @namespaces) via
 * lookup_header_set() and hand back a struct list_head pointer.
 * NOTE(review): the returned expression is not visible in this extract.
 */
1560 static struct list_head
*
1561 lookup_header_list(struct ctl_table_root
*root
, struct nsproxy
*namespaces
)
1563 struct ctl_table_set
*set
= lookup_header_set(root
, namespaces
);
/*
 * __sysctl_head_next(): step to the header that follows @prev, as seen
 * through @namespaces.
 *
 * Steps visible in this extract, between spin_lock() and spin_unlock() on
 * sysctl_lock:
 *  - the cursor 'tmp' is set from &prev->ctl_entry, or from
 *    &root_table_header.ctl_entry;
 *  - 'head' is recovered from the cursor with list_entry() and offered to
 *    use_table();
 *  - the cursor is compared with the list returned by lookup_header_list()
 *    for the current root;
 *  - 'root' is advanced along root_list and compared with
 *    &sysctl_table_root, and the do/while keeps going while the root's
 *    header list is empty.
 * NOTE(review): branch bodies, labels, the initialisation of 'root' and all
 * return statements are missing from this extract; confirm the exact
 * control flow against the complete file.
 */
1567 struct ctl_table_header
*__sysctl_head_next(struct nsproxy
*namespaces
,
1568 struct ctl_table_header
*prev
)
1570 struct ctl_table_root
*root
;
1571 struct list_head
*header_list
;
1572 struct ctl_table_header
*head
;
1573 struct list_head
*tmp
;
1575 spin_lock(&sysctl_lock
);
1578 tmp
= &prev
->ctl_entry
;
1582 tmp
= &root_table_header
.ctl_entry
;
1584 head
= list_entry(tmp
, struct ctl_table_header
, ctl_entry
);
1586 if (!use_table(head
))
1588 spin_unlock(&sysctl_lock
);
1593 header_list
= lookup_header_list(root
, namespaces
);
1594 if (tmp
!= header_list
)
1598 root
= list_entry(root
->root_list
.next
,
1599 struct ctl_table_root
, root_list
);
1600 if (root
== &sysctl_table_root
)
1602 header_list
= lookup_header_list(root
, namespaces
);
1603 } while (list_empty(header_list
));
1604 tmp
= header_list
->next
;
1607 spin_unlock(&sysctl_lock
);
/*
 * sysctl_head_next(): wrapper that calls __sysctl_head_next() with the
 * calling task's namespaces (current->nsproxy) and returns its result.
 */
1611 struct ctl_table_header
*sysctl_head_next(struct ctl_table_header
*prev
)
1613 return __sysctl_head_next(current
->nsproxy
, prev
);
/*
 * register_sysctl_root(): append @root to the list anchored at
 * sysctl_table_root.root_list.  The insertion is done with sysctl_lock held.
 */
1616 void register_sysctl_root(struct ctl_table_root
*root
)
1618 spin_lock(&sysctl_lock
);
1619 list_add_tail(&root
->root_list
, &sysctl_table_root
.root_list
);
1620 spin_unlock(&sysctl_lock
);
1624 * sysctl_perm does NOT grant the superuser all rights automatically, because
1625 * some sysctl variables are readonly even to root.
/*
 * test_perm(): check the requested access @op against permission bits
 * @mode.  The visible tests are, in order: current_euid() being zero,
 * membership of group 0 via in_egroup_p(0), and whether every
 * MAY_READ/MAY_WRITE/MAY_EXEC bit requested in @op is present in @mode.
 * NOTE(review): what the first two branches do to @mode, and the return
 * values, are not visible in this extract.
 */
1628 static int test_perm(int mode
, int op
)
1630 if (!current_euid())
1632 else if (in_egroup_p(0))
1634 if ((op
& ~mode
& (MAY_READ
|MAY_WRITE
|MAY_EXEC
)) == 0)
/*
 * sysctl_perm(): permission check for @table under @root.
 * Calls security_sysctl() with the read/write/exec bits of @op, takes
 * 'mode' from the root->permissions() hook when that hook is set, and
 * returns the result of test_perm(mode, op).
 * NOTE(review): how a security_sysctl() error is handled, and the value of
 * 'mode' when the hook is NULL, are not visible in this extract.
 */
1639 int sysctl_perm(struct ctl_table_root
*root
, struct ctl_table
*table
, int op
)
1644 error
= security_sysctl(table
, op
& (MAY_READ
| MAY_WRITE
| MAY_EXEC
));
1648 if (root
->permissions
)
1649 mode
= root
->permissions(root
, current
->nsproxy
, table
);
1653 return test_perm(mode
, op
);
/*
 * sysctl_set_parent(): walk @table up to the entry whose procname is NULL,
 * storing @parent in each entry's ->parent, and recurse with the entry
 * itself as parent of its ->child table.
 * NOTE(review): the test that guards the recursive call is not visible in
 * this extract.
 */
1656 static void sysctl_set_parent(struct ctl_table
*parent
, struct ctl_table
*table
)
1658 for (; table
->procname
; table
++) {
1659 table
->parent
= parent
;
1661 sysctl_set_parent(table
, table
->child
);
/*
 * sysctl_init(): registered through core_initcall().  Sets the parent
 * links for the tree rooted at root_table (top-level entries get a NULL
 * parent) and, under CONFIG_SYSCTL_SYSCALL_CHECK, runs sysctl_check_table()
 * on it with the current task's namespaces.
 * NOTE(review): what is done with 'err' and the return value are not
 * visible in this extract.
 */
1665 static __init
int sysctl_init(void)
1667 sysctl_set_parent(NULL
, root_table
);
1668 #ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1671 err
= sysctl_check_table(current
->nsproxy
, root_table
);
1677 core_initcall(sysctl_init
);
1679 static struct ctl_table
*is_branch_in(struct ctl_table
*branch
,
1680 struct ctl_table
*table
)
1682 struct ctl_table
*p
;
1683 const char *s
= branch
->procname
;
1685 /* branch should have named subdirectory as its first element */
1686 if (!s
|| !branch
->child
)
1689 /* ... and nothing else */
1690 if (branch
[1].procname
)
1693 /* table should contain subdirectory with the same name */
1694 for (p
= table
; p
->procname
; p
++) {
1697 if (p
->procname
&& strcmp(p
->procname
, s
) == 0)
1703 /* see if attaching q to p would be an improvement */
1704 static void try_attach(struct ctl_table_header
*p
, struct ctl_table_header
*q
)
1706 struct ctl_table
*to
= p
->ctl_table
, *by
= q
->ctl_table
;
1707 struct ctl_table
*next
;
1709 int not_in_parent
= !p
->attached_by
;
1711 while ((next
= is_branch_in(by
, to
)) != NULL
) {
1712 if (by
== q
->attached_by
)
1714 if (to
== p
->attached_by
)
1720 if (is_better
&& not_in_parent
) {
1721 q
->attached_by
= by
;
1722 q
->attached_to
= to
;
1728 * __register_sysctl_paths - register a sysctl hierarchy
1729 * @root: List of sysctl headers to register on
1730 * @namespaces: Data to compute which lists of sysctl entries are visible
1731 * @path: The path to the directory the sysctl table is in.
1732 * @table: the top-level table structure
1734 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1735 * array. A completely 0 filled entry terminates the table.
1737 * The members of the &struct ctl_table structure are used as follows:
1739 * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1740 * enter a sysctl file
1742 * data - a pointer to data for use by proc_handler
1744 * maxlen - the maximum size in bytes of the data
1746 * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1748 * child - a pointer to the child sysctl table if this entry is a directory, or %NULL otherwise
1751 * proc_handler - the text handler routine (described below)
1753 * de - for internal use by the sysctl routines
1755 * extra1, extra2 - extra pointers usable by the proc handler routines
1757 * Leaf nodes in the sysctl tree will be represented by a single file
1758 * under /proc; non-leaf nodes will be represented by directories.
1760 * sysctl(2) can automatically manage read and write requests through
1761 * the sysctl table. The data and maxlen fields of the ctl_table
1762 * struct enable minimal validation of the values being written to be
1763 * performed, and the mode field allows minimal authentication.
1765 * There must be a proc_handler routine for any terminal nodes
1766 * mirrored under /proc/sys (non-terminals are handled by a built-in
1767 * directory handler). Several default handlers are available to
1768 * cover common cases -
1770 * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1771 * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1772 * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1774 * It is the handler's job to read the input buffer from user memory
1775 * and process it. The handler should return 0 on success.
1777 * This routine returns %NULL on a failure to register, and a pointer
1778 * to the table header on success.
1780 struct ctl_table_header
*__register_sysctl_paths(
1781 struct ctl_table_root
*root
,
1782 struct nsproxy
*namespaces
,
1783 const struct ctl_path
*path
, struct ctl_table
*table
)
1785 struct ctl_table_header
*header
;
1786 struct ctl_table
*new, **prevp
;
1787 unsigned int n
, npath
;
1788 struct ctl_table_set
*set
;
1790 /* Count the path components */
1791 for (npath
= 0; path
[npath
].procname
; ++npath
)
1795 * For each path component, allocate a 2-element ctl_table array.
1796 * The first array element will be filled with the sysctl entry
1797 * for this, the second will be the sentinel (procname == 0).
1799 * We allocate everything in one go so that we don't have to
1800 * worry about freeing additional memory in unregister_sysctl_table.
1802 header
= kzalloc(sizeof(struct ctl_table_header
) +
1803 (2 * npath
* sizeof(struct ctl_table
)), GFP_KERNEL
);
1807 new = (struct ctl_table
*) (header
+ 1);
1809 /* Now connect the dots */
1810 prevp
= &header
->ctl_table
;
1811 for (n
= 0; n
< npath
; ++n
, ++path
) {
1812 /* Copy the procname */
1813 new->procname
= path
->procname
;
1817 prevp
= &new->child
;
1822 header
->ctl_table_arg
= table
;
1824 INIT_LIST_HEAD(&header
->ctl_entry
);
1826 header
->unregistering
= NULL
;
1827 header
->root
= root
;
1828 sysctl_set_parent(NULL
, header
->ctl_table
);
1830 #ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1831 if (sysctl_check_table(namespaces
, header
->ctl_table
)) {
1836 spin_lock(&sysctl_lock
);
1837 header
->set
= lookup_header_set(root
, namespaces
);
1838 header
->attached_by
= header
->ctl_table
;
1839 header
->attached_to
= root_table
;
1840 header
->parent
= &root_table_header
;
1841 for (set
= header
->set
; set
; set
= set
->parent
) {
1842 struct ctl_table_header
*p
;
1843 list_for_each_entry(p
, &set
->list
, ctl_entry
) {
1844 if (p
->unregistering
)
1846 try_attach(p
, header
);
1849 header
->parent
->count
++;
1850 list_add_tail(&header
->ctl_entry
, &header
->set
->list
);
1851 spin_unlock(&sysctl_lock
);
1857 * register_sysctl_paths - register a sysctl table hierarchy
1858 * @path: The path to the directory the sysctl table is in.
1859 * @table: the top-level table structure
1861 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1862 * array. A completely 0 filled entry terminates the table.
1864 * See __register_sysctl_paths for more details.
1866 struct ctl_table_header
*register_sysctl_paths(const struct ctl_path
*path
,
1867 struct ctl_table
*table
)
1869 return __register_sysctl_paths(&sysctl_table_root
, current
->nsproxy
,
1874 * register_sysctl_table - register a sysctl table hierarchy
1875 * @table: the top-level table structure
1877 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1878 * array. A completely 0 filled entry terminates the table.
1880 * See register_sysctl_paths for more details.
1882 struct ctl_table_header
*register_sysctl_table(struct ctl_table
*table
)
1884 static const struct ctl_path null_path
[] = { {} };
1886 return register_sysctl_paths(null_path
, table
);
1890 * unregister_sysctl_table - unregister a sysctl table hierarchy
1891 * @header: the header returned from register_sysctl_table
1893 * Unregisters the sysctl table and all children. proc entries may not
1894 * actually be removed until they are no longer used by anyone.
1896 void unregister_sysctl_table(struct ctl_table_header
* header
)
1903 spin_lock(&sysctl_lock
);
1904 start_unregistering(header
);
1905 if (!--header
->parent
->count
) {
1907 kfree(header
->parent
);
1909 if (!--header
->count
)
1911 spin_unlock(&sysctl_lock
);
1914 int sysctl_is_seen(struct ctl_table_header
*p
)
1916 struct ctl_table_set
*set
= p
->set
;
1918 spin_lock(&sysctl_lock
);
1919 if (p
->unregistering
)
1921 else if (!set
->is_seen
)
1924 res
= set
->is_seen(set
);
1925 spin_unlock(&sysctl_lock
);
1929 void setup_sysctl_set(struct ctl_table_set
*p
,
1930 struct ctl_table_set
*parent
,
1931 int (*is_seen
)(struct ctl_table_set
*))
1933 INIT_LIST_HEAD(&p
->list
);
1934 p
->parent
= parent
? parent
: &sysctl_table_root
.default_set
;
1935 p
->is_seen
= is_seen
;
1938 #else /* !CONFIG_SYSCTL */
1939 struct ctl_table_header
*register_sysctl_table(struct ctl_table
* table
)
1944 struct ctl_table_header
*register_sysctl_paths(const struct ctl_path
*path
,
1945 struct ctl_table
*table
)
1950 void unregister_sysctl_table(struct ctl_table_header
* table
)
1954 void setup_sysctl_set(struct ctl_table_set
*p
,
1955 struct ctl_table_set
*parent
,
1956 int (*is_seen
)(struct ctl_table_set
*))
1960 void sysctl_head_put(struct ctl_table_header
*head
)
1964 #endif /* CONFIG_SYSCTL */
1970 #ifdef CONFIG_PROC_SYSCTL
1972 static int _proc_do_string(void* data
, int maxlen
, int write
,
1973 void __user
*buffer
,
1974 size_t *lenp
, loff_t
*ppos
)
1980 if (!data
|| !maxlen
|| !*lenp
) {
1988 while (len
< *lenp
) {
1989 if (get_user(c
, p
++))
1991 if (c
== 0 || c
== '\n')
1997 if(copy_from_user(data
, buffer
, len
))
1999 ((char *) data
)[len
] = 0;
2017 if(copy_to_user(buffer
, data
, len
))
2020 if(put_user('\n', ((char __user
*) buffer
) + len
))
2031 * proc_dostring - read a string sysctl
2032 * @table: the sysctl table
2033 * @write: %TRUE if this is a write to the sysctl file
2034 * @buffer: the user buffer
2035 * @lenp: the size of the user buffer
2036 * @ppos: file position
2038 * Reads/writes a string from/to the user buffer. If the kernel
2039 * buffer provided is not large enough to hold the string, the
2040 * string is truncated. The copied string is NUL-terminated.
2041 * If the string is being read by the user process, it is copied
2042 * and a newline '\n' is added. It is truncated if the buffer is not large enough.
2045 * Returns 0 on success.
2047 int proc_dostring(struct ctl_table
*table
, int write
,
2048 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
2050 return _proc_do_string(table
->data
, table
->maxlen
, write
,
2051 buffer
, lenp
, ppos
);
2055 static int do_proc_dointvec_conv(int *negp
, unsigned long *lvalp
,
2057 int write
, void *data
)
2060 *valp
= *negp
? -*lvalp
: *lvalp
;
2065 *lvalp
= (unsigned long)-val
;
2068 *lvalp
= (unsigned long)val
;
2074 static int __do_proc_dointvec(void *tbl_data
, struct ctl_table
*table
,
2075 int write
, void __user
*buffer
,
2076 size_t *lenp
, loff_t
*ppos
,
2077 int (*conv
)(int *negp
, unsigned long *lvalp
, int *valp
,
2078 int write
, void *data
),
2081 #define TMPBUFLEN 21
2082 int *i
, vleft
, first
= 1, neg
;
2086 char buf
[TMPBUFLEN
], *p
;
2087 char __user
*s
= buffer
;
2089 if (!tbl_data
|| !table
->maxlen
|| !*lenp
||
2090 (*ppos
&& !write
)) {
2095 i
= (int *) tbl_data
;
2096 vleft
= table
->maxlen
/ sizeof(*i
);
2100 conv
= do_proc_dointvec_conv
;
2102 for (; left
&& vleft
--; i
++, first
=0) {
2117 if (len
> sizeof(buf
) - 1)
2118 len
= sizeof(buf
) - 1;
2119 if (copy_from_user(buf
, s
, len
))
2123 if (*p
== '-' && left
> 1) {
2127 if (*p
< '0' || *p
> '9')
2130 lval
= simple_strtoul(p
, &p
, 0);
2133 if ((len
< left
) && *p
&& !isspace(*p
))
2138 if (conv(&neg
, &lval
, i
, 1, data
))
2145 if (conv(&neg
, &lval
, i
, 0, data
))
2148 sprintf(p
, "%s%lu", neg
? "-" : "", lval
);
2152 if(copy_to_user(s
, buf
, len
))
2159 if (!write
&& !first
&& left
) {
2160 if(put_user('\n', s
))
2167 if (get_user(c
, s
++))
2182 static int do_proc_dointvec(struct ctl_table
*table
, int write
,
2183 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
,
2184 int (*conv
)(int *negp
, unsigned long *lvalp
, int *valp
,
2185 int write
, void *data
),
2188 return __do_proc_dointvec(table
->data
, table
, write
,
2189 buffer
, lenp
, ppos
, conv
, data
);
2193 * proc_dointvec - read a vector of integers
2194 * @table: the sysctl table
2195 * @write: %TRUE if this is a write to the sysctl file
2196 * @buffer: the user buffer
2197 * @lenp: the size of the user buffer
2198 * @ppos: file position
2200 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2201 * values from/to the user buffer, treated as an ASCII string.
2203 * Returns 0 on success.
2205 int proc_dointvec(struct ctl_table
*table
, int write
,
2206 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
2208 return do_proc_dointvec(table
,write
,buffer
,lenp
,ppos
,
2213 * Taint values can only be increased
2214 * This means we can safely use a temporary.
2216 static int proc_taint(struct ctl_table
*table
, int write
,
2217 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
2220 unsigned long tmptaint
= get_taint();
2223 if (write
&& !capable(CAP_SYS_ADMIN
))
2228 err
= proc_doulongvec_minmax(&t
, write
, buffer
, lenp
, ppos
);
2234 * Poor man's atomic or. Not worth adding a primitive
2235 * to everyone's atomic.h for this
2238 for (i
= 0; i
< BITS_PER_LONG
&& tmptaint
>> i
; i
++) {
2239 if ((tmptaint
>> i
) & 1)
2247 struct do_proc_dointvec_minmax_conv_param
{
2252 static int do_proc_dointvec_minmax_conv(int *negp
, unsigned long *lvalp
,
2254 int write
, void *data
)
2256 struct do_proc_dointvec_minmax_conv_param
*param
= data
;
2258 int val
= *negp
? -*lvalp
: *lvalp
;
2259 if ((param
->min
&& *param
->min
> val
) ||
2260 (param
->max
&& *param
->max
< val
))
2267 *lvalp
= (unsigned long)-val
;
2270 *lvalp
= (unsigned long)val
;
2277 * proc_dointvec_minmax - read a vector of integers with min/max values
2278 * @table: the sysctl table
2279 * @write: %TRUE if this is a write to the sysctl file
2280 * @buffer: the user buffer
2281 * @lenp: the size of the user buffer
2282 * @ppos: file position
2284 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2285 * values from/to the user buffer, treated as an ASCII string.
2287 * This routine will ensure the values are within the range specified by
2288 * table->extra1 (min) and table->extra2 (max).
2290 * Returns 0 on success.
2292 int proc_dointvec_minmax(struct ctl_table
*table
, int write
,
2293 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
2295 struct do_proc_dointvec_minmax_conv_param param
= {
2296 .min
= (int *) table
->extra1
,
2297 .max
= (int *) table
->extra2
,
2299 return do_proc_dointvec(table
, write
, buffer
, lenp
, ppos
,
2300 do_proc_dointvec_minmax_conv
, &param
);
2303 static int __do_proc_doulongvec_minmax(void *data
, struct ctl_table
*table
, int write
,
2304 void __user
*buffer
,
2305 size_t *lenp
, loff_t
*ppos
,
2306 unsigned long convmul
,
2307 unsigned long convdiv
)
2309 #define TMPBUFLEN 21
2310 unsigned long *i
, *min
, *max
, val
;
2311 int vleft
, first
=1, neg
;
2313 char buf
[TMPBUFLEN
], *p
;
2314 char __user
*s
= buffer
;
2316 if (!data
|| !table
->maxlen
|| !*lenp
||
2317 (*ppos
&& !write
)) {
2322 i
= (unsigned long *) data
;
2323 min
= (unsigned long *) table
->extra1
;
2324 max
= (unsigned long *) table
->extra2
;
2325 vleft
= table
->maxlen
/ sizeof(unsigned long);
2328 for (; left
&& vleft
--; i
++, min
++, max
++, first
=0) {
2343 if (len
> TMPBUFLEN
-1)
2345 if (copy_from_user(buf
, s
, len
))
2349 if (*p
== '-' && left
> 1) {
2353 if (*p
< '0' || *p
> '9')
2355 val
= simple_strtoul(p
, &p
, 0) * convmul
/ convdiv
;
2357 if ((len
< left
) && *p
&& !isspace(*p
))
2366 if ((min
&& val
< *min
) || (max
&& val
> *max
))
2373 sprintf(p
, "%lu", convdiv
* (*i
) / convmul
);
2377 if(copy_to_user(s
, buf
, len
))
2384 if (!write
&& !first
&& left
) {
2385 if(put_user('\n', s
))
2392 if (get_user(c
, s
++))
2407 static int do_proc_doulongvec_minmax(struct ctl_table
*table
, int write
,
2408 void __user
*buffer
,
2409 size_t *lenp
, loff_t
*ppos
,
2410 unsigned long convmul
,
2411 unsigned long convdiv
)
2413 return __do_proc_doulongvec_minmax(table
->data
, table
, write
,
2414 buffer
, lenp
, ppos
, convmul
, convdiv
);
2418 * proc_doulongvec_minmax - read a vector of long integers with min/max values
2419 * @table: the sysctl table
2420 * @write: %TRUE if this is a write to the sysctl file
2421 * @buffer: the user buffer
2422 * @lenp: the size of the user buffer
2423 * @ppos: file position
2425 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2426 * values from/to the user buffer, treated as an ASCII string.
2428 * This routine will ensure the values are within the range specified by
2429 * table->extra1 (min) and table->extra2 (max).
2431 * Returns 0 on success.
2433 int proc_doulongvec_minmax(struct ctl_table
*table
, int write
,
2434 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
2436 return do_proc_doulongvec_minmax(table
, write
, buffer
, lenp
, ppos
, 1l, 1l);
2440 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2441 * @table: the sysctl table
2442 * @write: %TRUE if this is a write to the sysctl file
2443 * @buffer: the user buffer
2444 * @lenp: the size of the user buffer
2445 * @ppos: file position
2447 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2448 * values from/to the user buffer, treated as an ASCII string. The values
2449 * are treated as milliseconds, and converted to jiffies when they are stored.
2451 * This routine will ensure the values are within the range specified by
2452 * table->extra1 (min) and table->extra2 (max).
2454 * Returns 0 on success.
2456 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table
*table
, int write
,
2457 void __user
*buffer
,
2458 size_t *lenp
, loff_t
*ppos
)
2460 return do_proc_doulongvec_minmax(table
, write
, buffer
,
2461 lenp
, ppos
, HZ
, 1000l);
2465 static int do_proc_dointvec_jiffies_conv(int *negp
, unsigned long *lvalp
,
2467 int write
, void *data
)
2470 if (*lvalp
> LONG_MAX
/ HZ
)
2472 *valp
= *negp
? -(*lvalp
*HZ
) : (*lvalp
*HZ
);
2478 lval
= (unsigned long)-val
;
2481 lval
= (unsigned long)val
;
2488 static int do_proc_dointvec_userhz_jiffies_conv(int *negp
, unsigned long *lvalp
,
2490 int write
, void *data
)
2493 if (USER_HZ
< HZ
&& *lvalp
> (LONG_MAX
/ HZ
) * USER_HZ
)
2495 *valp
= clock_t_to_jiffies(*negp
? -*lvalp
: *lvalp
);
2501 lval
= (unsigned long)-val
;
2504 lval
= (unsigned long)val
;
2506 *lvalp
= jiffies_to_clock_t(lval
);
2511 static int do_proc_dointvec_ms_jiffies_conv(int *negp
, unsigned long *lvalp
,
2513 int write
, void *data
)
2516 *valp
= msecs_to_jiffies(*negp
? -*lvalp
: *lvalp
);
2522 lval
= (unsigned long)-val
;
2525 lval
= (unsigned long)val
;
2527 *lvalp
= jiffies_to_msecs(lval
);
2533 * proc_dointvec_jiffies - read a vector of integers as seconds
2534 * @table: the sysctl table
2535 * @write: %TRUE if this is a write to the sysctl file
2536 * @buffer: the user buffer
2537 * @lenp: the size of the user buffer
2538 * @ppos: file position
2540 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2541 * values from/to the user buffer, treated as an ASCII string.
2542 * The values read are assumed to be in seconds, and are converted into jiffies.
2545 * Returns 0 on success.
2547 int proc_dointvec_jiffies(struct ctl_table
*table
, int write
,
2548 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
2550 return do_proc_dointvec(table
,write
,buffer
,lenp
,ppos
,
2551 do_proc_dointvec_jiffies_conv
,NULL
);
2555 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2556 * @table: the sysctl table
2557 * @write: %TRUE if this is a write to the sysctl file
2558 * @buffer: the user buffer
2559 * @lenp: the size of the user buffer
2560 * @ppos: pointer to the file position
2562 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2563 * values from/to the user buffer, treated as an ASCII string.
2564 * The values read are assumed to be in 1/USER_HZ seconds, and
2565 * are converted into jiffies.
2567 * Returns 0 on success.
2569 int proc_dointvec_userhz_jiffies(struct ctl_table
*table
, int write
,
2570 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
2572 return do_proc_dointvec(table
,write
,buffer
,lenp
,ppos
,
2573 do_proc_dointvec_userhz_jiffies_conv
,NULL
);
2577 * proc_dointvec_ms_jiffies - read a vector of integers as milliseconds
2578 * @table: the sysctl table
2579 * @write: %TRUE if this is a write to the sysctl file
2580 * @buffer: the user buffer
2581 * @lenp: the size of the user buffer
2582 * @ppos: the current position in the file
2585 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2586 * values from/to the user buffer, treated as an ASCII string.
2587 * The values read are assumed to be in 1/1000 seconds, and
2588 * are converted into jiffies.
2590 * Returns 0 on success.
2592 int proc_dointvec_ms_jiffies(struct ctl_table
*table
, int write
,
2593 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
2595 return do_proc_dointvec(table
, write
, buffer
, lenp
, ppos
,
2596 do_proc_dointvec_ms_jiffies_conv
, NULL
);
2599 static int proc_do_cad_pid(struct ctl_table
*table
, int write
,
2600 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
2602 struct pid
*new_pid
;
2606 tmp
= pid_vnr(cad_pid
);
2608 r
= __do_proc_dointvec(&tmp
, table
, write
, buffer
,
2609 lenp
, ppos
, NULL
, NULL
);
2613 new_pid
= find_get_pid(tmp
);
2617 put_pid(xchg(&cad_pid
, new_pid
));
2621 #else /* !CONFIG_PROC_SYSCTL */
2623 int proc_dostring(struct ctl_table
*table
, int write
,
2624 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
2629 int proc_dointvec(struct ctl_table
*table
, int write
,
2630 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
2635 int proc_dointvec_minmax(struct ctl_table
*table
, int write
,
2636 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
2641 int proc_dointvec_jiffies(struct ctl_table
*table
, int write
,
2642 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
2647 int proc_dointvec_userhz_jiffies(struct ctl_table
*table
, int write
,
2648 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
2653 int proc_dointvec_ms_jiffies(struct ctl_table
*table
, int write
,
2654 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
2659 int proc_doulongvec_minmax(struct ctl_table
*table
, int write
,
2660 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
2665 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table
*table
, int write
,
2666 void __user
*buffer
,
2667 size_t *lenp
, loff_t
*ppos
)
2673 #endif /* CONFIG_PROC_SYSCTL */
2676 * No sense putting this after each symbol definition, twice,
2677 * exception granted :-)
2679 EXPORT_SYMBOL(proc_dointvec
);
2680 EXPORT_SYMBOL(proc_dointvec_jiffies
);
2681 EXPORT_SYMBOL(proc_dointvec_minmax
);
2682 EXPORT_SYMBOL(proc_dointvec_userhz_jiffies
);
2683 EXPORT_SYMBOL(proc_dointvec_ms_jiffies
);
2684 EXPORT_SYMBOL(proc_dostring
);
2685 EXPORT_SYMBOL(proc_doulongvec_minmax
);
2686 EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax
);
2687 EXPORT_SYMBOL(register_sysctl_table
);
2688 EXPORT_SYMBOL(register_sysctl_paths
);
2689 EXPORT_SYMBOL(unregister_sysctl_table
);