xref: /linux-6.15/kernel/sysctl.c (revision a115bc07)
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20 
21 #include <linux/module.h>
22 #include <linux/mm.h>
23 #include <linux/swap.h>
24 #include <linux/slab.h>
25 #include <linux/sysctl.h>
26 #include <linux/proc_fs.h>
27 #include <linux/security.h>
28 #include <linux/ctype.h>
29 #include <linux/kmemcheck.h>
30 #include <linux/fs.h>
31 #include <linux/init.h>
32 #include <linux/kernel.h>
33 #include <linux/kobject.h>
34 #include <linux/net.h>
35 #include <linux/sysrq.h>
36 #include <linux/highuid.h>
37 #include <linux/writeback.h>
38 #include <linux/ratelimit.h>
39 #include <linux/hugetlb.h>
40 #include <linux/initrd.h>
41 #include <linux/key.h>
42 #include <linux/times.h>
43 #include <linux/limits.h>
44 #include <linux/dcache.h>
45 #include <linux/syscalls.h>
46 #include <linux/vmstat.h>
47 #include <linux/nfs_fs.h>
48 #include <linux/acpi.h>
49 #include <linux/reboot.h>
50 #include <linux/ftrace.h>
51 #include <linux/slow-work.h>
52 #include <linux/perf_event.h>
53 #include <linux/kprobes.h>
54 
55 #include <asm/uaccess.h>
56 #include <asm/processor.h>
57 
58 #ifdef CONFIG_X86
59 #include <asm/nmi.h>
60 #include <asm/stacktrace.h>
61 #include <asm/io.h>
62 #endif
63 
64 
65 #if defined(CONFIG_SYSCTL)
66 
67 /*
 * External variables not in a header file.
 *
 * They are declared here so the sysctl tables below can take their
 * addresses for the .data / .extra1 / .extra2 fields.  A few of them
 * (suid_dumpable, sysctl_nr_open_min/max) are not referenced in the part
 * of the file visible here -- presumably they are used by a later table.
 */
68 extern int C_A_D;	/* kernel/ctrl-alt-del */
69 extern int print_fatal_signals;
70 extern int sysctl_overcommit_memory;
71 extern int sysctl_overcommit_ratio;
72 extern int sysctl_panic_on_oom;
73 extern int sysctl_oom_kill_allocating_task;
74 extern int sysctl_oom_dump_tasks;
75 extern int max_threads;	/* kernel/threads-max */
76 extern int core_uses_pid;
77 extern int suid_dumpable;
78 extern char core_pattern[];
79 extern unsigned int core_pipe_limit;
80 extern int pid_max;
81 extern int min_free_kbytes;
82 extern int pid_max_min, pid_max_max;	/* .extra1/.extra2 of kernel/pid_max */
83 extern int sysctl_drop_caches;
84 extern int percpu_pagelist_fraction;
85 extern int compat_log;
86 extern int latencytop_enabled;
87 extern int sysctl_nr_open_min, sysctl_nr_open_max;
88 #ifndef CONFIG_MMU
89 extern int sysctl_nr_trim_pages;	/* vm/nr_trim_pages, no-MMU builds only */
90 #endif
91 #ifdef CONFIG_RCU_TORTURE_TEST
92 extern int rcutorture_runnable;
93 #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
94 #ifdef CONFIG_BLOCK
95 extern int blk_iopoll_enabled;	/* kernel/blk_iopoll */
96 #endif
97 
98 /*
 * Constants used for minimum and maximum.
 *
 * Table entries point .extra1 (lower bound) and .extra2 (upper bound) at
 * these objects, so each limit has to exist as an addressable variable
 * rather than as a literal.
 */
99 #ifdef CONFIG_DETECT_SOFTLOCKUP
100 static int sixty = 60;	/* upper bound of kernel/softlockup_thresh */
101 static int neg_one = -1;	/* lower bound of kernel/softlockup_thresh */
102 #endif
103 
104 static int zero;	/* no initializer: static storage, so the value is 0 */
105 static int __maybe_unused one = 1;
106 static int __maybe_unused two = 2;	/* not referenced in the visible part of the file */
107 static unsigned long one_ul = 1;	/* lower bound of vm/dirty_background_bytes */
108 static int one_hundred = 100;	/* upper bound of the percentage-style vm entries */
109 #ifdef CONFIG_PRINTK
110 static int ten_thousand = 10000;	/* upper bound of kernel/printk_delay */
111 #endif
112 
113 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
114 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
115 
116 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
117 static int maxolduid = 65535;
118 static int minolduid;
119 static int min_percpu_pagelist_fract = 8;	/* lower bound of vm/percpu_pagelist_fraction */
120 
121 static int ngroups_max = NGROUPS_MAX;	/* exported read-only (0444) as kernel/ngroups_max */
122 
/*
 * Configuration- and architecture-specific externs.  Each group is only
 * declared when the matching option is set; the kern_table entries that
 * use them sit under the same kind of conditional.
 */
123 #ifdef CONFIG_MODULES
124 extern char modprobe_path[];	/* kernel/modprobe */
125 extern int modules_disabled;	/* kernel/modules_disabled */
126 #endif
127 #ifdef CONFIG_CHR_DEV_SG
128 extern int sg_big_buff;	/* kernel/sg-big-buff (read-only) */
129 #endif
130 
131 #ifdef CONFIG_SPARC
132 #include <asm/system.h>
133 #endif
134 
135 #ifdef CONFIG_SPARC64
136 extern int sysctl_tsb_ratio;	/* kernel/tsb-ratio */
137 #endif
138 
139 #ifdef __hppa__
140 extern int pwrsw_enabled;	/* kernel/soft-power */
141 extern int unaligned_enabled;	/* kernel/unaligned-trap */
142 #endif
143 
144 #ifdef CONFIG_S390
145 #ifdef CONFIG_MATHEMU
146 extern int sysctl_ieee_emulation_warnings;
147 #endif
148 extern int sysctl_userprocess_debug;
149 extern int spin_retry;	/* table entry additionally requires CONFIG_SMP */
150 #endif
151 
152 #ifdef CONFIG_BSD_PROCESS_ACCT
153 extern int acct_parm[];	/* kernel/acct exposes three ints of this array */
154 #endif
155 
156 #ifdef CONFIG_IA64
157 extern int no_unaligned_warning;	/* kernel/ignore-unaligned-usertrap */
158 extern int unaligned_dump_stack;	/* kernel/unaligned-dump-stack */
159 #endif
160 
161 /* .interval and .burst back kernel/printk_ratelimit and printk_ratelimit_burst */
extern struct ratelimit_state printk_ratelimit_state;
162 
163 #ifdef CONFIG_RT_MUTEXES
164 extern int max_lock_depth;	/* kernel/max_lock_depth */
165 #endif
166 
/*
 * Forward declarations of two handlers that kern_table references before
 * their definitions (which are not in the visible part of the file):
 * proc_do_cad_pid serves kernel/cad_pid and proc_taint serves
 * kernel/tainted.  Both take the table entry, a write flag, the user
 * buffer, and pointers to the length and file position.
 */
167 #ifdef CONFIG_PROC_SYSCTL
168 static int proc_do_cad_pid(struct ctl_table *table, int write,
169 		  void __user *buffer, size_t *lenp, loff_t *ppos);
170 static int proc_taint(struct ctl_table *table, int write,
171 			       void __user *buffer, size_t *lenp, loff_t *ppos);
172 #endif
173 
/*
 * The built-in sysctl root and the header describing root_table.
 *
 * The two objects refer to each other, so both are tentatively declared
 * before either is initialised.  root_table_header.ctl_entry and
 * sysctl_table_root.default_set.list are each initialised with the
 * other's address; assuming the usual LIST_HEAD_INIT semantics from
 * <linux/list.h>, the header therefore starts out already linked on the
 * default set's list without any run-time registration.
 */
174 static struct ctl_table root_table[];
175 static struct ctl_table_root sysctl_table_root;
176 static struct ctl_table_header root_table_header = {
177 	.count = 1,
178 	.ctl_table = root_table,
179 	.ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),
180 	.root = &sysctl_table_root,
181 	.set = &sysctl_table_root.default_set,
182 };
183 static struct ctl_table_root sysctl_table_root = {
184 	.root_list = LIST_HEAD_INIT(sysctl_table_root.root_list),	/* self-referencing initializer */
185 	.default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry),
186 };
187 
/*
 * Forward declarations of the per-directory tables so root_table can
 * reference them as .child before they are defined.  fs_table,
 * debug_table and dev_table are defined outside the visible part of the
 * file.
 */
188 static struct ctl_table kern_table[];
189 static struct ctl_table vm_table[];
190 static struct ctl_table fs_table[];
191 static struct ctl_table debug_table[];
192 static struct ctl_table dev_table[];
193 extern struct ctl_table random_table[];	/* child of kernel/random */
194 #ifdef CONFIG_INOTIFY_USER
195 extern struct ctl_table inotify_table[];
196 #endif
197 #ifdef CONFIG_EPOLL
198 extern struct ctl_table epoll_table[];
199 #endif
200 
201 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
202 int sysctl_legacy_va_layout;	/* defined (not just declared) here; backs vm/legacy_va_layout */
203 #endif
204 
205 extern int prove_locking;	/* kernel/prove_locking (CONFIG_PROVE_LOCKING) */
206 extern int lock_stat;	/* kernel/lock_stat (CONFIG_LOCK_STAT) */
207 
208 /* The default sysctl tables: */
209 
/*
 * Top-level directories.  Each entry is a directory (mode 0555, no .data
 * or .proc_handler) whose contents come from the table named in .child.
 * The empty entry at the end terminates the table.
 */
210 static struct ctl_table root_table[] = {
211 	{
212 		.procname	= "kernel",
213 		.mode		= 0555,
214 		.child		= kern_table,
215 	},
216 	{
217 		.procname	= "vm",
218 		.mode		= 0555,
219 		.child		= vm_table,
220 	},
221 	{
222 		.procname	= "fs",
223 		.mode		= 0555,
224 		.child		= fs_table,
225 	},
226 	{
227 		.procname	= "debug",
228 		.mode		= 0555,
229 		.child		= debug_table,
230 	},
231 	{
232 		.procname	= "dev",
233 		.mode		= 0555,
234 		.child		= dev_table,
235 	},
236 /*
237  * NOTE: do not add new entries to this table unless you have read
238  * Documentation/sysctl/ctl_unnumbered.txt
239  */
240 	{ }
241 };
242 
/*
 * Range limits for the CONFIG_SCHED_DEBUG scheduler tunables in
 * kern_table, which point .extra1/.extra2 at these variables.  Note that
 * the sched_latency_ns entry reuses the *_sched_granularity_ns pair; no
 * separate latency limits are defined here.
 */
243 #ifdef CONFIG_SCHED_DEBUG
244 static int min_sched_granularity_ns = 100000;		/* 100 usecs */
245 static int max_sched_granularity_ns = NSEC_PER_SEC;	/* 1 second */
246 static int min_wakeup_granularity_ns;			/* 0 usecs */
247 static int max_wakeup_granularity_ns = NSEC_PER_SEC;	/* 1 second */
248 static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
249 static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;	/* last valid enumerator */
250 static int min_sched_shares_ratelimit = 100000; /* 100 usec */
251 static int max_sched_shares_ratelimit = NSEC_PER_SEC; /* 1 second */
252 #endif
253 
/*
 * Entries of the "kernel" directory (root_table's "kernel" .child).
 *
 * For each entry: .procname is the file name, .data/.maxlen describe the
 * backing variable, .mode holds the permission bits and .proc_handler is
 * the function that services reads and writes.  Where present, .extra1
 * and .extra2 point at the lower and upper bound variables defined
 * earlier in this file.  Entries with .child instead of .data are
 * sub-directories.  Many entries are compiled in only for a particular
 * configuration or architecture.  The empty entry at the end terminates
 * the table.
 *
 * NOTE(review): several variables declared/sized as unsigned int are
 * served by proc_dointvec, whose name suggests signed parsing -- confirm
 * that negative writes are acceptable for them.
 */
254 static struct ctl_table kern_table[] = {
255 	{
256 		.procname	= "sched_child_runs_first",
257 		.data		= &sysctl_sched_child_runs_first,
258 		.maxlen		= sizeof(unsigned int),
259 		.mode		= 0644,
260 		.proc_handler	= proc_dointvec,
261 	},
262 #ifdef CONFIG_SCHED_DEBUG
263 	{
264 		.procname	= "sched_min_granularity_ns",
265 		.data		= &sysctl_sched_min_granularity,
266 		.maxlen		= sizeof(unsigned int),
267 		.mode		= 0644,
268 		.proc_handler	= sched_proc_update_handler,
269 		.extra1		= &min_sched_granularity_ns,
270 		.extra2		= &max_sched_granularity_ns,
271 	},
272 	{
273 		.procname	= "sched_latency_ns",
274 		.data		= &sysctl_sched_latency,
275 		.maxlen		= sizeof(unsigned int),
276 		.mode		= 0644,
277 		.proc_handler	= sched_proc_update_handler,
278 		.extra1		= &min_sched_granularity_ns,	/* shares the granularity bounds */
279 		.extra2		= &max_sched_granularity_ns,
280 	},
281 	{
282 		.procname	= "sched_wakeup_granularity_ns",
283 		.data		= &sysctl_sched_wakeup_granularity,
284 		.maxlen		= sizeof(unsigned int),
285 		.mode		= 0644,
286 		.proc_handler	= sched_proc_update_handler,
287 		.extra1		= &min_wakeup_granularity_ns,
288 		.extra2		= &max_wakeup_granularity_ns,
289 	},
290 	{
291 		.procname	= "sched_shares_ratelimit",
292 		.data		= &sysctl_sched_shares_ratelimit,
293 		.maxlen		= sizeof(unsigned int),
294 		.mode		= 0644,
295 		.proc_handler	= sched_proc_update_handler,
296 		.extra1		= &min_sched_shares_ratelimit,
297 		.extra2		= &max_sched_shares_ratelimit,
298 	},
299 	{
300 		.procname	= "sched_tunable_scaling",
301 		.data		= &sysctl_sched_tunable_scaling,
302 		.maxlen		= sizeof(enum sched_tunable_scaling),
303 		.mode		= 0644,
304 		.proc_handler	= sched_proc_update_handler,
305 		.extra1		= &min_sched_tunable_scaling,
306 		.extra2		= &max_sched_tunable_scaling,
307 	},
308 	{
309 		.procname	= "sched_shares_thresh",
310 		.data		= &sysctl_sched_shares_thresh,
311 		.maxlen		= sizeof(unsigned int),
312 		.mode		= 0644,
313 		.proc_handler	= proc_dointvec_minmax,
314 		.extra1		= &zero,	/* lower bound only; no .extra2 */
315 	},
316 	{
317 		.procname	= "sched_migration_cost",
318 		.data		= &sysctl_sched_migration_cost,
319 		.maxlen		= sizeof(unsigned int),
320 		.mode		= 0644,
321 		.proc_handler	= proc_dointvec,
322 	},
323 	{
324 		.procname	= "sched_nr_migrate",
325 		.data		= &sysctl_sched_nr_migrate,
326 		.maxlen		= sizeof(unsigned int),
327 		.mode		= 0644,
328 		.proc_handler	= proc_dointvec,
329 	},
330 	{
331 		.procname	= "sched_time_avg",
332 		.data		= &sysctl_sched_time_avg,
333 		.maxlen		= sizeof(unsigned int),
334 		.mode		= 0644,
335 		.proc_handler	= proc_dointvec,
336 	},
337 	{
338 		.procname	= "timer_migration",
339 		.data		= &sysctl_timer_migration,
340 		.maxlen		= sizeof(unsigned int),
341 		.mode		= 0644,
342 		.proc_handler	= proc_dointvec_minmax,
343 		.extra1		= &zero,
344 		.extra2		= &one,
345 	},
346 #endif
347 	{
348 		.procname	= "sched_rt_period_us",
349 		.data		= &sysctl_sched_rt_period,
350 		.maxlen		= sizeof(unsigned int),
351 		.mode		= 0644,
352 		.proc_handler	= sched_rt_handler,
353 	},
354 	{
355 		.procname	= "sched_rt_runtime_us",
356 		.data		= &sysctl_sched_rt_runtime,
357 		.maxlen		= sizeof(int),
358 		.mode		= 0644,
359 		.proc_handler	= sched_rt_handler,
360 	},
361 	{
362 		.procname	= "sched_compat_yield",
363 		.data		= &sysctl_sched_compat_yield,
364 		.maxlen		= sizeof(unsigned int),
365 		.mode		= 0644,
366 		.proc_handler	= proc_dointvec,
367 	},
368 #ifdef CONFIG_PROVE_LOCKING
369 	{
370 		.procname	= "prove_locking",
371 		.data		= &prove_locking,
372 		.maxlen		= sizeof(int),
373 		.mode		= 0644,
374 		.proc_handler	= proc_dointvec,
375 	},
376 #endif
377 #ifdef CONFIG_LOCK_STAT
378 	{
379 		.procname	= "lock_stat",
380 		.data		= &lock_stat,
381 		.maxlen		= sizeof(int),
382 		.mode		= 0644,
383 		.proc_handler	= proc_dointvec,
384 	},
385 #endif
386 	{
387 		.procname	= "panic",
388 		.data		= &panic_timeout,
389 		.maxlen		= sizeof(int),
390 		.mode		= 0644,
391 		.proc_handler	= proc_dointvec,
392 	},
393 	{
394 		.procname	= "core_uses_pid",
395 		.data		= &core_uses_pid,
396 		.maxlen		= sizeof(int),
397 		.mode		= 0644,
398 		.proc_handler	= proc_dointvec,
399 	},
400 	{
401 		.procname	= "core_pattern",
402 		.data		= core_pattern,
403 		.maxlen		= CORENAME_MAX_SIZE,
404 		.mode		= 0644,
405 		.proc_handler	= proc_dostring,
406 	},
407 	{
408 		.procname	= "core_pipe_limit",
409 		.data		= &core_pipe_limit,
410 		.maxlen		= sizeof(unsigned int),
411 		.mode		= 0644,
412 		.proc_handler	= proc_dointvec,
413 	},
414 #ifdef CONFIG_PROC_SYSCTL
415 	{
416 		.procname	= "tainted",	/* no .data: proc_taint supplies the value itself */
417 		.maxlen 	= sizeof(long),
418 		.mode		= 0644,
419 		.proc_handler	= proc_taint,
420 	},
421 #endif
422 #ifdef CONFIG_LATENCYTOP
423 	{
424 		.procname	= "latencytop",
425 		.data		= &latencytop_enabled,
426 		.maxlen		= sizeof(int),
427 		.mode		= 0644,
428 		.proc_handler	= proc_dointvec,
429 	},
430 #endif
431 #ifdef CONFIG_BLK_DEV_INITRD
432 	{
433 		.procname	= "real-root-dev",
434 		.data		= &real_root_dev,
435 		.maxlen		= sizeof(int),
436 		.mode		= 0644,
437 		.proc_handler	= proc_dointvec,
438 	},
439 #endif
440 	{
441 		.procname	= "print-fatal-signals",
442 		.data		= &print_fatal_signals,
443 		.maxlen		= sizeof(int),
444 		.mode		= 0644,
445 		.proc_handler	= proc_dointvec,
446 	},
447 #ifdef CONFIG_SPARC
448 	{
449 		.procname	= "reboot-cmd",
450 		.data		= reboot_command,
451 		.maxlen		= 256,
452 		.mode		= 0644,
453 		.proc_handler	= proc_dostring,
454 	},
455 	{
456 		.procname	= "stop-a",
457 		.data		= &stop_a_enabled,
458 		.maxlen		= sizeof (int),
459 		.mode		= 0644,
460 		.proc_handler	= proc_dointvec,
461 	},
462 	{
463 		.procname	= "scons-poweroff",
464 		.data		= &scons_pwroff,
465 		.maxlen		= sizeof (int),
466 		.mode		= 0644,
467 		.proc_handler	= proc_dointvec,
468 	},
469 #endif
470 #ifdef CONFIG_SPARC64
471 	{
472 		.procname	= "tsb-ratio",
473 		.data		= &sysctl_tsb_ratio,
474 		.maxlen		= sizeof (int),
475 		.mode		= 0644,
476 		.proc_handler	= proc_dointvec,
477 	},
478 #endif
479 #ifdef __hppa__
480 	{
481 		.procname	= "soft-power",
482 		.data		= &pwrsw_enabled,
483 		.maxlen		= sizeof (int),
484 	 	.mode		= 0644,
485 		.proc_handler	= proc_dointvec,
486 	},
487 	{
488 		.procname	= "unaligned-trap",
489 		.data		= &unaligned_enabled,
490 		.maxlen		= sizeof (int),
491 		.mode		= 0644,
492 		.proc_handler	= proc_dointvec,
493 	},
494 #endif
495 	{
496 		.procname	= "ctrl-alt-del",
497 		.data		= &C_A_D,
498 		.maxlen		= sizeof(int),
499 		.mode		= 0644,
500 		.proc_handler	= proc_dointvec,
501 	},
502 #ifdef CONFIG_FUNCTION_TRACER
503 	{
504 		.procname	= "ftrace_enabled",
505 		.data		= &ftrace_enabled,
506 		.maxlen		= sizeof(int),
507 		.mode		= 0644,
508 		.proc_handler	= ftrace_enable_sysctl,
509 	},
510 #endif
511 #ifdef CONFIG_STACK_TRACER
512 	{
513 		.procname	= "stack_tracer_enabled",
514 		.data		= &stack_tracer_enabled,
515 		.maxlen		= sizeof(int),
516 		.mode		= 0644,
517 		.proc_handler	= stack_trace_sysctl,
518 	},
519 #endif
520 #ifdef CONFIG_TRACING
521 	{
522 		.procname	= "ftrace_dump_on_oops",
523 		.data		= &ftrace_dump_on_oops,
524 		.maxlen		= sizeof(int),
525 		.mode		= 0644,
526 		.proc_handler	= proc_dointvec,
527 	},
528 #endif
529 #ifdef CONFIG_MODULES
530 	{
531 		.procname	= "modprobe",
532 		.data		= &modprobe_path,
533 		.maxlen		= KMOD_PATH_LEN,
534 		.mode		= 0644,
535 		.proc_handler	= proc_dostring,
536 	},
537 	{
538 		.procname	= "modules_disabled",
539 		.data		= &modules_disabled,
540 		.maxlen		= sizeof(int),
541 		.mode		= 0644,
542 		/* only handle a transition from default "0" to "1" */
543 		.proc_handler	= proc_dointvec_minmax,
544 		.extra1		= &one,	/* min == max == 1: the only value a write may set */
545 		.extra2		= &one,
546 	},
547 #endif
548 #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
549 	{
550 		.procname	= "hotplug",
551 		.data		= &uevent_helper,
552 		.maxlen		= UEVENT_HELPER_PATH_LEN,
553 		.mode		= 0644,
554 		.proc_handler	= proc_dostring,
555 	},
556 #endif
557 #ifdef CONFIG_CHR_DEV_SG
558 	{
559 		.procname	= "sg-big-buff",
560 		.data		= &sg_big_buff,
561 		.maxlen		= sizeof (int),
562 		.mode		= 0444,	/* read-only */
563 		.proc_handler	= proc_dointvec,
564 	},
565 #endif
566 #ifdef CONFIG_BSD_PROCESS_ACCT
567 	{
568 		.procname	= "acct",
569 		.data		= &acct_parm,
570 		.maxlen		= 3*sizeof(int),	/* exposes three ints of acct_parm[] */
571 		.mode		= 0644,
572 		.proc_handler	= proc_dointvec,
573 	},
574 #endif
575 #ifdef CONFIG_MAGIC_SYSRQ
576 	{
577 		.procname	= "sysrq",
578 		.data		= &__sysrq_enabled,
579 		.maxlen		= sizeof (int),
580 		.mode		= 0644,
581 		.proc_handler	= proc_dointvec,
582 	},
583 #endif
584 #ifdef CONFIG_PROC_SYSCTL
585 	{
586 		.procname	= "cad_pid",
587 		.data		= NULL,	/* value is produced/consumed by proc_do_cad_pid */
588 		.maxlen		= sizeof (int),
589 		.mode		= 0600,	/* owner read/write only */
590 		.proc_handler	= proc_do_cad_pid,
591 	},
592 #endif
593 	{
594 		.procname	= "threads-max",
595 		.data		= &max_threads,
596 		.maxlen		= sizeof(int),
597 		.mode		= 0644,
598 		.proc_handler	= proc_dointvec,
599 	},
600 	{
601 		.procname	= "random",	/* sub-directory; entries come from random_table */
602 		.mode		= 0555,
603 		.child		= random_table,
604 	},
605 	{
606 		.procname	= "overflowuid",
607 		.data		= &overflowuid,
608 		.maxlen		= sizeof(int),
609 		.mode		= 0644,
610 		.proc_handler	= proc_dointvec_minmax,
611 		.extra1		= &minolduid,
612 		.extra2		= &maxolduid,
613 	},
614 	{
615 		.procname	= "overflowgid",
616 		.data		= &overflowgid,
617 		.maxlen		= sizeof(int),
618 		.mode		= 0644,
619 		.proc_handler	= proc_dointvec_minmax,
620 		.extra1		= &minolduid,	/* the uid bounds are reused for the gid */
621 		.extra2		= &maxolduid,
622 	},
623 #ifdef CONFIG_S390
624 #ifdef CONFIG_MATHEMU
625 	{
626 		.procname	= "ieee_emulation_warnings",
627 		.data		= &sysctl_ieee_emulation_warnings,
628 		.maxlen		= sizeof(int),
629 		.mode		= 0644,
630 		.proc_handler	= proc_dointvec,
631 	},
632 #endif
633 	{
634 		.procname	= "userprocess_debug",
635 		.data		= &sysctl_userprocess_debug,
636 		.maxlen		= sizeof(int),
637 		.mode		= 0644,
638 		.proc_handler	= proc_dointvec,
639 	},
640 #endif
641 	{
642 		.procname	= "pid_max",
643 		.data		= &pid_max,
644 		.maxlen		= sizeof (int),
645 		.mode		= 0644,
646 		.proc_handler	= proc_dointvec_minmax,
647 		.extra1		= &pid_max_min,
648 		.extra2		= &pid_max_max,
649 	},
650 	{
651 		.procname	= "panic_on_oops",
652 		.data		= &panic_on_oops,
653 		.maxlen		= sizeof(int),
654 		.mode		= 0644,
655 		.proc_handler	= proc_dointvec,
656 	},
657 #if defined CONFIG_PRINTK
658 	{
659 		.procname	= "printk",
660 		.data		= &console_loglevel,
661 		.maxlen		= 4*sizeof(int),	/* four ints starting at console_loglevel; layout defined elsewhere -- TODO confirm */
662 		.mode		= 0644,
663 		.proc_handler	= proc_dointvec,
664 	},
665 	{
666 		.procname	= "printk_ratelimit",
667 		.data		= &printk_ratelimit_state.interval,
668 		.maxlen		= sizeof(int),
669 		.mode		= 0644,
670 		.proc_handler	= proc_dointvec_jiffies,
671 	},
672 	{
673 		.procname	= "printk_ratelimit_burst",
674 		.data		= &printk_ratelimit_state.burst,
675 		.maxlen		= sizeof(int),
676 		.mode		= 0644,
677 		.proc_handler	= proc_dointvec,
678 	},
679 	{
680 		.procname	= "printk_delay",
681 		.data		= &printk_delay_msec,
682 		.maxlen		= sizeof(int),
683 		.mode		= 0644,
684 		.proc_handler	= proc_dointvec_minmax,
685 		.extra1		= &zero,
686 		.extra2		= &ten_thousand,
687 	},
688 #endif
689 	{
690 		.procname	= "ngroups_max",
691 		.data		= &ngroups_max,
692 		.maxlen		= sizeof (int),
693 		.mode		= 0444,	/* read-only */
694 		.proc_handler	= proc_dointvec,
695 	},
696 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
697 	{
698 		.procname       = "unknown_nmi_panic",
699 		.data           = &unknown_nmi_panic,
700 		.maxlen         = sizeof (int),
701 		.mode           = 0644,
702 		.proc_handler   = proc_dointvec,
703 	},
704 	{
705 		.procname       = "nmi_watchdog",
706 		.data           = &nmi_watchdog_enabled,
707 		.maxlen         = sizeof (int),
708 		.mode           = 0644,
709 		.proc_handler   = proc_nmi_enabled,
710 	},
711 #endif
712 #if defined(CONFIG_X86)
713 	{
714 		.procname	= "panic_on_unrecovered_nmi",
715 		.data		= &panic_on_unrecovered_nmi,
716 		.maxlen		= sizeof(int),
717 		.mode		= 0644,
718 		.proc_handler	= proc_dointvec,
719 	},
720 	{
721 		.procname	= "panic_on_io_nmi",
722 		.data		= &panic_on_io_nmi,
723 		.maxlen		= sizeof(int),
724 		.mode		= 0644,
725 		.proc_handler	= proc_dointvec,
726 	},
727 	{
728 		.procname	= "bootloader_type",
729 		.data		= &bootloader_type,
730 		.maxlen		= sizeof (int),
731 		.mode		= 0444,	/* read-only */
732 		.proc_handler	= proc_dointvec,
733 	},
734 	{
735 		.procname	= "bootloader_version",
736 		.data		= &bootloader_version,
737 		.maxlen		= sizeof (int),
738 		.mode		= 0444,	/* read-only */
739 		.proc_handler	= proc_dointvec,
740 	},
741 	{
742 		.procname	= "kstack_depth_to_print",
743 		.data		= &kstack_depth_to_print,
744 		.maxlen		= sizeof(int),
745 		.mode		= 0644,
746 		.proc_handler	= proc_dointvec,
747 	},
748 	{
749 		.procname	= "io_delay_type",
750 		.data		= &io_delay_type,
751 		.maxlen		= sizeof(int),
752 		.mode		= 0644,
753 		.proc_handler	= proc_dointvec,
754 	},
755 #endif
756 #if defined(CONFIG_MMU)
757 	{
758 		.procname	= "randomize_va_space",
759 		.data		= &randomize_va_space,
760 		.maxlen		= sizeof(int),
761 		.mode		= 0644,
762 		.proc_handler	= proc_dointvec,
763 	},
764 #endif
765 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
766 	{
767 		.procname	= "spin_retry",
768 		.data		= &spin_retry,
769 		.maxlen		= sizeof (int),
770 		.mode		= 0644,
771 		.proc_handler	= proc_dointvec,
772 	},
773 #endif
774 #if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
775 	{
776 		.procname	= "acpi_video_flags",
777 		.data		= &acpi_realmode_flags,
778 		.maxlen		= sizeof (unsigned long),
779 		.mode		= 0644,
780 		.proc_handler	= proc_doulongvec_minmax,	/* no .extra1/.extra2 bounds supplied */
781 	},
782 #endif
783 #ifdef CONFIG_IA64
784 	{
785 		.procname	= "ignore-unaligned-usertrap",
786 		.data		= &no_unaligned_warning,
787 		.maxlen		= sizeof (int),
788 	 	.mode		= 0644,
789 		.proc_handler	= proc_dointvec,
790 	},
791 	{
792 		.procname	= "unaligned-dump-stack",
793 		.data		= &unaligned_dump_stack,
794 		.maxlen		= sizeof (int),
795 		.mode		= 0644,
796 		.proc_handler	= proc_dointvec,
797 	},
798 #endif
799 #ifdef CONFIG_DETECT_SOFTLOCKUP
800 	{
801 		.procname	= "softlockup_panic",
802 		.data		= &softlockup_panic,
803 		.maxlen		= sizeof(int),
804 		.mode		= 0644,
805 		.proc_handler	= proc_dointvec_minmax,
806 		.extra1		= &zero,
807 		.extra2		= &one,
808 	},
809 	{
810 		.procname	= "softlockup_thresh",
811 		.data		= &softlockup_thresh,
812 		.maxlen		= sizeof(int),
813 		.mode		= 0644,
814 		.proc_handler	= proc_dosoftlockup_thresh,
815 		.extra1		= &neg_one,	/* bounds are -1 and 60 */
816 		.extra2		= &sixty,
817 	},
818 #endif
819 #ifdef CONFIG_DETECT_HUNG_TASK
820 	{
821 		.procname	= "hung_task_panic",
822 		.data		= &sysctl_hung_task_panic,
823 		.maxlen		= sizeof(int),
824 		.mode		= 0644,
825 		.proc_handler	= proc_dointvec_minmax,
826 		.extra1		= &zero,
827 		.extra2		= &one,
828 	},
829 	{
830 		.procname	= "hung_task_check_count",
831 		.data		= &sysctl_hung_task_check_count,
832 		.maxlen		= sizeof(unsigned long),
833 		.mode		= 0644,
834 		.proc_handler	= proc_doulongvec_minmax,
835 	},
836 	{
837 		.procname	= "hung_task_timeout_secs",
838 		.data		= &sysctl_hung_task_timeout_secs,
839 		.maxlen		= sizeof(unsigned long),
840 		.mode		= 0644,
841 		.proc_handler	= proc_dohung_task_timeout_secs,
842 	},
843 	{
844 		.procname	= "hung_task_warnings",
845 		.data		= &sysctl_hung_task_warnings,
846 		.maxlen		= sizeof(unsigned long),
847 		.mode		= 0644,
848 		.proc_handler	= proc_doulongvec_minmax,
849 	},
850 #endif
851 #ifdef CONFIG_COMPAT
852 	{
853 		.procname	= "compat-log",
854 		.data		= &compat_log,
855 		.maxlen		= sizeof (int),
856 	 	.mode		= 0644,
857 		.proc_handler	= proc_dointvec,
858 	},
859 #endif
860 #ifdef CONFIG_RT_MUTEXES
861 	{
862 		.procname	= "max_lock_depth",
863 		.data		= &max_lock_depth,
864 		.maxlen		= sizeof(int),
865 		.mode		= 0644,
866 		.proc_handler	= proc_dointvec,
867 	},
868 #endif
869 	{
870 		.procname	= "poweroff_cmd",
871 		.data		= &poweroff_cmd,
872 		.maxlen		= POWEROFF_CMD_PATH_LEN,
873 		.mode		= 0644,
874 		.proc_handler	= proc_dostring,
875 	},
876 #ifdef CONFIG_KEYS
877 	{
878 		.procname	= "keys",	/* sub-directory; entries come from key_sysctls */
879 		.mode		= 0555,
880 		.child		= key_sysctls,
881 	},
882 #endif
883 #ifdef CONFIG_RCU_TORTURE_TEST
884 	{
885 		.procname       = "rcutorture_runnable",
886 		.data           = &rcutorture_runnable,
887 		.maxlen         = sizeof(int),
888 		.mode           = 0644,
889 		.proc_handler	= proc_dointvec,
890 	},
891 #endif
892 #ifdef CONFIG_SLOW_WORK
893 	{
894 		.procname	= "slow-work",	/* sub-directory; entries come from slow_work_sysctls */
895 		.mode		= 0555,
896 		.child		= slow_work_sysctls,
897 	},
898 #endif
899 #ifdef CONFIG_PERF_EVENTS
900 	{
901 		.procname	= "perf_event_paranoid",
902 		.data		= &sysctl_perf_event_paranoid,
903 		.maxlen		= sizeof(sysctl_perf_event_paranoid),
904 		.mode		= 0644,
905 		.proc_handler	= proc_dointvec,
906 	},
907 	{
908 		.procname	= "perf_event_mlock_kb",
909 		.data		= &sysctl_perf_event_mlock,
910 		.maxlen		= sizeof(sysctl_perf_event_mlock),
911 		.mode		= 0644,
912 		.proc_handler	= proc_dointvec,
913 	},
914 	{
915 		.procname	= "perf_event_max_sample_rate",
916 		.data		= &sysctl_perf_event_sample_rate,
917 		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
918 		.mode		= 0644,
919 		.proc_handler	= proc_dointvec,
920 	},
921 #endif
922 #ifdef CONFIG_KMEMCHECK
923 	{
924 		.procname	= "kmemcheck",
925 		.data		= &kmemcheck_enabled,
926 		.maxlen		= sizeof(int),
927 		.mode		= 0644,
928 		.proc_handler	= proc_dointvec,
929 	},
930 #endif
931 #ifdef CONFIG_BLOCK
932 	{
933 		.procname	= "blk_iopoll",
934 		.data		= &blk_iopoll_enabled,
935 		.maxlen		= sizeof(int),
936 		.mode		= 0644,
937 		.proc_handler	= proc_dointvec,
938 	},
939 #endif
940 /*
941  * NOTE: do not add new entries to this table unless you have read
942  * Documentation/sysctl/ctl_unnumbered.txt
943  */
944 	{ }
945 };
946 
947 static struct ctl_table vm_table[] = {
948 	{
949 		.procname	= "overcommit_memory",
950 		.data		= &sysctl_overcommit_memory,
951 		.maxlen		= sizeof(sysctl_overcommit_memory),
952 		.mode		= 0644,
953 		.proc_handler	= proc_dointvec,
954 	},
955 	{
956 		.procname	= "panic_on_oom",
957 		.data		= &sysctl_panic_on_oom,
958 		.maxlen		= sizeof(sysctl_panic_on_oom),
959 		.mode		= 0644,
960 		.proc_handler	= proc_dointvec,
961 	},
962 	{
963 		.procname	= "oom_kill_allocating_task",
964 		.data		= &sysctl_oom_kill_allocating_task,
965 		.maxlen		= sizeof(sysctl_oom_kill_allocating_task),
966 		.mode		= 0644,
967 		.proc_handler	= proc_dointvec,
968 	},
969 	{
970 		.procname	= "oom_dump_tasks",
971 		.data		= &sysctl_oom_dump_tasks,
972 		.maxlen		= sizeof(sysctl_oom_dump_tasks),
973 		.mode		= 0644,
974 		.proc_handler	= proc_dointvec,
975 	},
976 	{
977 		.procname	= "overcommit_ratio",
978 		.data		= &sysctl_overcommit_ratio,
979 		.maxlen		= sizeof(sysctl_overcommit_ratio),
980 		.mode		= 0644,
981 		.proc_handler	= proc_dointvec,
982 	},
983 	{
984 		.procname	= "page-cluster",
985 		.data		= &page_cluster,
986 		.maxlen		= sizeof(int),
987 		.mode		= 0644,
988 		.proc_handler	= proc_dointvec,
989 	},
990 	{
991 		.procname	= "dirty_background_ratio",
992 		.data		= &dirty_background_ratio,
993 		.maxlen		= sizeof(dirty_background_ratio),
994 		.mode		= 0644,
995 		.proc_handler	= dirty_background_ratio_handler,
996 		.extra1		= &zero,
997 		.extra2		= &one_hundred,
998 	},
999 	{
1000 		.procname	= "dirty_background_bytes",
1001 		.data		= &dirty_background_bytes,
1002 		.maxlen		= sizeof(dirty_background_bytes),
1003 		.mode		= 0644,
1004 		.proc_handler	= dirty_background_bytes_handler,
1005 		.extra1		= &one_ul,
1006 	},
1007 	{
1008 		.procname	= "dirty_ratio",
1009 		.data		= &vm_dirty_ratio,
1010 		.maxlen		= sizeof(vm_dirty_ratio),
1011 		.mode		= 0644,
1012 		.proc_handler	= dirty_ratio_handler,
1013 		.extra1		= &zero,
1014 		.extra2		= &one_hundred,
1015 	},
1016 	{
1017 		.procname	= "dirty_bytes",
1018 		.data		= &vm_dirty_bytes,
1019 		.maxlen		= sizeof(vm_dirty_bytes),
1020 		.mode		= 0644,
1021 		.proc_handler	= dirty_bytes_handler,
1022 		.extra1		= &dirty_bytes_min,
1023 	},
1024 	{
1025 		.procname	= "dirty_writeback_centisecs",
1026 		.data		= &dirty_writeback_interval,
1027 		.maxlen		= sizeof(dirty_writeback_interval),
1028 		.mode		= 0644,
1029 		.proc_handler	= dirty_writeback_centisecs_handler,
1030 	},
1031 	{
1032 		.procname	= "dirty_expire_centisecs",
1033 		.data		= &dirty_expire_interval,
1034 		.maxlen		= sizeof(dirty_expire_interval),
1035 		.mode		= 0644,
1036 		.proc_handler	= proc_dointvec,
1037 	},
1038 	{
1039 		.procname	= "nr_pdflush_threads",
1040 		.data		= &nr_pdflush_threads,
1041 		.maxlen		= sizeof nr_pdflush_threads,
1042 		.mode		= 0444 /* read-only*/,
1043 		.proc_handler	= proc_dointvec,
1044 	},
1045 	{
1046 		.procname	= "swappiness",
1047 		.data		= &vm_swappiness,
1048 		.maxlen		= sizeof(vm_swappiness),
1049 		.mode		= 0644,
1050 		.proc_handler	= proc_dointvec_minmax,
1051 		.extra1		= &zero,
1052 		.extra2		= &one_hundred,
1053 	},
1054 #ifdef CONFIG_HUGETLB_PAGE
1055 	{
1056 		.procname	= "nr_hugepages",
1057 		.data		= NULL,
1058 		.maxlen		= sizeof(unsigned long),
1059 		.mode		= 0644,
1060 		.proc_handler	= hugetlb_sysctl_handler,
1061 		.extra1		= (void *)&hugetlb_zero,
1062 		.extra2		= (void *)&hugetlb_infinity,
1063 	},
1064 #ifdef CONFIG_NUMA
1065 	{
1066 		.procname       = "nr_hugepages_mempolicy",
1067 		.data           = NULL,
1068 		.maxlen         = sizeof(unsigned long),
1069 		.mode           = 0644,
1070 		.proc_handler   = &hugetlb_mempolicy_sysctl_handler,
1071 		.extra1		= (void *)&hugetlb_zero,
1072 		.extra2		= (void *)&hugetlb_infinity,
1073 	},
1074 #endif
1075 	 {
1076 		.procname	= "hugetlb_shm_group",
1077 		.data		= &sysctl_hugetlb_shm_group,
1078 		.maxlen		= sizeof(gid_t),
1079 		.mode		= 0644,
1080 		.proc_handler	= proc_dointvec,
1081 	 },
1082 	 {
1083 		.procname	= "hugepages_treat_as_movable",
1084 		.data		= &hugepages_treat_as_movable,
1085 		.maxlen		= sizeof(int),
1086 		.mode		= 0644,
1087 		.proc_handler	= hugetlb_treat_movable_handler,
1088 	},
1089 	{
1090 		.procname	= "nr_overcommit_hugepages",
1091 		.data		= NULL,
1092 		.maxlen		= sizeof(unsigned long),
1093 		.mode		= 0644,
1094 		.proc_handler	= hugetlb_overcommit_handler,
1095 		.extra1		= (void *)&hugetlb_zero,
1096 		.extra2		= (void *)&hugetlb_infinity,
1097 	},
1098 #endif
1099 	{
1100 		.procname	= "lowmem_reserve_ratio",
1101 		.data		= &sysctl_lowmem_reserve_ratio,
1102 		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
1103 		.mode		= 0644,
1104 		.proc_handler	= lowmem_reserve_ratio_sysctl_handler,
1105 	},
1106 	{
1107 		.procname	= "drop_caches",
1108 		.data		= &sysctl_drop_caches,
1109 		.maxlen		= sizeof(int),
1110 		.mode		= 0644,
1111 		.proc_handler	= drop_caches_sysctl_handler,
1112 	},
1113 	{
1114 		.procname	= "min_free_kbytes",
1115 		.data		= &min_free_kbytes,
1116 		.maxlen		= sizeof(min_free_kbytes),
1117 		.mode		= 0644,
1118 		.proc_handler	= min_free_kbytes_sysctl_handler,
1119 		.extra1		= &zero,
1120 	},
1121 	{
1122 		.procname	= "percpu_pagelist_fraction",
1123 		.data		= &percpu_pagelist_fraction,
1124 		.maxlen		= sizeof(percpu_pagelist_fraction),
1125 		.mode		= 0644,
1126 		.proc_handler	= percpu_pagelist_fraction_sysctl_handler,
1127 		.extra1		= &min_percpu_pagelist_fract,
1128 	},
1129 #ifdef CONFIG_MMU
1130 	{
1131 		.procname	= "max_map_count",
1132 		.data		= &sysctl_max_map_count,
1133 		.maxlen		= sizeof(sysctl_max_map_count),
1134 		.mode		= 0644,
1135 		.proc_handler	= proc_dointvec_minmax,
1136 		.extra1		= &zero,
1137 	},
1138 #else
1139 	{
1140 		.procname	= "nr_trim_pages",
1141 		.data		= &sysctl_nr_trim_pages,
1142 		.maxlen		= sizeof(sysctl_nr_trim_pages),
1143 		.mode		= 0644,
1144 		.proc_handler	= proc_dointvec_minmax,
1145 		.extra1		= &zero,
1146 	},
1147 #endif
1148 	{
1149 		.procname	= "laptop_mode",
1150 		.data		= &laptop_mode,
1151 		.maxlen		= sizeof(laptop_mode),
1152 		.mode		= 0644,
1153 		.proc_handler	= proc_dointvec_jiffies,
1154 	},
1155 	{
1156 		.procname	= "block_dump",
1157 		.data		= &block_dump,
1158 		.maxlen		= sizeof(block_dump),
1159 		.mode		= 0644,
1160 		.proc_handler	= proc_dointvec,
1161 		.extra1		= &zero,
1162 	},
1163 	{
1164 		.procname	= "vfs_cache_pressure",
1165 		.data		= &sysctl_vfs_cache_pressure,
1166 		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
1167 		.mode		= 0644,
1168 		.proc_handler	= proc_dointvec,
1169 		.extra1		= &zero,
1170 	},
1171 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1172 	{
1173 		.procname	= "legacy_va_layout",
1174 		.data		= &sysctl_legacy_va_layout,
1175 		.maxlen		= sizeof(sysctl_legacy_va_layout),
1176 		.mode		= 0644,
1177 		.proc_handler	= proc_dointvec,
1178 		.extra1		= &zero,
1179 	},
1180 #endif
1181 #ifdef CONFIG_NUMA
1182 	{
1183 		.procname	= "zone_reclaim_mode",
1184 		.data		= &zone_reclaim_mode,
1185 		.maxlen		= sizeof(zone_reclaim_mode),
1186 		.mode		= 0644,
1187 		.proc_handler	= proc_dointvec,
1188 		.extra1		= &zero,
1189 	},
1190 	{
1191 		.procname	= "min_unmapped_ratio",
1192 		.data		= &sysctl_min_unmapped_ratio,
1193 		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
1194 		.mode		= 0644,
1195 		.proc_handler	= sysctl_min_unmapped_ratio_sysctl_handler,
1196 		.extra1		= &zero,
1197 		.extra2		= &one_hundred,
1198 	},
1199 	{
1200 		.procname	= "min_slab_ratio",
1201 		.data		= &sysctl_min_slab_ratio,
1202 		.maxlen		= sizeof(sysctl_min_slab_ratio),
1203 		.mode		= 0644,
1204 		.proc_handler	= sysctl_min_slab_ratio_sysctl_handler,
1205 		.extra1		= &zero,
1206 		.extra2		= &one_hundred,
1207 	},
1208 #endif
1209 #ifdef CONFIG_SMP
1210 	{
1211 		.procname	= "stat_interval",
1212 		.data		= &sysctl_stat_interval,
1213 		.maxlen		= sizeof(sysctl_stat_interval),
1214 		.mode		= 0644,
1215 		.proc_handler	= proc_dointvec_jiffies,
1216 	},
1217 #endif
1218 #ifdef CONFIG_MMU
1219 	{
1220 		.procname	= "mmap_min_addr",
1221 		.data		= &dac_mmap_min_addr,
1222 		.maxlen		= sizeof(unsigned long),
1223 		.mode		= 0644,
1224 		.proc_handler	= mmap_min_addr_handler,
1225 	},
1226 #endif
1227 #ifdef CONFIG_NUMA
1228 	{
1229 		.procname	= "numa_zonelist_order",
1230 		.data		= &numa_zonelist_order,
1231 		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
1232 		.mode		= 0644,
1233 		.proc_handler	= numa_zonelist_order_handler,
1234 	},
1235 #endif
1236 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1237    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1238 	{
1239 		.procname	= "vdso_enabled",
1240 		.data		= &vdso_enabled,
1241 		.maxlen		= sizeof(vdso_enabled),
1242 		.mode		= 0644,
1243 		.proc_handler	= proc_dointvec,
1244 		.extra1		= &zero,
1245 	},
1246 #endif
1247 #ifdef CONFIG_HIGHMEM
1248 	{
1249 		.procname	= "highmem_is_dirtyable",
1250 		.data		= &vm_highmem_is_dirtyable,
1251 		.maxlen		= sizeof(vm_highmem_is_dirtyable),
1252 		.mode		= 0644,
1253 		.proc_handler	= proc_dointvec_minmax,
1254 		.extra1		= &zero,
1255 		.extra2		= &one,
1256 	},
1257 #endif
1258 	{
1259 		.procname	= "scan_unevictable_pages",
1260 		.data		= &scan_unevictable_pages,
1261 		.maxlen		= sizeof(scan_unevictable_pages),
1262 		.mode		= 0644,
1263 		.proc_handler	= scan_unevictable_handler,
1264 	},
1265 #ifdef CONFIG_MEMORY_FAILURE
1266 	{
1267 		.procname	= "memory_failure_early_kill",
1268 		.data		= &sysctl_memory_failure_early_kill,
1269 		.maxlen		= sizeof(sysctl_memory_failure_early_kill),
1270 		.mode		= 0644,
1271 		.proc_handler	= proc_dointvec_minmax,
1272 		.extra1		= &zero,
1273 		.extra2		= &one,
1274 	},
1275 	{
1276 		.procname	= "memory_failure_recovery",
1277 		.data		= &sysctl_memory_failure_recovery,
1278 		.maxlen		= sizeof(sysctl_memory_failure_recovery),
1279 		.mode		= 0644,
1280 		.proc_handler	= proc_dointvec_minmax,
1281 		.extra1		= &zero,
1282 		.extra2		= &one,
1283 	},
1284 #endif
1285 
1286 /*
1287  * NOTE: do not add new entries to this table unless you have read
1288  * Documentation/sysctl/ctl_unnumbered.txt
1289  */
1290 	{ }
1291 };
1292 
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
/*
 * Empty table (sentinel only) used as the child of the "binfmt_misc"
 * directory entry in fs_table.
 */
static struct ctl_table binfmt_misc_table[] = {
	{ }
};
#endif
1298 
/*
 * Entries of the "fs" sysctl table.  Each element describes one file
 * (or, where .child is set, one sub-directory); the empty element at
 * the end terminates the table.
 */
static struct ctl_table fs_table[] = {
	/* read-only view of the first two ints of inodes_stat */
	{
		.procname	= "inode-nr",
		.data		= &inodes_stat,
		.maxlen		= 2*sizeof(int),
		.mode		= 0444,
		.proc_handler	= proc_dointvec,
	},
	/* read-only view of the first seven ints of inodes_stat */
	{
		.procname	= "inode-state",
		.data		= &inodes_stat,
		.maxlen		= 7*sizeof(int),
		.mode		= 0444,
		.proc_handler	= proc_dointvec,
	},
	/* read-only; uses the dedicated proc_nr_files handler */
	{
		.procname	= "file-nr",
		.data		= &files_stat,
		.maxlen		= 3*sizeof(int),
		.mode		= 0444,
		.proc_handler	= proc_nr_files,
	},
	{
		.procname	= "file-max",
		.data		= &files_stat.max_files,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	/* writes are bounded by sysctl_nr_open_min / sysctl_nr_open_max */
	{
		.procname	= "nr_open",
		.data		= &sysctl_nr_open,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &sysctl_nr_open_min,
		.extra2		= &sysctl_nr_open_max,
	},
	/* read-only view of six ints starting at dentry_stat */
	{
		.procname	= "dentry-state",
		.data		= &dentry_stat,
		.maxlen		= 6*sizeof(int),
		.mode		= 0444,
		.proc_handler	= proc_dointvec,
	},
	/* writes are bounded by minolduid / maxolduid */
	{
		.procname	= "overflowuid",
		.data		= &fs_overflowuid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &minolduid,
		.extra2		= &maxolduid,
	},
	/* same bounds as overflowuid */
	{
		.procname	= "overflowgid",
		.data		= &fs_overflowgid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &minolduid,
		.extra2		= &maxolduid,
	},
#ifdef CONFIG_FILE_LOCKING
	{
		.procname	= "leases-enable",
		.data		= &leases_enable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#ifdef CONFIG_DNOTIFY
	{
		.procname	= "dir-notify-enable",
		.data		= &dir_notify_enable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
/*
 * Everything from here to the matching #endif (lease-break-time, the
 * aio entries, and the inotify/epoll directories) is only built when
 * CONFIG_MMU is set.
 */
#ifdef CONFIG_MMU
#ifdef CONFIG_FILE_LOCKING
	{
		.procname	= "lease-break-time",
		.data		= &lease_break_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#ifdef CONFIG_AIO
	/* read-only unsigned long */
	{
		.procname	= "aio-nr",
		.data		= &aio_nr,
		.maxlen		= sizeof(aio_nr),
		.mode		= 0444,
		.proc_handler	= proc_doulongvec_minmax,
	},
	/* no extra1/extra2 bounds are supplied for this entry */
	{
		.procname	= "aio-max-nr",
		.data		= &aio_max_nr,
		.maxlen		= sizeof(aio_max_nr),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
#endif /* CONFIG_AIO */
#ifdef CONFIG_INOTIFY_USER
	/* directory: entries come from inotify_table */
	{
		.procname	= "inotify",
		.mode		= 0555,
		.child		= inotify_table,
	},
#endif
#ifdef CONFIG_EPOLL
	/* directory: entries come from epoll_table */
	{
		.procname	= "epoll",
		.mode		= 0555,
		.child		= epoll_table,
	},
#endif
#endif /* CONFIG_MMU */
	/* accepted values are limited to the range [zero, two] */
	{
		.procname	= "suid_dumpable",
		.data		= &suid_dumpable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &two,
	},
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
	/* directory backed by the (empty) binfmt_misc_table */
	{
		.procname	= "binfmt_misc",
		.mode		= 0555,
		.child		= binfmt_misc_table,
	},
#endif
/*
 * NOTE: do not add new entries to this table unless you have read
 * Documentation/sysctl/ctl_unnumbered.txt
 */
	{ }
};
1443 
/*
 * Entries of the "debug" sysctl table.  Both entries are conditional,
 * so on some configurations the table holds only the terminating
 * empty element.
 */
static struct ctl_table debug_table[] = {
#if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC)
	/* exposes show_unhandled_signals as a plain int */
	{
		.procname	= "exception-trace",
		.data		= &show_unhandled_signals,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
#endif
#if defined(CONFIG_OPTPROBES)
	/* extra1/extra2 are set to zero/one for the dedicated handler */
	{
		.procname	= "kprobes-optimization",
		.data		= &sysctl_kprobes_optimization,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_kprobes_optimization_handler,
		.extra1		= &zero,
		.extra2		= &one,
	},
#endif
	{ }
};
1467 
/* "dev" table: contains nothing but the terminating empty element. */
static struct ctl_table dev_table[] = {
	{ }
};
1471 
/*
 * Spinlock taken by the functions below around updates to the
 * used/count/unregistering fields of table headers and around changes
 * to the header and root lists.
 */
static DEFINE_SPINLOCK(sysctl_lock);
1473 
1474 /* called under sysctl_lock */
1475 static int use_table(struct ctl_table_header *p)
1476 {
1477 	if (unlikely(p->unregistering))
1478 		return 0;
1479 	p->used++;
1480 	return 1;
1481 }
1482 
1483 /* called under sysctl_lock */
1484 static void unuse_table(struct ctl_table_header *p)
1485 {
1486 	if (!--p->used)
1487 		if (unlikely(p->unregistering))
1488 			complete(p->unregistering);
1489 }
1490 
/*
 * Mark a header as being unregistered and unlink it from its list.
 *
 * called under sysctl_lock, will reacquire if has to wait
 *
 * If the header still has users, an on-stack completion is published
 * through p->unregistering, the lock is dropped, and we sleep until
 * unuse_table() signals that the last user is gone; the lock is then
 * retaken.  On return the lock is held and p is off its list.
 */
static void start_unregistering(struct ctl_table_header *p)
{
	/*
	 * if p->used is 0, nobody will ever touch that entry again;
	 * we'll eliminate all paths to it before dropping sysctl_lock
	 */
	if (unlikely(p->used)) {
		struct completion wait;
		init_completion(&wait);
		/* from here on use_table() refuses new users of p */
		p->unregistering = &wait;
		spin_unlock(&sysctl_lock);
		wait_for_completion(&wait);
		spin_lock(&sysctl_lock);
	} else {
		/* anything non-NULL; we'll never dereference it */
		p->unregistering = ERR_PTR(-EINVAL);
	}
	/*
	 * do not remove from the list until nobody holds it; walking the
	 * list in do_sysctl() relies on that.
	 */
	list_del_init(&p->ctl_entry);
}
1515 
/*
 * Take an additional reference on @head by incrementing head->count
 * under sysctl_lock.  Paired with sysctl_head_put().
 */
void sysctl_head_get(struct ctl_table_header *head)
{
	spin_lock(&sysctl_lock);
	head->count++;
	spin_unlock(&sysctl_lock);
}
1522 
1523 void sysctl_head_put(struct ctl_table_header *head)
1524 {
1525 	spin_lock(&sysctl_lock);
1526 	if (!--head->count)
1527 		kfree(head);
1528 	spin_unlock(&sysctl_lock);
1529 }
1530 
1531 struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
1532 {
1533 	if (!head)
1534 		BUG();
1535 	spin_lock(&sysctl_lock);
1536 	if (!use_table(head))
1537 		head = ERR_PTR(-ENOENT);
1538 	spin_unlock(&sysctl_lock);
1539 	return head;
1540 }
1541 
1542 void sysctl_head_finish(struct ctl_table_header *head)
1543 {
1544 	if (!head)
1545 		return;
1546 	spin_lock(&sysctl_lock);
1547 	unuse_table(head);
1548 	spin_unlock(&sysctl_lock);
1549 }
1550 
1551 static struct ctl_table_set *
1552 lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
1553 {
1554 	struct ctl_table_set *set = &root->default_set;
1555 	if (root->lookup)
1556 		set = root->lookup(root, namespaces);
1557 	return set;
1558 }
1559 
/*
 * Return the list head of the table set that lookup_header_set()
 * selects for @root and @namespaces.
 */
static struct list_head *
lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces)
{
	struct ctl_table_set *set = lookup_header_set(root, namespaces);
	return &set->list;
}
1566 
/*
 * Iterate over the table headers visible for @namespaces.
 *
 * With @prev == NULL the walk starts at root_table_header; otherwise
 * the "use" reference on @prev is dropped and the walk resumes with
 * the entry following it.  The first header on which use_table()
 * succeeds is returned (still marked in use, sysctl_lock released).
 * When a root's list is exhausted the walk moves on to the next root
 * with a non-empty list; NULL is returned once the root list wraps
 * back around to sysctl_table_root.
 */
struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces,
					    struct ctl_table_header *prev)
{
	struct ctl_table_root *root;
	struct list_head *header_list;
	struct ctl_table_header *head;
	struct list_head *tmp;

	spin_lock(&sysctl_lock);
	if (prev) {
		head = prev;
		tmp = &prev->ctl_entry;
		unuse_table(prev);
		/* jump into the loop body to advance past prev */
		goto next;
	}
	tmp = &root_table_header.ctl_entry;
	for (;;) {
		head = list_entry(tmp, struct ctl_table_header, ctl_entry);

		/* headers being unregistered are skipped */
		if (!use_table(head))
			goto next;
		spin_unlock(&sysctl_lock);
		return head;
	next:
		root = head->root;
		tmp = tmp->next;
		header_list = lookup_header_list(root, namespaces);
		/* still inside the current root's list: look at this entry */
		if (tmp != header_list)
			continue;

		/* end of this list: find the next root with any headers */
		do {
			root = list_entry(root->root_list.next,
					struct ctl_table_root, root_list);
			if (root == &sysctl_table_root)
				goto out;
			header_list = lookup_header_list(root, namespaces);
		} while (list_empty(header_list));
		tmp = header_list->next;
	}
out:
	spin_unlock(&sysctl_lock);
	return NULL;
}
1610 
/*
 * Convenience form of __sysctl_head_next() that uses the calling
 * task's namespaces (current->nsproxy).
 */
struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
{
	return __sysctl_head_next(current->nsproxy, prev);
}
1615 
/*
 * Append @root to the list of sysctl roots anchored at
 * sysctl_table_root, under sysctl_lock.
 */
void register_sysctl_root(struct ctl_table_root *root)
{
	spin_lock(&sysctl_lock);
	list_add_tail(&root->root_list, &sysctl_table_root.root_list);
	spin_unlock(&sysctl_lock);
}
1622 
1623 /*
1624  * sysctl_perm does NOT grant the superuser all rights automatically, because
1625  * some sysctl variables are readonly even to root.
1626  */
1627 
1628 static int test_perm(int mode, int op)
1629 {
1630 	if (!current_euid())
1631 		mode >>= 6;
1632 	else if (in_egroup_p(0))
1633 		mode >>= 3;
1634 	if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0)
1635 		return 0;
1636 	return -EACCES;
1637 }
1638 
1639 int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
1640 {
1641 	int error;
1642 	int mode;
1643 
1644 	error = security_sysctl(table, op & (MAY_READ | MAY_WRITE | MAY_EXEC));
1645 	if (error)
1646 		return error;
1647 
1648 	if (root->permissions)
1649 		mode = root->permissions(root, current->nsproxy, table);
1650 	else
1651 		mode = table->mode;
1652 
1653 	return test_perm(mode, op);
1654 }
1655 
1656 static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1657 {
1658 	for (; table->procname; table++) {
1659 		table->parent = parent;
1660 		if (table->child)
1661 			sysctl_set_parent(table, table->child);
1662 	}
1663 }
1664 
1665 static __init int sysctl_init(void)
1666 {
1667 	sysctl_set_parent(NULL, root_table);
1668 #ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1669 	{
1670 		int err;
1671 		err = sysctl_check_table(current->nsproxy, root_table);
1672 	}
1673 #endif
1674 	return 0;
1675 }
1676 
1677 core_initcall(sysctl_init);
1678 
1679 static struct ctl_table *is_branch_in(struct ctl_table *branch,
1680 				      struct ctl_table *table)
1681 {
1682 	struct ctl_table *p;
1683 	const char *s = branch->procname;
1684 
1685 	/* branch should have named subdirectory as its first element */
1686 	if (!s || !branch->child)
1687 		return NULL;
1688 
1689 	/* ... and nothing else */
1690 	if (branch[1].procname)
1691 		return NULL;
1692 
1693 	/* table should contain subdirectory with the same name */
1694 	for (p = table; p->procname; p++) {
1695 		if (!p->child)
1696 			continue;
1697 		if (p->procname && strcmp(p->procname, s) == 0)
1698 			return p;
1699 	}
1700 	return NULL;
1701 }
1702 
1703 /* see if attaching q to p would be an improvement */
1704 static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
1705 {
1706 	struct ctl_table *to = p->ctl_table, *by = q->ctl_table;
1707 	struct ctl_table *next;
1708 	int is_better = 0;
1709 	int not_in_parent = !p->attached_by;
1710 
1711 	while ((next = is_branch_in(by, to)) != NULL) {
1712 		if (by == q->attached_by)
1713 			is_better = 1;
1714 		if (to == p->attached_by)
1715 			not_in_parent = 1;
1716 		by = by->child;
1717 		to = next->child;
1718 	}
1719 
1720 	if (is_better && not_in_parent) {
1721 		q->attached_by = by;
1722 		q->attached_to = to;
1723 		q->parent = p;
1724 	}
1725 }
1726 
1727 /**
1728  * __register_sysctl_paths - register a sysctl hierarchy
1729  * @root: List of sysctl headers to register on
1730  * @namespaces: Data to compute which lists of sysctl entries are visible
1731  * @path: The path to the directory the sysctl table is in.
1732  * @table: the top-level table structure
1733  *
1734  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1735  * array. A completely 0 filled entry terminates the table.
1736  *
1737  * The members of the &struct ctl_table structure are used as follows:
1738  *
1739  * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1740  *            enter a sysctl file
1741  *
1742  * data - a pointer to data for use by proc_handler
1743  *
1744  * maxlen - the maximum size in bytes of the data
1745  *
1746  * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1747  *
1748  * child - a pointer to the child sysctl table if this entry is a directory, or
1749  *         %NULL.
1750  *
1751  * proc_handler - the text handler routine (described below)
1752  *
1753  * de - for internal use by the sysctl routines
1754  *
1755  * extra1, extra2 - extra pointers usable by the proc handler routines
1756  *
1757  * Leaf nodes in the sysctl tree will be represented by a single file
1758  * under /proc; non-leaf nodes will be represented by directories.
1759  *
1760  * sysctl(2) can automatically manage read and write requests through
1761  * the sysctl table.  The data and maxlen fields of the ctl_table
1762  * struct enable minimal validation of the values being written to be
1763  * performed, and the mode field allows minimal authentication.
1764  *
1765  * There must be a proc_handler routine for any terminal nodes
1766  * mirrored under /proc/sys (non-terminals are handled by a built-in
1767  * directory handler).  Several default handlers are available to
1768  * cover common cases -
1769  *
1770  * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1771  * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1772  * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1773  *
1774  * It is the handler's job to read the input buffer from user memory
1775  * and process it. The handler should return 0 on success.
1776  *
1777  * This routine returns %NULL on a failure to register, and a pointer
1778  * to the table header on success.
1779  */
1780 struct ctl_table_header *__register_sysctl_paths(
1781 	struct ctl_table_root *root,
1782 	struct nsproxy *namespaces,
1783 	const struct ctl_path *path, struct ctl_table *table)
1784 {
1785 	struct ctl_table_header *header;
1786 	struct ctl_table *new, **prevp;
1787 	unsigned int n, npath;
1788 	struct ctl_table_set *set;
1789 
1790 	/* Count the path components */
1791 	for (npath = 0; path[npath].procname; ++npath)
1792 		;
1793 
1794 	/*
1795 	 * For each path component, allocate a 2-element ctl_table array.
1796 	 * The first array element will be filled with the sysctl entry
1797 	 * for this, the second will be the sentinel (procname == 0).
1798 	 *
1799 	 * We allocate everything in one go so that we don't have to
1800 	 * worry about freeing additional memory in unregister_sysctl_table.
1801 	 */
1802 	header = kzalloc(sizeof(struct ctl_table_header) +
1803 			 (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL);
1804 	if (!header)
1805 		return NULL;
1806 
1807 	new = (struct ctl_table *) (header + 1);
1808 
1809 	/* Now connect the dots */
1810 	prevp = &header->ctl_table;
1811 	for (n = 0; n < npath; ++n, ++path) {
1812 		/* Copy the procname */
1813 		new->procname = path->procname;
1814 		new->mode     = 0555;
1815 
1816 		*prevp = new;
1817 		prevp = &new->child;
1818 
1819 		new += 2;
1820 	}
1821 	*prevp = table;
1822 	header->ctl_table_arg = table;
1823 
1824 	INIT_LIST_HEAD(&header->ctl_entry);
1825 	header->used = 0;
1826 	header->unregistering = NULL;
1827 	header->root = root;
1828 	sysctl_set_parent(NULL, header->ctl_table);
1829 	header->count = 1;
1830 #ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1831 	if (sysctl_check_table(namespaces, header->ctl_table)) {
1832 		kfree(header);
1833 		return NULL;
1834 	}
1835 #endif
1836 	spin_lock(&sysctl_lock);
1837 	header->set = lookup_header_set(root, namespaces);
1838 	header->attached_by = header->ctl_table;
1839 	header->attached_to = root_table;
1840 	header->parent = &root_table_header;
1841 	for (set = header->set; set; set = set->parent) {
1842 		struct ctl_table_header *p;
1843 		list_for_each_entry(p, &set->list, ctl_entry) {
1844 			if (p->unregistering)
1845 				continue;
1846 			try_attach(p, header);
1847 		}
1848 	}
1849 	header->parent->count++;
1850 	list_add_tail(&header->ctl_entry, &header->set->list);
1851 	spin_unlock(&sysctl_lock);
1852 
1853 	return header;
1854 }
1855 
/**
 * register_sysctl_paths - register a sysctl table hierarchy
 * @path: The path to the directory the sysctl table is in.
 * @table: the top-level table structure
 *
 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
 * array. A completely 0 filled entry terminates the table.
 *
 * This registers on &sysctl_table_root using the calling task's
 * namespaces (current->nsproxy).
 *
 * See __register_sysctl_paths for more details.
 */
struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
						struct ctl_table *table)
{
	return __register_sysctl_paths(&sysctl_table_root, current->nsproxy,
					path, table);
}
1872 
/**
 * register_sysctl_table - register a sysctl table hierarchy
 * @table: the top-level table structure
 *
 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
 * array. A completely 0 filled entry terminates the table.
 *
 * The table is registered with an empty path, i.e. one that consists
 * of the terminating entry only.
 *
 * See register_sysctl_paths for more details.
 */
struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
{
	/* a path holding nothing but its terminator */
	static const struct ctl_path null_path[] = { {} };

	return register_sysctl_paths(null_path, table);
}
1888 
1889 /**
1890  * unregister_sysctl_table - unregister a sysctl table hierarchy
1891  * @header: the header returned from register_sysctl_table
1892  *
1893  * Unregisters the sysctl table and all children. proc entries may not
1894  * actually be removed until they are no longer used by anyone.
1895  */
1896 void unregister_sysctl_table(struct ctl_table_header * header)
1897 {
1898 	might_sleep();
1899 
1900 	if (header == NULL)
1901 		return;
1902 
1903 	spin_lock(&sysctl_lock);
1904 	start_unregistering(header);
1905 	if (!--header->parent->count) {
1906 		WARN_ON(1);
1907 		kfree(header->parent);
1908 	}
1909 	if (!--header->count)
1910 		kfree(header);
1911 	spin_unlock(&sysctl_lock);
1912 }
1913 
1914 int sysctl_is_seen(struct ctl_table_header *p)
1915 {
1916 	struct ctl_table_set *set = p->set;
1917 	int res;
1918 	spin_lock(&sysctl_lock);
1919 	if (p->unregistering)
1920 		res = 0;
1921 	else if (!set->is_seen)
1922 		res = 1;
1923 	else
1924 		res = set->is_seen(set);
1925 	spin_unlock(&sysctl_lock);
1926 	return res;
1927 }
1928 
1929 void setup_sysctl_set(struct ctl_table_set *p,
1930 	struct ctl_table_set *parent,
1931 	int (*is_seen)(struct ctl_table_set *))
1932 {
1933 	INIT_LIST_HEAD(&p->list);
1934 	p->parent = parent ? parent : &sysctl_table_root.default_set;
1935 	p->is_seen = is_seen;
1936 }
1937 
1938 #else /* !CONFIG_SYSCTL */
/*
 * Stubs used when CONFIG_SYSCTL is disabled: registration always
 * returns NULL and the remaining entry points do nothing.
 */
struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
{
	return NULL;
}

/* stub: nothing is registered, NULL is returned */
struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
						    struct ctl_table *table)
{
	return NULL;
}

/* stub: no-op */
void unregister_sysctl_table(struct ctl_table_header * table)
{
}

/* stub: no-op, @p is left untouched */
void setup_sysctl_set(struct ctl_table_set *p,
	struct ctl_table_set *parent,
	int (*is_seen)(struct ctl_table_set *))
{
}

/* stub: no-op */
void sysctl_head_put(struct ctl_table_header *head)
{
}
1963 
1964 #endif /* CONFIG_SYSCTL */
1965 
1966 /*
1967  * /proc/sys support
1968  */
1969 
1970 #ifdef CONFIG_PROC_SYSCTL
1971 
/*
 * Worker behind proc_dostring(): copy a string between the kernel
 * buffer @data (capacity @maxlen bytes) and the user buffer @buffer.
 *
 * @write:  non-zero to store user input into @data, zero to read it out
 * @lenp:   in: size of the user buffer; out (read): bytes produced
 * @ppos:   file position; advanced by the bytes consumed or produced
 *
 * Returns 0 on success or -EFAULT if a user-space access fails.  If
 * @data is NULL, or @maxlen or *@lenp is zero, *@lenp is set to 0 and
 * 0 is returned.
 */
static int _proc_do_string(void* data, int maxlen, int write,
			   void __user *buffer,
			   size_t *lenp, loff_t *ppos)
{
	size_t len;
	char __user *p;
	char c;

	if (!data || !maxlen || !*lenp) {
		*lenp = 0;
		return 0;
	}

	if (write) {
		/* input ends at the first NUL or newline, or after *lenp bytes */
		len = 0;
		p = buffer;
		while (len < *lenp) {
			if (get_user(c, p++))
				return -EFAULT;
			if (c == 0 || c == '\n')
				break;
			len++;
		}
		/* truncate so that the terminating NUL still fits */
		if (len >= maxlen)
			len = maxlen-1;
		/* note: always stored at the start of data; *ppos is not consulted */
		if(copy_from_user(data, buffer, len))
			return -EFAULT;
		((char *) data)[len] = 0;
		/* the whole user buffer counts as consumed */
		*ppos += *lenp;
	} else {
		len = strlen(data);
		if (len > maxlen)
			len = maxlen;

		/* positioned past the end of the string: nothing left to read */
		if (*ppos > len) {
			*lenp = 0;
			return 0;
		}

		/* continue from the current file position */
		data += *ppos;
		len  -= *ppos;

		if (len > *lenp)
			len = *lenp;
		if (len)
			if(copy_to_user(buffer, data, len))
				return -EFAULT;
		/* append a newline if the user buffer has room for one */
		if (len < *lenp) {
			if(put_user('\n', ((char __user *) buffer) + len))
				return -EFAULT;
			len++;
		}
		*lenp = len;
		*ppos += len;
	}
	return 0;
}
2029 
/**
 * proc_dostring - read or write a string sysctl
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * Reads/writes a string from/to the user buffer. If the kernel
 * buffer provided is not large enough to hold the string, the
 * string is truncated. The copied string is NUL-terminated.
 * If the string is being read by the user process, it is copied
 * and a newline '\n' is added. It is truncated if the buffer is
 * not large enough.
 *
 * The kernel buffer and its size are taken from @table->data and
 * @table->maxlen.
 *
 * Returns 0 on success.
 */
int proc_dostring(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return _proc_do_string(table->data, table->maxlen, write,
			       buffer, lenp, ppos);
}
2053 
2054 
/*
 * Default converter for __do_proc_dointvec(): translate between the
 * parser's sign/magnitude pair and a plain int.
 *
 * @negp:  sign flag; read on write, set (non-zero = negative) on read
 * @lvalp: magnitude; read on write, set on read
 * @valp:  the int being written to or read from
 * @write: non-zero to store (*negp, *lvalp) into *valp, zero for the
 *         opposite direction
 * @data:  unused
 *
 * Returns 0 on success.  On write, -EINVAL is returned and *valp is
 * left untouched when the magnitude does not fit in an int; such
 * values used to be truncated silently.
 */
static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		if (*negp) {
			/* the magnitude of INT_MIN is INT_MAX + 1 */
			if (*lvalp > (unsigned long) INT_MAX + 1)
				return -EINVAL;
			*valp = -*lvalp;
		} else {
			if (*lvalp > (unsigned long) INT_MAX)
				return -EINVAL;
			*valp = *lvalp;
		}
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate after widening to unsigned: "-val" on an
			 * int is undefined for INT_MIN.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2073 
/*
 * Common worker for the integer-vector handlers.
 *
 * Transfers up to table->maxlen / sizeof(int) integers between the
 * array at @tbl_data and the ASCII text in the user buffer @buffer.
 * @conv translates between the parsed sign/magnitude pair and the
 * stored int and receives @data as its last argument; when @conv is
 * NULL, do_proc_dointvec_conv() is used.
 *
 * On return *@lenp holds the number of bytes consumed or produced and
 * *@ppos has been advanced by the same amount.
 *
 * Returns 0 on success, -EFAULT if a user-space access fails, and
 * -EINVAL for a write from which not a single value was accepted.
 */
static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
		  int write, void __user *buffer,
		  size_t *lenp, loff_t *ppos,
		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
			      int write, void *data),
		  void *data)
{
#define TMPBUFLEN 21
	int *i, vleft, first = 1, neg;
	unsigned long lval;
	size_t left, len;

	char buf[TMPBUFLEN], *p;
	char __user *s = buffer;

	/* nothing to do; a read at a non-zero position yields no data */
	if (!tbl_data || !table->maxlen || !*lenp ||
	    (*ppos && !write)) {
		*lenp = 0;
		return 0;
	}

	i = (int *) tbl_data;
	vleft = table->maxlen / sizeof(*i);
	left = *lenp;

	if (!conv)
		conv = do_proc_dointvec_conv;

	/* one iteration per array element; "first" is cleared after the first */
	for (; left && vleft--; i++, first=0) {
		if (write) {
			/* skip whitespace in front of the next token */
			while (left) {
				char c;
				if (get_user(c, s))
					return -EFAULT;
				if (!isspace(c))
					break;
				left--;
				s++;
			}
			if (!left)
				break;
			neg = 0;
			/* pull at most sizeof(buf) - 1 bytes into the local buffer */
			len = left;
			if (len > sizeof(buf) - 1)
				len = sizeof(buf) - 1;
			if (copy_from_user(buf, s, len))
				return -EFAULT;
			buf[len] = 0;
			p = buf;
			if (*p == '-' && left > 1) {
				neg = 1;
				p++;
			}
			/* a digit is required here, otherwise parsing stops */
			if (*p < '0' || *p > '9')
				break;

			lval = simple_strtoul(p, &p, 0);

			len = p-buf;
			/* a number must be followed by whitespace or end of input */
			if ((len < left) && *p && !isspace(*p))
				break;
			s += len;
			left -= len;

			/* the converter may reject the value, which ends parsing */
			if (conv(&neg, &lval, i, 1, data))
				break;
		} else {
			p = buf;
			/* values after the first are separated by a tab */
			if (!first)
				*p++ = '\t';

			if (conv(&neg, &lval, i, 0, data))
				break;

			sprintf(p, "%s%lu", neg ? "-" : "", lval);
			len = strlen(buf);
			/* output is cut short if the user buffer is too small */
			if (len > left)
				len = left;
			if(copy_to_user(s, buf, len))
				return -EFAULT;
			left -= len;
			s += len;
		}
	}

	/* reads are terminated by a newline when there is room for one */
	if (!write && !first && left) {
		if(put_user('\n', s))
			return -EFAULT;
		left--, s++;
	}
	if (write) {
		/* trailing whitespace counts as consumed input */
		while (left) {
			char c;
			if (get_user(c, s++))
				return -EFAULT;
			if (!isspace(c))
				break;
			left--;
		}
	}
	/* "first" still set means no element was processed at all */
	if (write && first)
		return -EINVAL;
	*lenp -= left;
	*ppos += *lenp;
	return 0;
#undef TMPBUFLEN
}
2181 
/*
 * Run __do_proc_dointvec() on the array that @table->data points to;
 * all other arguments are passed through unchanged.
 */
static int do_proc_dointvec(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos,
		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
			      int write, void *data),
		  void *data)
{
	return __do_proc_dointvec(table->data, table, write,
			buffer, lenp, ppos, conv, data);
}
2191 
/**
 * proc_dointvec - read or write a vector of integers
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
 * values from/to the user buffer, treated as an ASCII string.
 *
 * No converter is supplied, so the default conversion of
 * do_proc_dointvec() applies.
 *
 * Returns 0 on success.
 */
int proc_dointvec(struct ctl_table *table, int write,
		     void __user *buffer, size_t *lenp, loff_t *ppos)
{
    return do_proc_dointvec(table,write,buffer,lenp,ppos,
		    	    NULL,NULL);
}
2211 
2212 /*
2213  * Taint values can only be increased
2214  * This means we can safely use a temporary.
2215  */
2216 static int proc_taint(struct ctl_table *table, int write,
2217 			       void __user *buffer, size_t *lenp, loff_t *ppos)
2218 {
2219 	struct ctl_table t;
2220 	unsigned long tmptaint = get_taint();
2221 	int err;
2222 
2223 	if (write && !capable(CAP_SYS_ADMIN))
2224 		return -EPERM;
2225 
2226 	t = *table;
2227 	t.data = &tmptaint;
2228 	err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2229 	if (err < 0)
2230 		return err;
2231 
2232 	if (write) {
2233 		/*
2234 		 * Poor man's atomic or. Not worth adding a primitive
2235 		 * to everyone's atomic.h for this
2236 		 */
2237 		int i;
2238 		for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2239 			if ((tmptaint >> i) & 1)
2240 				add_taint(i);
2241 		}
2242 	}
2243 
2244 	return err;
2245 }
2246 
/*
 * Optional bounds handed to do_proc_dointvec_minmax_conv() through its
 * @data argument.  A NULL pointer means "no limit on that side".
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Converter for proc_dointvec_minmax(): like the default int converter,
 * but a written value is accepted only if it lies within the bounds in
 * the struct do_proc_dointvec_minmax_conv_param that @data points to.
 *
 * Returns 0 on success.  On write, -EINVAL is returned and *valp is
 * left untouched when the value is outside the bounds or its magnitude
 * does not fit in an int.  The latter used to be truncated before the
 * range check, which could wrap an out-of-range value into the
 * permitted window.
 */
static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		int val;

		if (*negp) {
			/* the magnitude of INT_MIN is INT_MAX + 1 */
			if (*lvalp > (unsigned long) INT_MAX + 1)
				return -EINVAL;
			val = -*lvalp;
		} else {
			if (*lvalp > (unsigned long) INT_MAX)
				return -EINVAL;
			val = *lvalp;
		}
		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate after widening to unsigned: "-val" on an
			 * int is undefined for INT_MIN.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2275 
2276 /**
2277  * proc_dointvec_minmax - read a vector of integers with min/max values
2278  * @table: the sysctl table
2279  * @write: %TRUE if this is a write to the sysctl file
2280  * @buffer: the user buffer
2281  * @lenp: the size of the user buffer
2282  * @ppos: file position
2283  *
2284  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2285  * values from/to the user buffer, treated as an ASCII string.
2286  *
2287  * This routine will ensure the values are within the range specified by
2288  * table->extra1 (min) and table->extra2 (max).
2289  *
2290  * Returns 0 on success.
2291  */
2292 int proc_dointvec_minmax(struct ctl_table *table, int write,
2293 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2294 {
2295 	struct do_proc_dointvec_minmax_conv_param param = {
2296 		.min = (int *) table->extra1,
2297 		.max = (int *) table->extra2,
2298 	};
2299 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
2300 				do_proc_dointvec_minmax_conv, &param);
2301 }
2302 
2303 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2304 				     void __user *buffer,
2305 				     size_t *lenp, loff_t *ppos,
2306 				     unsigned long convmul,
2307 				     unsigned long convdiv)
2308 {
2309 #define TMPBUFLEN 21
2310 	unsigned long *i, *min, *max, val;
2311 	int vleft, first=1, neg;
2312 	size_t len, left;
2313 	char buf[TMPBUFLEN], *p;
2314 	char __user *s = buffer;
2315 
2316 	if (!data || !table->maxlen || !*lenp ||
2317 	    (*ppos && !write)) {
2318 		*lenp = 0;
2319 		return 0;
2320 	}
2321 
2322 	i = (unsigned long *) data;
2323 	min = (unsigned long *) table->extra1;
2324 	max = (unsigned long *) table->extra2;
2325 	vleft = table->maxlen / sizeof(unsigned long);
2326 	left = *lenp;
2327 
2328 	for (; left && vleft--; i++, min++, max++, first=0) {
2329 		if (write) {
2330 			while (left) {
2331 				char c;
2332 				if (get_user(c, s))
2333 					return -EFAULT;
2334 				if (!isspace(c))
2335 					break;
2336 				left--;
2337 				s++;
2338 			}
2339 			if (!left)
2340 				break;
2341 			neg = 0;
2342 			len = left;
2343 			if (len > TMPBUFLEN-1)
2344 				len = TMPBUFLEN-1;
2345 			if (copy_from_user(buf, s, len))
2346 				return -EFAULT;
2347 			buf[len] = 0;
2348 			p = buf;
2349 			if (*p == '-' && left > 1) {
2350 				neg = 1;
2351 				p++;
2352 			}
2353 			if (*p < '0' || *p > '9')
2354 				break;
2355 			val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
2356 			len = p-buf;
2357 			if ((len < left) && *p && !isspace(*p))
2358 				break;
2359 			if (neg)
2360 				val = -val;
2361 			s += len;
2362 			left -= len;
2363 
2364 			if(neg)
2365 				continue;
2366 			if ((min && val < *min) || (max && val > *max))
2367 				continue;
2368 			*i = val;
2369 		} else {
2370 			p = buf;
2371 			if (!first)
2372 				*p++ = '\t';
2373 			sprintf(p, "%lu", convdiv * (*i) / convmul);
2374 			len = strlen(buf);
2375 			if (len > left)
2376 				len = left;
2377 			if(copy_to_user(s, buf, len))
2378 				return -EFAULT;
2379 			left -= len;
2380 			s += len;
2381 		}
2382 	}
2383 
2384 	if (!write && !first && left) {
2385 		if(put_user('\n', s))
2386 			return -EFAULT;
2387 		left--, s++;
2388 	}
2389 	if (write) {
2390 		while (left) {
2391 			char c;
2392 			if (get_user(c, s++))
2393 				return -EFAULT;
2394 			if (!isspace(c))
2395 				break;
2396 			left--;
2397 		}
2398 	}
2399 	if (write && first)
2400 		return -EINVAL;
2401 	*lenp -= left;
2402 	*ppos += *lenp;
2403 	return 0;
2404 #undef TMPBUFLEN
2405 }
2406 
2407 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2408 				     void __user *buffer,
2409 				     size_t *lenp, loff_t *ppos,
2410 				     unsigned long convmul,
2411 				     unsigned long convdiv)
2412 {
2413 	return __do_proc_doulongvec_minmax(table->data, table, write,
2414 			buffer, lenp, ppos, convmul, convdiv);
2415 }
2416 
2417 /**
2418  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2419  * @table: the sysctl table
2420  * @write: %TRUE if this is a write to the sysctl file
2421  * @buffer: the user buffer
2422  * @lenp: the size of the user buffer
2423  * @ppos: file position
2424  *
2425  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2426  * values from/to the user buffer, treated as an ASCII string.
2427  *
2428  * This routine will ensure the values are within the range specified by
2429  * table->extra1 (min) and table->extra2 (max).
2430  *
2431  * Returns 0 on success.
2432  */
2433 int proc_doulongvec_minmax(struct ctl_table *table, int write,
2434 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2435 {
2436     return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2437 }
2438 
2439 /**
2440  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2441  * @table: the sysctl table
2442  * @write: %TRUE if this is a write to the sysctl file
2443  * @buffer: the user buffer
2444  * @lenp: the size of the user buffer
2445  * @ppos: file position
2446  *
2447  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2448  * values from/to the user buffer, treated as an ASCII string. The values
2449  * are treated as milliseconds, and converted to jiffies when they are stored.
2450  *
2451  * This routine will ensure the values are within the range specified by
2452  * table->extra1 (min) and table->extra2 (max).
2453  *
2454  * Returns 0 on success.
2455  */
2456 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2457 				      void __user *buffer,
2458 				      size_t *lenp, loff_t *ppos)
2459 {
2460     return do_proc_doulongvec_minmax(table, write, buffer,
2461 				     lenp, ppos, HZ, 1000l);
2462 }
2463 
2464 
2465 static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
2466 					 int *valp,
2467 					 int write, void *data)
2468 {
2469 	if (write) {
2470 		if (*lvalp > LONG_MAX / HZ)
2471 			return 1;
2472 		*valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2473 	} else {
2474 		int val = *valp;
2475 		unsigned long lval;
2476 		if (val < 0) {
2477 			*negp = -1;
2478 			lval = (unsigned long)-val;
2479 		} else {
2480 			*negp = 0;
2481 			lval = (unsigned long)val;
2482 		}
2483 		*lvalp = lval / HZ;
2484 	}
2485 	return 0;
2486 }
2487 
2488 static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
2489 						int *valp,
2490 						int write, void *data)
2491 {
2492 	if (write) {
2493 		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2494 			return 1;
2495 		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2496 	} else {
2497 		int val = *valp;
2498 		unsigned long lval;
2499 		if (val < 0) {
2500 			*negp = -1;
2501 			lval = (unsigned long)-val;
2502 		} else {
2503 			*negp = 0;
2504 			lval = (unsigned long)val;
2505 		}
2506 		*lvalp = jiffies_to_clock_t(lval);
2507 	}
2508 	return 0;
2509 }
2510 
/*
 * Convert between milliseconds (user side) and jiffies (stored int).
 *
 * Write: the signed value described by *@negp / *@lvalp is passed through
 * msecs_to_jiffies() and stored in *@valp.
 *
 * Read: *@valp is split into *@negp (nonzero when negative) and its
 * magnitude, converted with jiffies_to_msecs(), in *@lvalp.
 *
 * @data is unused.  Always returns 0.
 */
static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		*valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
	} else {
		int val = *valp;
		unsigned long lval;

		if (val < 0) {
			*negp = -1;
			/*
			 * Negate as unsigned: -INT_MIN does not fit in int,
			 * and the overflowed result would be sign-extended
			 * into a huge magnitude.
			 */
			lval = -(unsigned long)val;
		} else {
			*negp = 0;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_msecs(lval);
	}
	return 0;
}
2531 
2532 /**
2533  * proc_dointvec_jiffies - read a vector of integers as seconds
2534  * @table: the sysctl table
2535  * @write: %TRUE if this is a write to the sysctl file
2536  * @buffer: the user buffer
2537  * @lenp: the size of the user buffer
2538  * @ppos: file position
2539  *
2540  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2541  * values from/to the user buffer, treated as an ASCII string.
2542  * The values read are assumed to be in seconds, and are converted into
2543  * jiffies.
2544  *
2545  * Returns 0 on success.
2546  */
2547 int proc_dointvec_jiffies(struct ctl_table *table, int write,
2548 			  void __user *buffer, size_t *lenp, loff_t *ppos)
2549 {
2550     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2551 		    	    do_proc_dointvec_jiffies_conv,NULL);
2552 }
2553 
2554 /**
2555  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2556  * @table: the sysctl table
2557  * @write: %TRUE if this is a write to the sysctl file
2558  * @buffer: the user buffer
2559  * @lenp: the size of the user buffer
2560  * @ppos: pointer to the file position
2561  *
2562  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2563  * values from/to the user buffer, treated as an ASCII string.
2564  * The values read are assumed to be in 1/USER_HZ seconds, and
2565  * are converted into jiffies.
2566  *
2567  * Returns 0 on success.
2568  */
2569 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2570 				 void __user *buffer, size_t *lenp, loff_t *ppos)
2571 {
2572     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2573 		    	    do_proc_dointvec_userhz_jiffies_conv,NULL);
2574 }
2575 
2576 /**
2577  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2578  * @table: the sysctl table
2579  * @write: %TRUE if this is a write to the sysctl file
2580  * @buffer: the user buffer
2581  * @lenp: the size of the user buffer
2582  * @ppos: file position
2583  * @ppos: the current position in the file
2584  *
2585  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2586  * values from/to the user buffer, treated as an ASCII string.
2587  * The values read are assumed to be in 1/1000 seconds, and
2588  * are converted into jiffies.
2589  *
2590  * Returns 0 on success.
2591  */
2592 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
2593 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2594 {
2595 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
2596 				do_proc_dointvec_ms_jiffies_conv, NULL);
2597 }
2598 
2599 static int proc_do_cad_pid(struct ctl_table *table, int write,
2600 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2601 {
2602 	struct pid *new_pid;
2603 	pid_t tmp;
2604 	int r;
2605 
2606 	tmp = pid_vnr(cad_pid);
2607 
2608 	r = __do_proc_dointvec(&tmp, table, write, buffer,
2609 			       lenp, ppos, NULL, NULL);
2610 	if (r || !write)
2611 		return r;
2612 
2613 	new_pid = find_get_pid(tmp);
2614 	if (!new_pid)
2615 		return -ESRCH;
2616 
2617 	put_pid(xchg(&cad_pid, new_pid));
2618 	return 0;
2619 }
2620 
2621 #else /* CONFIG_PROC_FS */
2622 
2623 int proc_dostring(struct ctl_table *table, int write,
2624 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2625 {
2626 	return -ENOSYS;
2627 }
2628 
2629 int proc_dointvec(struct ctl_table *table, int write,
2630 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2631 {
2632 	return -ENOSYS;
2633 }
2634 
2635 int proc_dointvec_minmax(struct ctl_table *table, int write,
2636 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2637 {
2638 	return -ENOSYS;
2639 }
2640 
2641 int proc_dointvec_jiffies(struct ctl_table *table, int write,
2642 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2643 {
2644 	return -ENOSYS;
2645 }
2646 
2647 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2648 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2649 {
2650 	return -ENOSYS;
2651 }
2652 
2653 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
2654 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2655 {
2656 	return -ENOSYS;
2657 }
2658 
2659 int proc_doulongvec_minmax(struct ctl_table *table, int write,
2660 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2661 {
2662 	return -ENOSYS;
2663 }
2664 
2665 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2666 				      void __user *buffer,
2667 				      size_t *lenp, loff_t *ppos)
2668 {
2669     return -ENOSYS;
2670 }
2671 
2672 
2673 #endif /* CONFIG_PROC_FS */
2674 
/*
 * The exports are gathered here instead of following each definition:
 * the proc handlers above are defined twice (once per CONFIG_PROC_FS
 * branch), so exporting next to the definition would mean writing every
 * EXPORT_SYMBOL twice.  Exception to the usual placement granted :-)
 */

/* proc handlers */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);

/* table registration */
EXPORT_SYMBOL(register_sysctl_paths);
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(unregister_sysctl_table);
2690