xref: /linux-6.15/kernel/sysctl.c (revision 0d4dd797)
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20 
21 #include <linux/module.h>
22 #include <linux/mm.h>
23 #include <linux/swap.h>
24 #include <linux/slab.h>
25 #include <linux/sysctl.h>
26 #include <linux/bitmap.h>
27 #include <linux/signal.h>
28 #include <linux/printk.h>
29 #include <linux/proc_fs.h>
30 #include <linux/security.h>
31 #include <linux/ctype.h>
32 #include <linux/kmemcheck.h>
33 #include <linux/kmemleak.h>
34 #include <linux/fs.h>
35 #include <linux/init.h>
36 #include <linux/kernel.h>
37 #include <linux/kobject.h>
38 #include <linux/net.h>
39 #include <linux/sysrq.h>
40 #include <linux/highuid.h>
41 #include <linux/writeback.h>
42 #include <linux/ratelimit.h>
43 #include <linux/compaction.h>
44 #include <linux/hugetlb.h>
45 #include <linux/initrd.h>
46 #include <linux/key.h>
47 #include <linux/times.h>
48 #include <linux/limits.h>
49 #include <linux/dcache.h>
50 #include <linux/dnotify.h>
51 #include <linux/syscalls.h>
52 #include <linux/vmstat.h>
53 #include <linux/nfs_fs.h>
54 #include <linux/acpi.h>
55 #include <linux/reboot.h>
56 #include <linux/ftrace.h>
57 #include <linux/perf_event.h>
58 #include <linux/kprobes.h>
59 #include <linux/pipe_fs_i.h>
60 #include <linux/oom.h>
61 #include <linux/kmod.h>
62 #include <linux/capability.h>
63 #include <linux/binfmts.h>
64 #include <linux/sched/sysctl.h>
65 
66 #include <asm/uaccess.h>
67 #include <asm/processor.h>
68 
69 #ifdef CONFIG_X86
70 #include <asm/nmi.h>
71 #include <asm/stacktrace.h>
72 #include <asm/io.h>
73 #endif
74 #ifdef CONFIG_SPARC
75 #include <asm/setup.h>
76 #endif
77 #ifdef CONFIG_BSD_PROCESS_ACCT
78 #include <linux/acct.h>
79 #endif
80 #ifdef CONFIG_RT_MUTEXES
81 #include <linux/rtmutex.h>
82 #endif
83 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
84 #include <linux/lockdep.h>
85 #endif
86 #ifdef CONFIG_CHR_DEV_SG
87 #include <scsi/sg.h>
88 #endif
89 
90 #ifdef CONFIG_LOCKUP_DETECTOR
91 #include <linux/nmi.h>
92 #endif
93 
94 
95 #if defined(CONFIG_SYSCTL)
96 
/*
 * Variables defined in other translation units that have no declaration
 * in any header this file includes.
 */
extern int sysctl_overcommit_memory;
extern int sysctl_overcommit_ratio;
extern int max_threads;
extern int suid_dumpable;
extern int pid_max;
extern int pid_max_min;
extern int pid_max_max;
extern int percpu_pagelist_fraction;
extern int compat_log;
extern int latencytop_enabled;
extern int sysctl_nr_open_min;
extern int sysctl_nr_open_max;
#ifdef CONFIG_COREDUMP
extern int core_uses_pid;
extern char core_pattern[];
extern unsigned int core_pipe_limit;
#endif /* CONFIG_COREDUMP */
#ifndef CONFIG_MMU
extern int sysctl_nr_trim_pages;
#endif /* !CONFIG_MMU */
#ifdef CONFIG_BLOCK
extern int blk_iopoll_enabled;
#endif /* CONFIG_BLOCK */
119 
120 /* Constants used for minimum and  maximum */
121 #ifdef CONFIG_LOCKUP_DETECTOR
122 static int sixty = 60;
123 #endif
124 
125 static int zero;
126 static int __maybe_unused one = 1;
127 static int __maybe_unused two = 2;
128 static int __maybe_unused three = 3;
129 static unsigned long one_ul = 1;
130 static int one_hundred = 100;
131 #ifdef CONFIG_PRINTK
132 static int ten_thousand = 10000;
133 #endif
134 
135 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
136 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
137 
138 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
139 static int maxolduid = 65535;
140 static int minolduid;
141 static int min_percpu_pagelist_fract = 8;
142 
143 static int ngroups_max = NGROUPS_MAX;
144 static const int cap_last_cap = CAP_LAST_CAP;
145 
#ifdef CONFIG_INOTIFY_USER
#include <linux/inotify.h>
#endif

/*
 * Architecture- and configuration-specific variables that are only
 * declared here.  Each one is exported through a kern_table entry that is
 * guarded by the same preprocessor condition.
 */
#ifdef CONFIG_SPARC64
extern int sysctl_tsb_ratio;		/* "tsb-ratio" */
#endif

#ifdef __hppa__
extern int pwrsw_enabled;		/* "soft-power" */
#endif

#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
extern int unaligned_enabled;		/* "unaligned-trap" */
#endif

#ifdef CONFIG_IA64
extern int unaligned_dump_stack;	/* "unaligned-dump-stack" */
#endif

#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
extern int no_unaligned_warning;	/* "ignore-unaligned-usertrap" */
#endif
171 
172 #ifdef CONFIG_PROC_SYSCTL
173 static int proc_do_cad_pid(struct ctl_table *table, int write,
174 		  void __user *buffer, size_t *lenp, loff_t *ppos);
175 static int proc_taint(struct ctl_table *table, int write,
176 			       void __user *buffer, size_t *lenp, loff_t *ppos);
177 #endif
178 
179 #ifdef CONFIG_PRINTK
180 static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
181 				void __user *buffer, size_t *lenp, loff_t *ppos);
182 #endif
183 
184 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
185 		void __user *buffer, size_t *lenp, loff_t *ppos);
186 #ifdef CONFIG_COREDUMP
187 static int proc_dostring_coredump(struct ctl_table *table, int write,
188 		void __user *buffer, size_t *lenp, loff_t *ppos);
189 #endif
190 
#ifdef CONFIG_MAGIC_SYSRQ
/* Note: sysrq code uses its own private copy */
static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;

/*
 * Proc handler for the "sysrq" entry of kern_table, whose .data is
 * __sysrq_enabled.
 *
 * The integer is read or written with proc_dointvec(); if that fails its
 * error code is returned unchanged.  After a successful write the value now
 * held in __sysrq_enabled is handed to sysrq_toggle_support().
 *
 * Returns 0 on success.
 */
static int sysrq_sysctl_handler(struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp,
				loff_t *ppos)
{
	int error;

	error = proc_dointvec(table, write, buffer, lenp, ppos);
	if (error)
		return error;

	/* A read cannot have changed the value; only forward writes. */
	if (write)
		sysrq_toggle_support(__sysrq_enabled);

	return 0;
}

#endif /* CONFIG_MAGIC_SYSRQ */
212 
213 static struct ctl_table kern_table[];
214 static struct ctl_table vm_table[];
215 static struct ctl_table fs_table[];
216 static struct ctl_table debug_table[];
217 static struct ctl_table dev_table[];
218 extern struct ctl_table random_table[];
219 #ifdef CONFIG_EPOLL
220 extern struct ctl_table epoll_table[];
221 #endif
222 
223 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
224 int sysctl_legacy_va_layout;
225 #endif
226 
227 /* The default sysctl tables: */
228 
229 static struct ctl_table sysctl_base_table[] = {
230 	{
231 		.procname	= "kernel",
232 		.mode		= 0555,
233 		.child		= kern_table,
234 	},
235 	{
236 		.procname	= "vm",
237 		.mode		= 0555,
238 		.child		= vm_table,
239 	},
240 	{
241 		.procname	= "fs",
242 		.mode		= 0555,
243 		.child		= fs_table,
244 	},
245 	{
246 		.procname	= "debug",
247 		.mode		= 0555,
248 		.child		= debug_table,
249 	},
250 	{
251 		.procname	= "dev",
252 		.mode		= 0555,
253 		.child		= dev_table,
254 	},
255 	{ }
256 };
257 
#ifdef CONFIG_SCHED_DEBUG
/* Bounds for the scheduler tunables exposed under CONFIG_SCHED_DEBUG. */
static int min_sched_granularity_ns = 100000;		/* 100 usecs */
static int max_sched_granularity_ns = NSEC_PER_SEC;	/* 1 second */
static int min_wakeup_granularity_ns;			/* 0 usecs */
static int max_wakeup_granularity_ns = NSEC_PER_SEC;	/* 1 second */
#ifdef CONFIG_SMP
/* "sched_tunable_scaling" accepts every value below SCHED_TUNABLESCALING_END */
static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END - 1;
#endif /* CONFIG_SMP */
#endif /* CONFIG_SCHED_DEBUG */

#ifdef CONFIG_COMPACTION
static int min_extfrag_threshold;
static int max_extfrag_threshold = 1000;
#endif /* CONFIG_COMPACTION */
273 
274 static struct ctl_table kern_table[] = {
275 	{
276 		.procname	= "sched_child_runs_first",
277 		.data		= &sysctl_sched_child_runs_first,
278 		.maxlen		= sizeof(unsigned int),
279 		.mode		= 0644,
280 		.proc_handler	= proc_dointvec,
281 	},
282 #ifdef CONFIG_SCHED_DEBUG
283 	{
284 		.procname	= "sched_min_granularity_ns",
285 		.data		= &sysctl_sched_min_granularity,
286 		.maxlen		= sizeof(unsigned int),
287 		.mode		= 0644,
288 		.proc_handler	= sched_proc_update_handler,
289 		.extra1		= &min_sched_granularity_ns,
290 		.extra2		= &max_sched_granularity_ns,
291 	},
292 	{
293 		.procname	= "sched_latency_ns",
294 		.data		= &sysctl_sched_latency,
295 		.maxlen		= sizeof(unsigned int),
296 		.mode		= 0644,
297 		.proc_handler	= sched_proc_update_handler,
298 		.extra1		= &min_sched_granularity_ns,
299 		.extra2		= &max_sched_granularity_ns,
300 	},
301 	{
302 		.procname	= "sched_wakeup_granularity_ns",
303 		.data		= &sysctl_sched_wakeup_granularity,
304 		.maxlen		= sizeof(unsigned int),
305 		.mode		= 0644,
306 		.proc_handler	= sched_proc_update_handler,
307 		.extra1		= &min_wakeup_granularity_ns,
308 		.extra2		= &max_wakeup_granularity_ns,
309 	},
310 #ifdef CONFIG_SMP
311 	{
312 		.procname	= "sched_tunable_scaling",
313 		.data		= &sysctl_sched_tunable_scaling,
314 		.maxlen		= sizeof(enum sched_tunable_scaling),
315 		.mode		= 0644,
316 		.proc_handler	= sched_proc_update_handler,
317 		.extra1		= &min_sched_tunable_scaling,
318 		.extra2		= &max_sched_tunable_scaling,
319 	},
320 	{
321 		.procname	= "sched_migration_cost_ns",
322 		.data		= &sysctl_sched_migration_cost,
323 		.maxlen		= sizeof(unsigned int),
324 		.mode		= 0644,
325 		.proc_handler	= proc_dointvec,
326 	},
327 	{
328 		.procname	= "sched_nr_migrate",
329 		.data		= &sysctl_sched_nr_migrate,
330 		.maxlen		= sizeof(unsigned int),
331 		.mode		= 0644,
332 		.proc_handler	= proc_dointvec,
333 	},
334 	{
335 		.procname	= "sched_time_avg_ms",
336 		.data		= &sysctl_sched_time_avg,
337 		.maxlen		= sizeof(unsigned int),
338 		.mode		= 0644,
339 		.proc_handler	= proc_dointvec,
340 	},
341 	{
342 		.procname	= "sched_shares_window_ns",
343 		.data		= &sysctl_sched_shares_window,
344 		.maxlen		= sizeof(unsigned int),
345 		.mode		= 0644,
346 		.proc_handler	= proc_dointvec,
347 	},
348 	{
349 		.procname	= "timer_migration",
350 		.data		= &sysctl_timer_migration,
351 		.maxlen		= sizeof(unsigned int),
352 		.mode		= 0644,
353 		.proc_handler	= proc_dointvec_minmax,
354 		.extra1		= &zero,
355 		.extra2		= &one,
356 	},
357 #endif /* CONFIG_SMP */
358 #ifdef CONFIG_NUMA_BALANCING
359 	{
360 		.procname	= "numa_balancing_scan_delay_ms",
361 		.data		= &sysctl_numa_balancing_scan_delay,
362 		.maxlen		= sizeof(unsigned int),
363 		.mode		= 0644,
364 		.proc_handler	= proc_dointvec,
365 	},
366 	{
367 		.procname	= "numa_balancing_scan_period_min_ms",
368 		.data		= &sysctl_numa_balancing_scan_period_min,
369 		.maxlen		= sizeof(unsigned int),
370 		.mode		= 0644,
371 		.proc_handler	= proc_dointvec,
372 	},
373 	{
374 		.procname	= "numa_balancing_scan_period_max_ms",
375 		.data		= &sysctl_numa_balancing_scan_period_max,
376 		.maxlen		= sizeof(unsigned int),
377 		.mode		= 0644,
378 		.proc_handler	= proc_dointvec,
379 	},
380 	{
381 		.procname	= "numa_balancing_scan_size_mb",
382 		.data		= &sysctl_numa_balancing_scan_size,
383 		.maxlen		= sizeof(unsigned int),
384 		.mode		= 0644,
385 		.proc_handler	= proc_dointvec,
386 	},
387 	{
388 		.procname       = "numa_balancing_migrate_deferred",
389 		.data           = &sysctl_numa_balancing_migrate_deferred,
390 		.maxlen         = sizeof(unsigned int),
391 		.mode           = 0644,
392 		.proc_handler   = proc_dointvec,
393 	},
394 #endif /* CONFIG_NUMA_BALANCING */
395 #endif /* CONFIG_SCHED_DEBUG */
396 	{
397 		.procname	= "sched_rt_period_us",
398 		.data		= &sysctl_sched_rt_period,
399 		.maxlen		= sizeof(unsigned int),
400 		.mode		= 0644,
401 		.proc_handler	= sched_rt_handler,
402 	},
403 	{
404 		.procname	= "sched_rt_runtime_us",
405 		.data		= &sysctl_sched_rt_runtime,
406 		.maxlen		= sizeof(int),
407 		.mode		= 0644,
408 		.proc_handler	= sched_rt_handler,
409 	},
410 	{
411 		.procname	= "sched_rr_timeslice_ms",
412 		.data		= &sched_rr_timeslice,
413 		.maxlen		= sizeof(int),
414 		.mode		= 0644,
415 		.proc_handler	= sched_rr_handler,
416 	},
417 #ifdef CONFIG_SCHED_AUTOGROUP
418 	{
419 		.procname	= "sched_autogroup_enabled",
420 		.data		= &sysctl_sched_autogroup_enabled,
421 		.maxlen		= sizeof(unsigned int),
422 		.mode		= 0644,
423 		.proc_handler	= proc_dointvec_minmax,
424 		.extra1		= &zero,
425 		.extra2		= &one,
426 	},
427 #endif
428 #ifdef CONFIG_CFS_BANDWIDTH
429 	{
430 		.procname	= "sched_cfs_bandwidth_slice_us",
431 		.data		= &sysctl_sched_cfs_bandwidth_slice,
432 		.maxlen		= sizeof(unsigned int),
433 		.mode		= 0644,
434 		.proc_handler	= proc_dointvec_minmax,
435 		.extra1		= &one,
436 	},
437 #endif
438 #ifdef CONFIG_PROVE_LOCKING
439 	{
440 		.procname	= "prove_locking",
441 		.data		= &prove_locking,
442 		.maxlen		= sizeof(int),
443 		.mode		= 0644,
444 		.proc_handler	= proc_dointvec,
445 	},
446 #endif
447 #ifdef CONFIG_LOCK_STAT
448 	{
449 		.procname	= "lock_stat",
450 		.data		= &lock_stat,
451 		.maxlen		= sizeof(int),
452 		.mode		= 0644,
453 		.proc_handler	= proc_dointvec,
454 	},
455 #endif
456 	{
457 		.procname	= "panic",
458 		.data		= &panic_timeout,
459 		.maxlen		= sizeof(int),
460 		.mode		= 0644,
461 		.proc_handler	= proc_dointvec,
462 	},
463 #ifdef CONFIG_COREDUMP
464 	{
465 		.procname	= "core_uses_pid",
466 		.data		= &core_uses_pid,
467 		.maxlen		= sizeof(int),
468 		.mode		= 0644,
469 		.proc_handler	= proc_dointvec,
470 	},
471 	{
472 		.procname	= "core_pattern",
473 		.data		= core_pattern,
474 		.maxlen		= CORENAME_MAX_SIZE,
475 		.mode		= 0644,
476 		.proc_handler	= proc_dostring_coredump,
477 	},
478 	{
479 		.procname	= "core_pipe_limit",
480 		.data		= &core_pipe_limit,
481 		.maxlen		= sizeof(unsigned int),
482 		.mode		= 0644,
483 		.proc_handler	= proc_dointvec,
484 	},
485 #endif
486 #ifdef CONFIG_PROC_SYSCTL
487 	{
488 		.procname	= "tainted",
489 		.maxlen 	= sizeof(long),
490 		.mode		= 0644,
491 		.proc_handler	= proc_taint,
492 	},
493 #endif
494 #ifdef CONFIG_LATENCYTOP
495 	{
496 		.procname	= "latencytop",
497 		.data		= &latencytop_enabled,
498 		.maxlen		= sizeof(int),
499 		.mode		= 0644,
500 		.proc_handler	= proc_dointvec,
501 	},
502 #endif
503 #ifdef CONFIG_BLK_DEV_INITRD
504 	{
505 		.procname	= "real-root-dev",
506 		.data		= &real_root_dev,
507 		.maxlen		= sizeof(int),
508 		.mode		= 0644,
509 		.proc_handler	= proc_dointvec,
510 	},
511 #endif
512 	{
513 		.procname	= "print-fatal-signals",
514 		.data		= &print_fatal_signals,
515 		.maxlen		= sizeof(int),
516 		.mode		= 0644,
517 		.proc_handler	= proc_dointvec,
518 	},
519 #ifdef CONFIG_SPARC
520 	{
521 		.procname	= "reboot-cmd",
522 		.data		= reboot_command,
523 		.maxlen		= 256,
524 		.mode		= 0644,
525 		.proc_handler	= proc_dostring,
526 	},
527 	{
528 		.procname	= "stop-a",
529 		.data		= &stop_a_enabled,
530 		.maxlen		= sizeof (int),
531 		.mode		= 0644,
532 		.proc_handler	= proc_dointvec,
533 	},
534 	{
535 		.procname	= "scons-poweroff",
536 		.data		= &scons_pwroff,
537 		.maxlen		= sizeof (int),
538 		.mode		= 0644,
539 		.proc_handler	= proc_dointvec,
540 	},
541 #endif
542 #ifdef CONFIG_SPARC64
543 	{
544 		.procname	= "tsb-ratio",
545 		.data		= &sysctl_tsb_ratio,
546 		.maxlen		= sizeof (int),
547 		.mode		= 0644,
548 		.proc_handler	= proc_dointvec,
549 	},
550 #endif
551 #ifdef __hppa__
552 	{
553 		.procname	= "soft-power",
554 		.data		= &pwrsw_enabled,
555 		.maxlen		= sizeof (int),
556 	 	.mode		= 0644,
557 		.proc_handler	= proc_dointvec,
558 	},
559 #endif
560 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
561 	{
562 		.procname	= "unaligned-trap",
563 		.data		= &unaligned_enabled,
564 		.maxlen		= sizeof (int),
565 		.mode		= 0644,
566 		.proc_handler	= proc_dointvec,
567 	},
568 #endif
569 	{
570 		.procname	= "ctrl-alt-del",
571 		.data		= &C_A_D,
572 		.maxlen		= sizeof(int),
573 		.mode		= 0644,
574 		.proc_handler	= proc_dointvec,
575 	},
576 #ifdef CONFIG_FUNCTION_TRACER
577 	{
578 		.procname	= "ftrace_enabled",
579 		.data		= &ftrace_enabled,
580 		.maxlen		= sizeof(int),
581 		.mode		= 0644,
582 		.proc_handler	= ftrace_enable_sysctl,
583 	},
584 #endif
585 #ifdef CONFIG_STACK_TRACER
586 	{
587 		.procname	= "stack_tracer_enabled",
588 		.data		= &stack_tracer_enabled,
589 		.maxlen		= sizeof(int),
590 		.mode		= 0644,
591 		.proc_handler	= stack_trace_sysctl,
592 	},
593 #endif
594 #ifdef CONFIG_TRACING
595 	{
596 		.procname	= "ftrace_dump_on_oops",
597 		.data		= &ftrace_dump_on_oops,
598 		.maxlen		= sizeof(int),
599 		.mode		= 0644,
600 		.proc_handler	= proc_dointvec,
601 	},
602 	{
603 		.procname	= "traceoff_on_warning",
604 		.data		= &__disable_trace_on_warning,
605 		.maxlen		= sizeof(__disable_trace_on_warning),
606 		.mode		= 0644,
607 		.proc_handler	= proc_dointvec,
608 	},
609 #endif
610 #ifdef CONFIG_MODULES
611 	{
612 		.procname	= "modprobe",
613 		.data		= &modprobe_path,
614 		.maxlen		= KMOD_PATH_LEN,
615 		.mode		= 0644,
616 		.proc_handler	= proc_dostring,
617 	},
618 	{
619 		.procname	= "modules_disabled",
620 		.data		= &modules_disabled,
621 		.maxlen		= sizeof(int),
622 		.mode		= 0644,
623 		/* only handle a transition from default "0" to "1" */
624 		.proc_handler	= proc_dointvec_minmax,
625 		.extra1		= &one,
626 		.extra2		= &one,
627 	},
628 #endif
629 
630 	{
631 		.procname	= "hotplug",
632 		.data		= &uevent_helper,
633 		.maxlen		= UEVENT_HELPER_PATH_LEN,
634 		.mode		= 0644,
635 		.proc_handler	= proc_dostring,
636 	},
637 
638 #ifdef CONFIG_CHR_DEV_SG
639 	{
640 		.procname	= "sg-big-buff",
641 		.data		= &sg_big_buff,
642 		.maxlen		= sizeof (int),
643 		.mode		= 0444,
644 		.proc_handler	= proc_dointvec,
645 	},
646 #endif
647 #ifdef CONFIG_BSD_PROCESS_ACCT
648 	{
649 		.procname	= "acct",
650 		.data		= &acct_parm,
651 		.maxlen		= 3*sizeof(int),
652 		.mode		= 0644,
653 		.proc_handler	= proc_dointvec,
654 	},
655 #endif
656 #ifdef CONFIG_MAGIC_SYSRQ
657 	{
658 		.procname	= "sysrq",
659 		.data		= &__sysrq_enabled,
660 		.maxlen		= sizeof (int),
661 		.mode		= 0644,
662 		.proc_handler	= sysrq_sysctl_handler,
663 	},
664 #endif
665 #ifdef CONFIG_PROC_SYSCTL
666 	{
667 		.procname	= "cad_pid",
668 		.data		= NULL,
669 		.maxlen		= sizeof (int),
670 		.mode		= 0600,
671 		.proc_handler	= proc_do_cad_pid,
672 	},
673 #endif
674 	{
675 		.procname	= "threads-max",
676 		.data		= &max_threads,
677 		.maxlen		= sizeof(int),
678 		.mode		= 0644,
679 		.proc_handler	= proc_dointvec,
680 	},
681 	{
682 		.procname	= "random",
683 		.mode		= 0555,
684 		.child		= random_table,
685 	},
686 	{
687 		.procname	= "usermodehelper",
688 		.mode		= 0555,
689 		.child		= usermodehelper_table,
690 	},
691 	{
692 		.procname	= "overflowuid",
693 		.data		= &overflowuid,
694 		.maxlen		= sizeof(int),
695 		.mode		= 0644,
696 		.proc_handler	= proc_dointvec_minmax,
697 		.extra1		= &minolduid,
698 		.extra2		= &maxolduid,
699 	},
700 	{
701 		.procname	= "overflowgid",
702 		.data		= &overflowgid,
703 		.maxlen		= sizeof(int),
704 		.mode		= 0644,
705 		.proc_handler	= proc_dointvec_minmax,
706 		.extra1		= &minolduid,
707 		.extra2		= &maxolduid,
708 	},
709 #ifdef CONFIG_S390
710 #ifdef CONFIG_MATHEMU
711 	{
712 		.procname	= "ieee_emulation_warnings",
713 		.data		= &sysctl_ieee_emulation_warnings,
714 		.maxlen		= sizeof(int),
715 		.mode		= 0644,
716 		.proc_handler	= proc_dointvec,
717 	},
718 #endif
719 	{
720 		.procname	= "userprocess_debug",
721 		.data		= &show_unhandled_signals,
722 		.maxlen		= sizeof(int),
723 		.mode		= 0644,
724 		.proc_handler	= proc_dointvec,
725 	},
726 #endif
727 	{
728 		.procname	= "pid_max",
729 		.data		= &pid_max,
730 		.maxlen		= sizeof (int),
731 		.mode		= 0644,
732 		.proc_handler	= proc_dointvec_minmax,
733 		.extra1		= &pid_max_min,
734 		.extra2		= &pid_max_max,
735 	},
736 	{
737 		.procname	= "panic_on_oops",
738 		.data		= &panic_on_oops,
739 		.maxlen		= sizeof(int),
740 		.mode		= 0644,
741 		.proc_handler	= proc_dointvec,
742 	},
743 #if defined CONFIG_PRINTK
744 	{
745 		.procname	= "printk",
746 		.data		= &console_loglevel,
747 		.maxlen		= 4*sizeof(int),
748 		.mode		= 0644,
749 		.proc_handler	= proc_dointvec,
750 	},
751 	{
752 		.procname	= "printk_ratelimit",
753 		.data		= &printk_ratelimit_state.interval,
754 		.maxlen		= sizeof(int),
755 		.mode		= 0644,
756 		.proc_handler	= proc_dointvec_jiffies,
757 	},
758 	{
759 		.procname	= "printk_ratelimit_burst",
760 		.data		= &printk_ratelimit_state.burst,
761 		.maxlen		= sizeof(int),
762 		.mode		= 0644,
763 		.proc_handler	= proc_dointvec,
764 	},
765 	{
766 		.procname	= "printk_delay",
767 		.data		= &printk_delay_msec,
768 		.maxlen		= sizeof(int),
769 		.mode		= 0644,
770 		.proc_handler	= proc_dointvec_minmax,
771 		.extra1		= &zero,
772 		.extra2		= &ten_thousand,
773 	},
774 	{
775 		.procname	= "dmesg_restrict",
776 		.data		= &dmesg_restrict,
777 		.maxlen		= sizeof(int),
778 		.mode		= 0644,
779 		.proc_handler	= proc_dointvec_minmax_sysadmin,
780 		.extra1		= &zero,
781 		.extra2		= &one,
782 	},
783 	{
784 		.procname	= "kptr_restrict",
785 		.data		= &kptr_restrict,
786 		.maxlen		= sizeof(int),
787 		.mode		= 0644,
788 		.proc_handler	= proc_dointvec_minmax_sysadmin,
789 		.extra1		= &zero,
790 		.extra2		= &two,
791 	},
792 #endif
793 	{
794 		.procname	= "ngroups_max",
795 		.data		= &ngroups_max,
796 		.maxlen		= sizeof (int),
797 		.mode		= 0444,
798 		.proc_handler	= proc_dointvec,
799 	},
800 	{
801 		.procname	= "cap_last_cap",
802 		.data		= (void *)&cap_last_cap,
803 		.maxlen		= sizeof(int),
804 		.mode		= 0444,
805 		.proc_handler	= proc_dointvec,
806 	},
807 #if defined(CONFIG_LOCKUP_DETECTOR)
808 	{
809 		.procname       = "watchdog",
810 		.data           = &watchdog_user_enabled,
811 		.maxlen         = sizeof (int),
812 		.mode           = 0644,
813 		.proc_handler   = proc_dowatchdog,
814 		.extra1		= &zero,
815 		.extra2		= &one,
816 	},
817 	{
818 		.procname	= "watchdog_thresh",
819 		.data		= &watchdog_thresh,
820 		.maxlen		= sizeof(int),
821 		.mode		= 0644,
822 		.proc_handler	= proc_dowatchdog,
823 		.extra1		= &zero,
824 		.extra2		= &sixty,
825 	},
826 	{
827 		.procname	= "softlockup_panic",
828 		.data		= &softlockup_panic,
829 		.maxlen		= sizeof(int),
830 		.mode		= 0644,
831 		.proc_handler	= proc_dointvec_minmax,
832 		.extra1		= &zero,
833 		.extra2		= &one,
834 	},
835 	{
836 		.procname       = "nmi_watchdog",
837 		.data           = &watchdog_user_enabled,
838 		.maxlen         = sizeof (int),
839 		.mode           = 0644,
840 		.proc_handler   = proc_dowatchdog,
841 		.extra1		= &zero,
842 		.extra2		= &one,
843 	},
844 #endif
845 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
846 	{
847 		.procname       = "unknown_nmi_panic",
848 		.data           = &unknown_nmi_panic,
849 		.maxlen         = sizeof (int),
850 		.mode           = 0644,
851 		.proc_handler   = proc_dointvec,
852 	},
853 #endif
854 #if defined(CONFIG_X86)
855 	{
856 		.procname	= "panic_on_unrecovered_nmi",
857 		.data		= &panic_on_unrecovered_nmi,
858 		.maxlen		= sizeof(int),
859 		.mode		= 0644,
860 		.proc_handler	= proc_dointvec,
861 	},
862 	{
863 		.procname	= "panic_on_io_nmi",
864 		.data		= &panic_on_io_nmi,
865 		.maxlen		= sizeof(int),
866 		.mode		= 0644,
867 		.proc_handler	= proc_dointvec,
868 	},
869 #ifdef CONFIG_DEBUG_STACKOVERFLOW
870 	{
871 		.procname	= "panic_on_stackoverflow",
872 		.data		= &sysctl_panic_on_stackoverflow,
873 		.maxlen		= sizeof(int),
874 		.mode		= 0644,
875 		.proc_handler	= proc_dointvec,
876 	},
877 #endif
878 	{
879 		.procname	= "bootloader_type",
880 		.data		= &bootloader_type,
881 		.maxlen		= sizeof (int),
882 		.mode		= 0444,
883 		.proc_handler	= proc_dointvec,
884 	},
885 	{
886 		.procname	= "bootloader_version",
887 		.data		= &bootloader_version,
888 		.maxlen		= sizeof (int),
889 		.mode		= 0444,
890 		.proc_handler	= proc_dointvec,
891 	},
892 	{
893 		.procname	= "kstack_depth_to_print",
894 		.data		= &kstack_depth_to_print,
895 		.maxlen		= sizeof(int),
896 		.mode		= 0644,
897 		.proc_handler	= proc_dointvec,
898 	},
899 	{
900 		.procname	= "io_delay_type",
901 		.data		= &io_delay_type,
902 		.maxlen		= sizeof(int),
903 		.mode		= 0644,
904 		.proc_handler	= proc_dointvec,
905 	},
906 #endif
907 #if defined(CONFIG_MMU)
908 	{
909 		.procname	= "randomize_va_space",
910 		.data		= &randomize_va_space,
911 		.maxlen		= sizeof(int),
912 		.mode		= 0644,
913 		.proc_handler	= proc_dointvec,
914 	},
915 #endif
916 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
917 	{
918 		.procname	= "spin_retry",
919 		.data		= &spin_retry,
920 		.maxlen		= sizeof (int),
921 		.mode		= 0644,
922 		.proc_handler	= proc_dointvec,
923 	},
924 #endif
925 #if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
926 	{
927 		.procname	= "acpi_video_flags",
928 		.data		= &acpi_realmode_flags,
929 		.maxlen		= sizeof (unsigned long),
930 		.mode		= 0644,
931 		.proc_handler	= proc_doulongvec_minmax,
932 	},
933 #endif
934 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
935 	{
936 		.procname	= "ignore-unaligned-usertrap",
937 		.data		= &no_unaligned_warning,
938 		.maxlen		= sizeof (int),
939 	 	.mode		= 0644,
940 		.proc_handler	= proc_dointvec,
941 	},
942 #endif
943 #ifdef CONFIG_IA64
944 	{
945 		.procname	= "unaligned-dump-stack",
946 		.data		= &unaligned_dump_stack,
947 		.maxlen		= sizeof (int),
948 		.mode		= 0644,
949 		.proc_handler	= proc_dointvec,
950 	},
951 #endif
952 #ifdef CONFIG_DETECT_HUNG_TASK
953 	{
954 		.procname	= "hung_task_panic",
955 		.data		= &sysctl_hung_task_panic,
956 		.maxlen		= sizeof(int),
957 		.mode		= 0644,
958 		.proc_handler	= proc_dointvec_minmax,
959 		.extra1		= &zero,
960 		.extra2		= &one,
961 	},
962 	{
963 		.procname	= "hung_task_check_count",
964 		.data		= &sysctl_hung_task_check_count,
965 		.maxlen		= sizeof(int),
966 		.mode		= 0644,
967 		.proc_handler	= proc_dointvec_minmax,
968 		.extra1		= &zero,
969 	},
970 	{
971 		.procname	= "hung_task_timeout_secs",
972 		.data		= &sysctl_hung_task_timeout_secs,
973 		.maxlen		= sizeof(unsigned long),
974 		.mode		= 0644,
975 		.proc_handler	= proc_dohung_task_timeout_secs,
976 	},
977 	{
978 		.procname	= "hung_task_warnings",
979 		.data		= &sysctl_hung_task_warnings,
980 		.maxlen		= sizeof(unsigned long),
981 		.mode		= 0644,
982 		.proc_handler	= proc_doulongvec_minmax,
983 	},
984 #endif
985 #ifdef CONFIG_COMPAT
986 	{
987 		.procname	= "compat-log",
988 		.data		= &compat_log,
989 		.maxlen		= sizeof (int),
990 	 	.mode		= 0644,
991 		.proc_handler	= proc_dointvec,
992 	},
993 #endif
994 #ifdef CONFIG_RT_MUTEXES
995 	{
996 		.procname	= "max_lock_depth",
997 		.data		= &max_lock_depth,
998 		.maxlen		= sizeof(int),
999 		.mode		= 0644,
1000 		.proc_handler	= proc_dointvec,
1001 	},
1002 #endif
1003 	{
1004 		.procname	= "poweroff_cmd",
1005 		.data		= &poweroff_cmd,
1006 		.maxlen		= POWEROFF_CMD_PATH_LEN,
1007 		.mode		= 0644,
1008 		.proc_handler	= proc_dostring,
1009 	},
1010 #ifdef CONFIG_KEYS
1011 	{
1012 		.procname	= "keys",
1013 		.mode		= 0555,
1014 		.child		= key_sysctls,
1015 	},
1016 #endif
1017 #ifdef CONFIG_RCU_TORTURE_TEST
1018 	{
1019 		.procname       = "rcutorture_runnable",
1020 		.data           = &rcutorture_runnable,
1021 		.maxlen         = sizeof(int),
1022 		.mode           = 0644,
1023 		.proc_handler	= proc_dointvec,
1024 	},
1025 #endif
1026 #ifdef CONFIG_PERF_EVENTS
1027 	/*
1028 	 * User-space scripts rely on the existence of this file
1029 	 * as a feature check for perf_events being enabled.
1030 	 *
1031 	 * So it's an ABI, do not remove!
1032 	 */
1033 	{
1034 		.procname	= "perf_event_paranoid",
1035 		.data		= &sysctl_perf_event_paranoid,
1036 		.maxlen		= sizeof(sysctl_perf_event_paranoid),
1037 		.mode		= 0644,
1038 		.proc_handler	= proc_dointvec,
1039 	},
1040 	{
1041 		.procname	= "perf_event_mlock_kb",
1042 		.data		= &sysctl_perf_event_mlock,
1043 		.maxlen		= sizeof(sysctl_perf_event_mlock),
1044 		.mode		= 0644,
1045 		.proc_handler	= proc_dointvec,
1046 	},
1047 	{
1048 		.procname	= "perf_event_max_sample_rate",
1049 		.data		= &sysctl_perf_event_sample_rate,
1050 		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
1051 		.mode		= 0644,
1052 		.proc_handler	= perf_proc_update_handler,
1053 		.extra1		= &one,
1054 	},
1055 	{
1056 		.procname	= "perf_cpu_time_max_percent",
1057 		.data		= &sysctl_perf_cpu_time_max_percent,
1058 		.maxlen		= sizeof(sysctl_perf_cpu_time_max_percent),
1059 		.mode		= 0644,
1060 		.proc_handler	= perf_cpu_time_max_percent_handler,
1061 		.extra1		= &zero,
1062 		.extra2		= &one_hundred,
1063 	},
1064 #endif
1065 #ifdef CONFIG_KMEMCHECK
1066 	{
1067 		.procname	= "kmemcheck",
1068 		.data		= &kmemcheck_enabled,
1069 		.maxlen		= sizeof(int),
1070 		.mode		= 0644,
1071 		.proc_handler	= proc_dointvec,
1072 	},
1073 #endif
1074 #ifdef CONFIG_BLOCK
1075 	{
1076 		.procname	= "blk_iopoll",
1077 		.data		= &blk_iopoll_enabled,
1078 		.maxlen		= sizeof(int),
1079 		.mode		= 0644,
1080 		.proc_handler	= proc_dointvec,
1081 	},
1082 #endif
1083 	{ }
1084 };
1085 
1086 static struct ctl_table vm_table[] = {
1087 	{
1088 		.procname	= "overcommit_memory",
1089 		.data		= &sysctl_overcommit_memory,
1090 		.maxlen		= sizeof(sysctl_overcommit_memory),
1091 		.mode		= 0644,
1092 		.proc_handler	= proc_dointvec_minmax,
1093 		.extra1		= &zero,
1094 		.extra2		= &two,
1095 	},
1096 	{
1097 		.procname	= "panic_on_oom",
1098 		.data		= &sysctl_panic_on_oom,
1099 		.maxlen		= sizeof(sysctl_panic_on_oom),
1100 		.mode		= 0644,
1101 		.proc_handler	= proc_dointvec_minmax,
1102 		.extra1		= &zero,
1103 		.extra2		= &two,
1104 	},
1105 	{
1106 		.procname	= "oom_kill_allocating_task",
1107 		.data		= &sysctl_oom_kill_allocating_task,
1108 		.maxlen		= sizeof(sysctl_oom_kill_allocating_task),
1109 		.mode		= 0644,
1110 		.proc_handler	= proc_dointvec,
1111 	},
1112 	{
1113 		.procname	= "oom_dump_tasks",
1114 		.data		= &sysctl_oom_dump_tasks,
1115 		.maxlen		= sizeof(sysctl_oom_dump_tasks),
1116 		.mode		= 0644,
1117 		.proc_handler	= proc_dointvec,
1118 	},
1119 	{
1120 		.procname	= "overcommit_ratio",
1121 		.data		= &sysctl_overcommit_ratio,
1122 		.maxlen		= sizeof(sysctl_overcommit_ratio),
1123 		.mode		= 0644,
1124 		.proc_handler	= proc_dointvec,
1125 	},
1126 	{
1127 		.procname	= "page-cluster",
1128 		.data		= &page_cluster,
1129 		.maxlen		= sizeof(int),
1130 		.mode		= 0644,
1131 		.proc_handler	= proc_dointvec_minmax,
1132 		.extra1		= &zero,
1133 	},
1134 	{
1135 		.procname	= "dirty_background_ratio",
1136 		.data		= &dirty_background_ratio,
1137 		.maxlen		= sizeof(dirty_background_ratio),
1138 		.mode		= 0644,
1139 		.proc_handler	= dirty_background_ratio_handler,
1140 		.extra1		= &zero,
1141 		.extra2		= &one_hundred,
1142 	},
1143 	{
1144 		.procname	= "dirty_background_bytes",
1145 		.data		= &dirty_background_bytes,
1146 		.maxlen		= sizeof(dirty_background_bytes),
1147 		.mode		= 0644,
1148 		.proc_handler	= dirty_background_bytes_handler,
1149 		.extra1		= &one_ul,
1150 	},
1151 	{
1152 		.procname	= "dirty_ratio",
1153 		.data		= &vm_dirty_ratio,
1154 		.maxlen		= sizeof(vm_dirty_ratio),
1155 		.mode		= 0644,
1156 		.proc_handler	= dirty_ratio_handler,
1157 		.extra1		= &zero,
1158 		.extra2		= &one_hundred,
1159 	},
1160 	{
1161 		.procname	= "dirty_bytes",
1162 		.data		= &vm_dirty_bytes,
1163 		.maxlen		= sizeof(vm_dirty_bytes),
1164 		.mode		= 0644,
1165 		.proc_handler	= dirty_bytes_handler,
1166 		.extra1		= &dirty_bytes_min,
1167 	},
1168 	{
1169 		.procname	= "dirty_writeback_centisecs",
1170 		.data		= &dirty_writeback_interval,
1171 		.maxlen		= sizeof(dirty_writeback_interval),
1172 		.mode		= 0644,
1173 		.proc_handler	= dirty_writeback_centisecs_handler,
1174 	},
1175 	{
1176 		.procname	= "dirty_expire_centisecs",
1177 		.data		= &dirty_expire_interval,
1178 		.maxlen		= sizeof(dirty_expire_interval),
1179 		.mode		= 0644,
1180 		.proc_handler	= proc_dointvec_minmax,
1181 		.extra1		= &zero,
1182 	},
1183 	{
1184 		.procname       = "nr_pdflush_threads",
1185 		.mode           = 0444 /* read-only */,
1186 		.proc_handler   = pdflush_proc_obsolete,
1187 	},
1188 	{
1189 		.procname	= "swappiness",
1190 		.data		= &vm_swappiness,
1191 		.maxlen		= sizeof(vm_swappiness),
1192 		.mode		= 0644,
1193 		.proc_handler	= proc_dointvec_minmax,
1194 		.extra1		= &zero,
1195 		.extra2		= &one_hundred,
1196 	},
1197 #ifdef CONFIG_HUGETLB_PAGE
1198 	{
1199 		.procname	= "nr_hugepages",
1200 		.data		= NULL,
1201 		.maxlen		= sizeof(unsigned long),
1202 		.mode		= 0644,
1203 		.proc_handler	= hugetlb_sysctl_handler,
1204 		.extra1		= (void *)&hugetlb_zero,
1205 		.extra2		= (void *)&hugetlb_infinity,
1206 	},
1207 #ifdef CONFIG_NUMA
1208 	{
1209 		.procname       = "nr_hugepages_mempolicy",
1210 		.data           = NULL,
1211 		.maxlen         = sizeof(unsigned long),
1212 		.mode           = 0644,
1213 		.proc_handler   = &hugetlb_mempolicy_sysctl_handler,
1214 		.extra1		= (void *)&hugetlb_zero,
1215 		.extra2		= (void *)&hugetlb_infinity,
1216 	},
1217 #endif
1218 	 {
1219 		.procname	= "hugetlb_shm_group",
1220 		.data		= &sysctl_hugetlb_shm_group,
1221 		.maxlen		= sizeof(gid_t),
1222 		.mode		= 0644,
1223 		.proc_handler	= proc_dointvec,
1224 	 },
1225 	 {
1226 		.procname	= "hugepages_treat_as_movable",
1227 		.data		= &hugepages_treat_as_movable,
1228 		.maxlen		= sizeof(int),
1229 		.mode		= 0644,
1230 		.proc_handler	= proc_dointvec,
1231 	},
1232 	{
1233 		.procname	= "nr_overcommit_hugepages",
1234 		.data		= NULL,
1235 		.maxlen		= sizeof(unsigned long),
1236 		.mode		= 0644,
1237 		.proc_handler	= hugetlb_overcommit_handler,
1238 		.extra1		= (void *)&hugetlb_zero,
1239 		.extra2		= (void *)&hugetlb_infinity,
1240 	},
1241 #endif
1242 	{
1243 		.procname	= "lowmem_reserve_ratio",
1244 		.data		= &sysctl_lowmem_reserve_ratio,
1245 		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
1246 		.mode		= 0644,
1247 		.proc_handler	= lowmem_reserve_ratio_sysctl_handler,
1248 	},
1249 	{
1250 		.procname	= "drop_caches",
1251 		.data		= &sysctl_drop_caches,
1252 		.maxlen		= sizeof(int),
1253 		.mode		= 0644,
1254 		.proc_handler	= drop_caches_sysctl_handler,
1255 		.extra1		= &one,
1256 		.extra2		= &three,
1257 	},
1258 #ifdef CONFIG_COMPACTION
1259 	{
1260 		.procname	= "compact_memory",
1261 		.data		= &sysctl_compact_memory,
1262 		.maxlen		= sizeof(int),
1263 		.mode		= 0200,
1264 		.proc_handler	= sysctl_compaction_handler,
1265 	},
1266 	{
1267 		.procname	= "extfrag_threshold",
1268 		.data		= &sysctl_extfrag_threshold,
1269 		.maxlen		= sizeof(int),
1270 		.mode		= 0644,
1271 		.proc_handler	= sysctl_extfrag_handler,
1272 		.extra1		= &min_extfrag_threshold,
1273 		.extra2		= &max_extfrag_threshold,
1274 	},
1275 
1276 #endif /* CONFIG_COMPACTION */
1277 	{
1278 		.procname	= "min_free_kbytes",
1279 		.data		= &min_free_kbytes,
1280 		.maxlen		= sizeof(min_free_kbytes),
1281 		.mode		= 0644,
1282 		.proc_handler	= min_free_kbytes_sysctl_handler,
1283 		.extra1		= &zero,
1284 	},
1285 	{
1286 		.procname	= "percpu_pagelist_fraction",
1287 		.data		= &percpu_pagelist_fraction,
1288 		.maxlen		= sizeof(percpu_pagelist_fraction),
1289 		.mode		= 0644,
1290 		.proc_handler	= percpu_pagelist_fraction_sysctl_handler,
1291 		.extra1		= &min_percpu_pagelist_fract,
1292 	},
1293 #ifdef CONFIG_MMU
1294 	{
1295 		.procname	= "max_map_count",
1296 		.data		= &sysctl_max_map_count,
1297 		.maxlen		= sizeof(sysctl_max_map_count),
1298 		.mode		= 0644,
1299 		.proc_handler	= proc_dointvec_minmax,
1300 		.extra1		= &zero,
1301 	},
1302 #else
1303 	{
1304 		.procname	= "nr_trim_pages",
1305 		.data		= &sysctl_nr_trim_pages,
1306 		.maxlen		= sizeof(sysctl_nr_trim_pages),
1307 		.mode		= 0644,
1308 		.proc_handler	= proc_dointvec_minmax,
1309 		.extra1		= &zero,
1310 	},
1311 #endif
1312 	{
1313 		.procname	= "laptop_mode",
1314 		.data		= &laptop_mode,
1315 		.maxlen		= sizeof(laptop_mode),
1316 		.mode		= 0644,
1317 		.proc_handler	= proc_dointvec_jiffies,
1318 	},
1319 	{
1320 		.procname	= "block_dump",
1321 		.data		= &block_dump,
1322 		.maxlen		= sizeof(block_dump),
1323 		.mode		= 0644,
1324 		.proc_handler	= proc_dointvec,
1325 		.extra1		= &zero,
1326 	},
1327 	{
1328 		.procname	= "vfs_cache_pressure",
1329 		.data		= &sysctl_vfs_cache_pressure,
1330 		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
1331 		.mode		= 0644,
1332 		.proc_handler	= proc_dointvec,
1333 		.extra1		= &zero,
1334 	},
1335 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1336 	{
1337 		.procname	= "legacy_va_layout",
1338 		.data		= &sysctl_legacy_va_layout,
1339 		.maxlen		= sizeof(sysctl_legacy_va_layout),
1340 		.mode		= 0644,
1341 		.proc_handler	= proc_dointvec,
1342 		.extra1		= &zero,
1343 	},
1344 #endif
1345 #ifdef CONFIG_NUMA
1346 	{
1347 		.procname	= "zone_reclaim_mode",
1348 		.data		= &zone_reclaim_mode,
1349 		.maxlen		= sizeof(zone_reclaim_mode),
1350 		.mode		= 0644,
1351 		.proc_handler	= proc_dointvec,
1352 		.extra1		= &zero,
1353 	},
1354 	{
1355 		.procname	= "min_unmapped_ratio",
1356 		.data		= &sysctl_min_unmapped_ratio,
1357 		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
1358 		.mode		= 0644,
1359 		.proc_handler	= sysctl_min_unmapped_ratio_sysctl_handler,
1360 		.extra1		= &zero,
1361 		.extra2		= &one_hundred,
1362 	},
1363 	{
1364 		.procname	= "min_slab_ratio",
1365 		.data		= &sysctl_min_slab_ratio,
1366 		.maxlen		= sizeof(sysctl_min_slab_ratio),
1367 		.mode		= 0644,
1368 		.proc_handler	= sysctl_min_slab_ratio_sysctl_handler,
1369 		.extra1		= &zero,
1370 		.extra2		= &one_hundred,
1371 	},
1372 #endif
1373 #ifdef CONFIG_SMP
1374 	{
1375 		.procname	= "stat_interval",
1376 		.data		= &sysctl_stat_interval,
1377 		.maxlen		= sizeof(sysctl_stat_interval),
1378 		.mode		= 0644,
1379 		.proc_handler	= proc_dointvec_jiffies,
1380 	},
1381 #endif
1382 #ifdef CONFIG_MMU
1383 	{
1384 		.procname	= "mmap_min_addr",
1385 		.data		= &dac_mmap_min_addr,
1386 		.maxlen		= sizeof(unsigned long),
1387 		.mode		= 0644,
1388 		.proc_handler	= mmap_min_addr_handler,
1389 	},
1390 #endif
1391 #ifdef CONFIG_NUMA
1392 	{
1393 		.procname	= "numa_zonelist_order",
1394 		.data		= &numa_zonelist_order,
1395 		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
1396 		.mode		= 0644,
1397 		.proc_handler	= numa_zonelist_order_handler,
1398 	},
1399 #endif
1400 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1401    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1402 	{
1403 		.procname	= "vdso_enabled",
1404 		.data		= &vdso_enabled,
1405 		.maxlen		= sizeof(vdso_enabled),
1406 		.mode		= 0644,
1407 		.proc_handler	= proc_dointvec,
1408 		.extra1		= &zero,
1409 	},
1410 #endif
1411 #ifdef CONFIG_HIGHMEM
1412 	{
1413 		.procname	= "highmem_is_dirtyable",
1414 		.data		= &vm_highmem_is_dirtyable,
1415 		.maxlen		= sizeof(vm_highmem_is_dirtyable),
1416 		.mode		= 0644,
1417 		.proc_handler	= proc_dointvec_minmax,
1418 		.extra1		= &zero,
1419 		.extra2		= &one,
1420 	},
1421 #endif
1422 	{
1423 		.procname	= "scan_unevictable_pages",
1424 		.data		= &scan_unevictable_pages,
1425 		.maxlen		= sizeof(scan_unevictable_pages),
1426 		.mode		= 0644,
1427 		.proc_handler	= scan_unevictable_handler,
1428 	},
1429 #ifdef CONFIG_MEMORY_FAILURE
1430 	{
1431 		.procname	= "memory_failure_early_kill",
1432 		.data		= &sysctl_memory_failure_early_kill,
1433 		.maxlen		= sizeof(sysctl_memory_failure_early_kill),
1434 		.mode		= 0644,
1435 		.proc_handler	= proc_dointvec_minmax,
1436 		.extra1		= &zero,
1437 		.extra2		= &one,
1438 	},
1439 	{
1440 		.procname	= "memory_failure_recovery",
1441 		.data		= &sysctl_memory_failure_recovery,
1442 		.maxlen		= sizeof(sysctl_memory_failure_recovery),
1443 		.mode		= 0644,
1444 		.proc_handler	= proc_dointvec_minmax,
1445 		.extra1		= &zero,
1446 		.extra2		= &one,
1447 	},
1448 #endif
1449 	{
1450 		.procname	= "user_reserve_kbytes",
1451 		.data		= &sysctl_user_reserve_kbytes,
1452 		.maxlen		= sizeof(sysctl_user_reserve_kbytes),
1453 		.mode		= 0644,
1454 		.proc_handler	= proc_doulongvec_minmax,
1455 	},
1456 	{
1457 		.procname	= "admin_reserve_kbytes",
1458 		.data		= &sysctl_admin_reserve_kbytes,
1459 		.maxlen		= sizeof(sysctl_admin_reserve_kbytes),
1460 		.mode		= 0644,
1461 		.proc_handler	= proc_doulongvec_minmax,
1462 	},
1463 	{ }
1464 };
1465 
/*
 * Placeholder table containing only the terminating sentinel.  fs_table
 * uses it as the child of its "binfmt_misc" directory entry.  It is built
 * only when binfmt_misc support is configured in or as a module.
 */
1466 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1467 static struct ctl_table binfmt_misc_table[] = {
1468 	{ }
1469 };
1470 #endif
1471 
/*
 * fs_table - sysctl entries for file-system related knobs.
 *
 * Each entry gives the file name, the kernel variable backing it, the
 * number of bytes exposed, the permission bits and the handler that
 * prints/parses it.  For the *_minmax handlers, extra1 is the lower bound
 * and extra2 the upper bound.  The array ends with an empty { } sentinel.
 */
1472 static struct ctl_table fs_table[] = {
	/*
	 * "inode-nr" and "inode-state" expose the same inodes_stat object;
	 * they differ only in how many longs are shown (2 vs. 7).
	 */
1473 	{
1474 		.procname	= "inode-nr",
1475 		.data		= &inodes_stat,
1476 		.maxlen		= 2*sizeof(long),
1477 		.mode		= 0444,
1478 		.proc_handler	= proc_nr_inodes,
1479 	},
1480 	{
1481 		.procname	= "inode-state",
1482 		.data		= &inodes_stat,
1483 		.maxlen		= 7*sizeof(long),
1484 		.mode		= 0444,
1485 		.proc_handler	= proc_nr_inodes,
1486 	},
1487 	{
1488 		.procname	= "file-nr",
1489 		.data		= &files_stat,
1490 		.maxlen		= sizeof(files_stat),
1491 		.mode		= 0444,
1492 		.proc_handler	= proc_nr_files,
1493 	},
	/* No extra1/extra2 here, so no range is enforced on file-max. */
1494 	{
1495 		.procname	= "file-max",
1496 		.data		= &files_stat.max_files,
1497 		.maxlen		= sizeof(files_stat.max_files),
1498 		.mode		= 0644,
1499 		.proc_handler	= proc_doulongvec_minmax,
1500 	},
1501 	{
1502 		.procname	= "nr_open",
1503 		.data		= &sysctl_nr_open,
1504 		.maxlen		= sizeof(int),
1505 		.mode		= 0644,
1506 		.proc_handler	= proc_dointvec_minmax,
1507 		.extra1		= &sysctl_nr_open_min,
1508 		.extra2		= &sysctl_nr_open_max,
1509 	},
1510 	{
1511 		.procname	= "dentry-state",
1512 		.data		= &dentry_stat,
1513 		.maxlen		= 6*sizeof(long),
1514 		.mode		= 0444,
1515 		.proc_handler	= proc_nr_dentry,
1516 	},
	/* Both overflow ids share the minolduid..maxolduid range. */
1517 	{
1518 		.procname	= "overflowuid",
1519 		.data		= &fs_overflowuid,
1520 		.maxlen		= sizeof(int),
1521 		.mode		= 0644,
1522 		.proc_handler	= proc_dointvec_minmax,
1523 		.extra1		= &minolduid,
1524 		.extra2		= &maxolduid,
1525 	},
1526 	{
1527 		.procname	= "overflowgid",
1528 		.data		= &fs_overflowgid,
1529 		.maxlen		= sizeof(int),
1530 		.mode		= 0644,
1531 		.proc_handler	= proc_dointvec_minmax,
1532 		.extra1		= &minolduid,
1533 		.extra2		= &maxolduid,
1534 	},
1535 #ifdef CONFIG_FILE_LOCKING
1536 	{
1537 		.procname	= "leases-enable",
1538 		.data		= &leases_enable,
1539 		.maxlen		= sizeof(int),
1540 		.mode		= 0644,
1541 		.proc_handler	= proc_dointvec,
1542 	},
1543 #endif
1544 #ifdef CONFIG_DNOTIFY
1545 	{
1546 		.procname	= "dir-notify-enable",
1547 		.data		= &dir_notify_enable,
1548 		.maxlen		= sizeof(int),
1549 		.mode		= 0644,
1550 		.proc_handler	= proc_dointvec,
1551 	},
1552 #endif
	/*
	 * Everything from here to the matching #endif (after the "epoll"
	 * entry) is compiled only when CONFIG_MMU is set.
	 */
1553 #ifdef CONFIG_MMU
1554 #ifdef CONFIG_FILE_LOCKING
1555 	{
1556 		.procname	= "lease-break-time",
1557 		.data		= &lease_break_time,
1558 		.maxlen		= sizeof(int),
1559 		.mode		= 0644,
1560 		.proc_handler	= proc_dointvec,
1561 	},
1562 #endif
1563 #ifdef CONFIG_AIO
	/* aio-nr is read-only (0444); aio-max-nr is the writable knob. */
1564 	{
1565 		.procname	= "aio-nr",
1566 		.data		= &aio_nr,
1567 		.maxlen		= sizeof(aio_nr),
1568 		.mode		= 0444,
1569 		.proc_handler	= proc_doulongvec_minmax,
1570 	},
1571 	{
1572 		.procname	= "aio-max-nr",
1573 		.data		= &aio_max_nr,
1574 		.maxlen		= sizeof(aio_max_nr),
1575 		.mode		= 0644,
1576 		.proc_handler	= proc_doulongvec_minmax,
1577 	},
1578 #endif /* CONFIG_AIO */
	/* Directory entries: no data or handler, only a child table. */
1579 #ifdef CONFIG_INOTIFY_USER
1580 	{
1581 		.procname	= "inotify",
1582 		.mode		= 0555,
1583 		.child		= inotify_table,
1584 	},
1585 #endif
1586 #ifdef CONFIG_EPOLL
1587 	{
1588 		.procname	= "epoll",
1589 		.mode		= 0555,
1590 		.child		= epoll_table,
1591 	},
1592 #endif
1593 #endif
	/* Owner-only (0600) switches, restricted to the range 0..1. */
1594 	{
1595 		.procname	= "protected_symlinks",
1596 		.data		= &sysctl_protected_symlinks,
1597 		.maxlen		= sizeof(int),
1598 		.mode		= 0600,
1599 		.proc_handler	= proc_dointvec_minmax,
1600 		.extra1		= &zero,
1601 		.extra2		= &one,
1602 	},
1603 	{
1604 		.procname	= "protected_hardlinks",
1605 		.data		= &sysctl_protected_hardlinks,
1606 		.maxlen		= sizeof(int),
1607 		.mode		= 0600,
1608 		.proc_handler	= proc_dointvec_minmax,
1609 		.extra1		= &zero,
1610 		.extra2		= &one,
1611 	},
	/*
	 * Accepts 0..2.  The handler runs the core_pattern safety check
	 * after every successful access.
	 */
1612 	{
1613 		.procname	= "suid_dumpable",
1614 		.data		= &suid_dumpable,
1615 		.maxlen		= sizeof(int),
1616 		.mode		= 0644,
1617 		.proc_handler	= proc_dointvec_minmax_coredump,
1618 		.extra1		= &zero,
1619 		.extra2		= &two,
1620 	},
1621 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1622 	{
1623 		.procname	= "binfmt_misc",
1624 		.mode		= 0555,
1625 		.child		= binfmt_misc_table,
1626 	},
1627 #endif
	/* Only extra1 is supplied to the pipe handler; extra2 is unset. */
1628 	{
1629 		.procname	= "pipe-max-size",
1630 		.data		= &pipe_max_size,
1631 		.maxlen		= sizeof(int),
1632 		.mode		= 0644,
1633 		.proc_handler	= &pipe_proc_fn,
1634 		.extra1		= &pipe_min_size,
1635 	},
1636 	{ }
1637 };
1638 
/*
 * debug_table - sysctl entries for debugging knobs.
 *
 * Both entries are optional: depending on the configuration the table may
 * contain nothing but the terminating { } sentinel.
 */
1639 static struct ctl_table debug_table[] = {
1640 #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
	/* Plain integer backed by show_unhandled_signals; no range check. */
1641 	{
1642 		.procname	= "exception-trace",
1643 		.data		= &show_unhandled_signals,
1644 		.maxlen		= sizeof(int),
1645 		.mode		= 0644,
1646 		.proc_handler	= proc_dointvec
1647 	},
1648 #endif
1649 #if defined(CONFIG_OPTPROBES)
	/* extra1/extra2 are &zero and &one, passed to a dedicated handler. */
1650 	{
1651 		.procname	= "kprobes-optimization",
1652 		.data		= &sysctl_kprobes_optimization,
1653 		.maxlen		= sizeof(int),
1654 		.mode		= 0644,
1655 		.proc_handler	= proc_kprobes_optimization_handler,
1656 		.extra1		= &zero,
1657 		.extra2		= &one,
1658 	},
1659 #endif
1660 	{ }
1661 };
1662 
/* dev_table - intentionally empty: holds only the terminating sentinel. */
1663 static struct ctl_table dev_table[] = {
1664 	{ }
1665 };
1666 
1667 int __init sysctl_init(void)
1668 {
1669 	struct ctl_table_header *hdr;
1670 
1671 	hdr = register_sysctl_table(sysctl_base_table);
1672 	kmemleak_not_leak(hdr);
1673 	return 0;
1674 }
1675 
1676 #endif /* CONFIG_SYSCTL */
1677 
1678 /*
1679  * /proc/sys support
1680  */
1681 
1682 #ifdef CONFIG_PROC_SYSCTL
1683 
1684 static int _proc_do_string(void* data, int maxlen, int write,
1685 			   void __user *buffer,
1686 			   size_t *lenp, loff_t *ppos)
1687 {
1688 	size_t len;
1689 	char __user *p;
1690 	char c;
1691 
1692 	if (!data || !maxlen || !*lenp) {
1693 		*lenp = 0;
1694 		return 0;
1695 	}
1696 
1697 	if (write) {
1698 		len = 0;
1699 		p = buffer;
1700 		while (len < *lenp) {
1701 			if (get_user(c, p++))
1702 				return -EFAULT;
1703 			if (c == 0 || c == '\n')
1704 				break;
1705 			len++;
1706 		}
1707 		if (len >= maxlen)
1708 			len = maxlen-1;
1709 		if(copy_from_user(data, buffer, len))
1710 			return -EFAULT;
1711 		((char *) data)[len] = 0;
1712 		*ppos += *lenp;
1713 	} else {
1714 		len = strlen(data);
1715 		if (len > maxlen)
1716 			len = maxlen;
1717 
1718 		if (*ppos > len) {
1719 			*lenp = 0;
1720 			return 0;
1721 		}
1722 
1723 		data += *ppos;
1724 		len  -= *ppos;
1725 
1726 		if (len > *lenp)
1727 			len = *lenp;
1728 		if (len)
1729 			if(copy_to_user(buffer, data, len))
1730 				return -EFAULT;
1731 		if (len < *lenp) {
1732 			if(put_user('\n', ((char __user *) buffer) + len))
1733 				return -EFAULT;
1734 			len++;
1735 		}
1736 		*lenp = len;
1737 		*ppos += len;
1738 	}
1739 	return 0;
1740 }
1741 
1742 /**
1743  * proc_dostring - read a string sysctl
1744  * @table: the sysctl table
1745  * @write: %TRUE if this is a write to the sysctl file
1746  * @buffer: the user buffer
1747  * @lenp: the size of the user buffer
1748  * @ppos: file position
1749  *
1750  * Reads/writes a string from/to the user buffer. If the kernel
1751  * buffer provided is not large enough to hold the string, the
1752  * string is truncated. The copied string is %NULL-terminated.
1753  * If the string is being read by the user process, it is copied
1754  * and a newline '\n' is added. It is truncated if the buffer is
1755  * not large enough.
1756  *
1757  * Returns 0 on success.
1758  */
1759 int proc_dostring(struct ctl_table *table, int write,
1760 		  void __user *buffer, size_t *lenp, loff_t *ppos)
1761 {
1762 	return _proc_do_string(table->data, table->maxlen, write,
1763 			       buffer, lenp, ppos);
1764 }
1765 
/*
 * Advance *@buf to the position returned by skip_spaces() and return how
 * many bytes the cursor moved.
 */
static size_t proc_skip_spaces(char **buf)
{
	char *start = *buf;

	*buf = skip_spaces(start);
	return *buf - start;
}
1774 
/*
 * Skip a run of the character @v at the front of *@buf, looking at no more
 * than *@size bytes.  *@buf is advanced and *@size reduced by the number of
 * characters skipped.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	for (; *size && **buf == v; (*buf)++)
		(*size)--;
}
1784 
#define TMPBUFLEN 22
/**
 * proc_get_long - parse an ASCII formatted integer from a kernel buffer
 *
 * @buf: cursor into a kernel buffer
 * @size: number of bytes left at @buf
 * @val: receives the magnitude of the parsed number
 * @neg: set to %TRUE if the number carried a leading '-'
 * @perm_tr: characters that may legally follow the number
 * @perm_tr_len: number of characters in @perm_tr
 * @tr: if non-NULL, receives the character following the number
 *
 * At most TMPBUFLEN - 1 bytes are examined.  On success %0 is returned and
 * @buf and @size are advanced past the number.  @tr is only written when
 * input remains after the number.  -EINVAL is returned for empty input,
 * for input that does not start with a digit, for a number that fills the
 * whole scratch buffer, and for a trailer not listed in @perm_tr.
 */
static int proc_get_long(char **buf, size_t *size,
			  unsigned long *val, bool *neg,
			  const char *perm_tr, unsigned perm_tr_len, char *tr)
{
	char scratch[TMPBUFLEN];
	char *cur = scratch;
	int used;

	if (!*size)
		return -EINVAL;

	/* Work on a bounded, NUL-terminated copy of the input. */
	used = *size;
	if (used > TMPBUFLEN - 1)
		used = TMPBUFLEN - 1;
	memcpy(scratch, *buf, used);
	scratch[used] = 0;

	/* A lone '-' is not a sign. */
	*neg = (*cur == '-' && *size > 1);
	if (*neg)
		cur++;

	if (!isdigit(*cur))
		return -EINVAL;

	*val = simple_strtoul(cur, &cur, 0);
	used = cur - scratch;

	/*
	 * If the digits ran to the end of the scratch copy we cannot tell
	 * what follows them in the real input, so such long numbers are
	 * refused.
	 */
	if (used == TMPBUFLEN - 1)
		return -EINVAL;

	if (used < *size) {
		if (perm_tr_len && !memchr(perm_tr, *cur, perm_tr_len))
			return -EINVAL;
		if (tr)
			*tr = *cur;
	}

	*buf += used;
	*size -= used;

	return 0;
}
1849 
1850 /**
1851  * proc_put_long - converts an integer to a decimal ASCII formatted string
1852  *
1853  * @buf: the user buffer
1854  * @size: the size of the user buffer
1855  * @val: the integer to be converted
1856  * @neg: sign of the number, %TRUE for negative
1857  *
1858  * In case of success %0 is returned and @buf and @size are updated with
1859  * the amount of bytes written.
1860  */
1861 static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
1862 			  bool neg)
1863 {
1864 	int len;
1865 	char tmp[TMPBUFLEN], *p = tmp;
1866 
1867 	sprintf(p, "%s%lu", neg ? "-" : "", val);
1868 	len = strlen(tmp);
1869 	if (len > *size)
1870 		len = *size;
1871 	if (copy_to_user(*buf, tmp, len))
1872 		return -EFAULT;
1873 	*size -= len;
1874 	*buf += len;
1875 	return 0;
1876 }
1877 #undef TMPBUFLEN
1878 
1879 static int proc_put_char(void __user **buf, size_t *size, char c)
1880 {
1881 	if (*size) {
1882 		char __user **buffer = (char __user **)buf;
1883 		if (put_user(c, *buffer))
1884 			return -EFAULT;
1885 		(*size)--, (*buffer)++;
1886 		*buf = *buffer;
1887 	}
1888 	return 0;
1889 }
1890 
/*
 * do_proc_dointvec_conv - default converter between the parser's
 * sign/magnitude pair and an int.
 * @negp: sign flag (%true for negative)
 * @lvalp: magnitude
 * @valp: the int being read or written
 * @write: non-zero to store (@negp, @lvalp) into @valp, zero to split
 *	@valp into (@negp, @lvalp)
 * @data: unused
 *
 * Returns 0 on success.  On a write, -EINVAL is returned (and @valp is
 * left untouched) when the value does not fit into an int, instead of
 * silently truncating it.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		if (*negp) {
			/* The most negative int has magnitude INT_MAX + 1. */
			if (*lvalp > (unsigned long)INT_MAX + 1)
				return -EINVAL;
			*valp = (*lvalp > (unsigned long)INT_MAX) ?
				INT_MIN : -(int)*lvalp;
		} else {
			if (*lvalp > (unsigned long)INT_MAX)
				return -EINVAL;
			*valp = (int)*lvalp;
		}
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = true;
			/* Negate as unsigned so that INT_MIN is handled too. */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = false;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1909 
/*
 * Characters allowed to follow a number in the vector parsers: space, tab
 * and newline.  The array is not NUL-terminated; users pass its size
 * alongside it (see the proc_get_long() calls).
 */
1910 static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
1911 
1912 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
1913 		  int write, void __user *buffer,
1914 		  size_t *lenp, loff_t *ppos,
1915 		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
1916 			      int write, void *data),
1917 		  void *data)
1918 {
1919 	int *i, vleft, first = 1, err = 0;
1920 	unsigned long page = 0;
1921 	size_t left;
1922 	char *kbuf;
1923 
1924 	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
1925 		*lenp = 0;
1926 		return 0;
1927 	}
1928 
1929 	i = (int *) tbl_data;
1930 	vleft = table->maxlen / sizeof(*i);
1931 	left = *lenp;
1932 
1933 	if (!conv)
1934 		conv = do_proc_dointvec_conv;
1935 
1936 	if (write) {
1937 		if (left > PAGE_SIZE - 1)
1938 			left = PAGE_SIZE - 1;
1939 		page = __get_free_page(GFP_TEMPORARY);
1940 		kbuf = (char *) page;
1941 		if (!kbuf)
1942 			return -ENOMEM;
1943 		if (copy_from_user(kbuf, buffer, left)) {
1944 			err = -EFAULT;
1945 			goto free;
1946 		}
1947 		kbuf[left] = 0;
1948 	}
1949 
1950 	for (; left && vleft--; i++, first=0) {
1951 		unsigned long lval;
1952 		bool neg;
1953 
1954 		if (write) {
1955 			left -= proc_skip_spaces(&kbuf);
1956 
1957 			if (!left)
1958 				break;
1959 			err = proc_get_long(&kbuf, &left, &lval, &neg,
1960 					     proc_wspace_sep,
1961 					     sizeof(proc_wspace_sep), NULL);
1962 			if (err)
1963 				break;
1964 			if (conv(&neg, &lval, i, 1, data)) {
1965 				err = -EINVAL;
1966 				break;
1967 			}
1968 		} else {
1969 			if (conv(&neg, &lval, i, 0, data)) {
1970 				err = -EINVAL;
1971 				break;
1972 			}
1973 			if (!first)
1974 				err = proc_put_char(&buffer, &left, '\t');
1975 			if (err)
1976 				break;
1977 			err = proc_put_long(&buffer, &left, lval, neg);
1978 			if (err)
1979 				break;
1980 		}
1981 	}
1982 
1983 	if (!write && !first && left && !err)
1984 		err = proc_put_char(&buffer, &left, '\n');
1985 	if (write && !err && left)
1986 		left -= proc_skip_spaces(&kbuf);
1987 free:
1988 	if (write) {
1989 		free_page(page);
1990 		if (first)
1991 			return err ? : -EINVAL;
1992 	}
1993 	*lenp -= left;
1994 	*ppos += *lenp;
1995 	return err;
1996 }
1997 
1998 static int do_proc_dointvec(struct ctl_table *table, int write,
1999 		  void __user *buffer, size_t *lenp, loff_t *ppos,
2000 		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2001 			      int write, void *data),
2002 		  void *data)
2003 {
2004 	return __do_proc_dointvec(table->data, table, write,
2005 			buffer, lenp, ppos, conv, data);
2006 }
2007 
2008 /**
2009  * proc_dointvec - read a vector of integers
2010  * @table: the sysctl table
2011  * @write: %TRUE if this is a write to the sysctl file
2012  * @buffer: the user buffer
2013  * @lenp: the size of the user buffer
2014  * @ppos: file position
2015  *
2016  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2017  * values from/to the user buffer, treated as an ASCII string.
2018  *
2019  * Returns 0 on success.
2020  */
2021 int proc_dointvec(struct ctl_table *table, int write,
2022 		     void __user *buffer, size_t *lenp, loff_t *ppos)
2023 {
2024     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2025 		    	    NULL,NULL);
2026 }
2027 
2028 /*
2029  * Taint values can only be increased
2030  * This means we can safely use a temporary.
2031  */
2032 static int proc_taint(struct ctl_table *table, int write,
2033 			       void __user *buffer, size_t *lenp, loff_t *ppos)
2034 {
2035 	struct ctl_table t;
2036 	unsigned long tmptaint = get_taint();
2037 	int err;
2038 
2039 	if (write && !capable(CAP_SYS_ADMIN))
2040 		return -EPERM;
2041 
2042 	t = *table;
2043 	t.data = &tmptaint;
2044 	err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2045 	if (err < 0)
2046 		return err;
2047 
2048 	if (write) {
2049 		/*
2050 		 * Poor man's atomic or. Not worth adding a primitive
2051 		 * to everyone's atomic.h for this
2052 		 */
2053 		int i;
2054 		for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2055 			if ((tmptaint >> i) & 1)
2056 				add_taint(i, LOCKDEP_STILL_OK);
2057 		}
2058 	}
2059 
2060 	return err;
2061 }
2062 
#ifdef CONFIG_PRINTK
/*
 * Same as proc_dointvec_minmax(), except that writes are refused with
 * -EPERM unless the caller has CAP_SYS_ADMIN.  Reads are not restricted.
 */
static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp, loff_t *ppos)
{
	if (!write || capable(CAP_SYS_ADMIN))
		return proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	return -EPERM;
}
#endif
2073 
/*
 * Optional bounds handed to do_proc_dointvec_minmax_conv().  A NULL
 * pointer means that side of the range is unchecked.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * do_proc_dointvec_minmax_conv - int converter with range checking.
 * @negp: sign flag (%true for negative)
 * @lvalp: magnitude
 * @valp: the int being read or written
 * @write: non-zero to store (@negp, @lvalp) into @valp, zero to split
 *	@valp into (@negp, @lvalp)
 * @data: a struct do_proc_dointvec_minmax_conv_param with the bounds
 *
 * On a write the value must first fit into an int and then lie within
 * the bounds; otherwise -EINVAL is returned and @valp is left untouched.
 * Checking the int range first prevents a huge input from wrapping into
 * the permitted interval.  Reads are never range checked.
 *
 * Returns 0 on success.
 */
static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		int val;

		if (*negp) {
			/* The most negative int has magnitude INT_MAX + 1. */
			if (*lvalp > (unsigned long)INT_MAX + 1)
				return -EINVAL;
			val = (*lvalp > (unsigned long)INT_MAX) ?
				INT_MIN : -(int)*lvalp;
		} else {
			if (*lvalp > (unsigned long)INT_MAX)
				return -EINVAL;
			val = (int)*lvalp;
		}
		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = true;
			/* Negate as unsigned so that INT_MIN is handled too. */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = false;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2102 
2103 /**
2104  * proc_dointvec_minmax - read a vector of integers with min/max values
2105  * @table: the sysctl table
2106  * @write: %TRUE if this is a write to the sysctl file
2107  * @buffer: the user buffer
2108  * @lenp: the size of the user buffer
2109  * @ppos: file position
2110  *
2111  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2112  * values from/to the user buffer, treated as an ASCII string.
2113  *
2114  * This routine will ensure the values are within the range specified by
2115  * table->extra1 (min) and table->extra2 (max).
2116  *
2117  * Returns 0 on success.
2118  */
2119 int proc_dointvec_minmax(struct ctl_table *table, int write,
2120 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2121 {
2122 	struct do_proc_dointvec_minmax_conv_param param = {
2123 		.min = (int *) table->extra1,
2124 		.max = (int *) table->extra2,
2125 	};
2126 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
2127 				do_proc_dointvec_minmax_conv, &param);
2128 }
2129 
/*
 * Log a warning when suid_dumpable is SUID_DUMP_ROOT while core_pattern
 * starts with neither '/' nor '|'.  Compiles to an empty function without
 * CONFIG_COREDUMP.
 */
static void validate_coredump_safety(void)
{
#ifdef CONFIG_COREDUMP
	if (suid_dumpable != SUID_DUMP_ROOT)
		return;
	if (core_pattern[0] == '/' || core_pattern[0] == '|')
		return;

	printk(KERN_WARNING "Unsafe core_pattern used with suid_dumpable=2. "
		"Pipe handler or fully qualified core dump path required.\n");
#endif
}
2141 
2142 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
2143 		void __user *buffer, size_t *lenp, loff_t *ppos)
2144 {
2145 	int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2146 	if (!error)
2147 		validate_coredump_safety();
2148 	return error;
2149 }
2150 
#ifdef CONFIG_COREDUMP
/*
 * proc_dostring() followed, when it succeeds, by the core dump safety
 * check.  Returns whatever proc_dostring() returned.
 */
static int proc_dostring_coredump(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int rc = proc_dostring(table, write, buffer, lenp, ppos);

	if (rc)
		return rc;

	validate_coredump_safety();
	return 0;
}
#endif
2161 
2162 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2163 				     void __user *buffer,
2164 				     size_t *lenp, loff_t *ppos,
2165 				     unsigned long convmul,
2166 				     unsigned long convdiv)
2167 {
2168 	unsigned long *i, *min, *max;
2169 	int vleft, first = 1, err = 0;
2170 	unsigned long page = 0;
2171 	size_t left;
2172 	char *kbuf;
2173 
2174 	if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
2175 		*lenp = 0;
2176 		return 0;
2177 	}
2178 
2179 	i = (unsigned long *) data;
2180 	min = (unsigned long *) table->extra1;
2181 	max = (unsigned long *) table->extra2;
2182 	vleft = table->maxlen / sizeof(unsigned long);
2183 	left = *lenp;
2184 
2185 	if (write) {
2186 		if (left > PAGE_SIZE - 1)
2187 			left = PAGE_SIZE - 1;
2188 		page = __get_free_page(GFP_TEMPORARY);
2189 		kbuf = (char *) page;
2190 		if (!kbuf)
2191 			return -ENOMEM;
2192 		if (copy_from_user(kbuf, buffer, left)) {
2193 			err = -EFAULT;
2194 			goto free;
2195 		}
2196 		kbuf[left] = 0;
2197 	}
2198 
2199 	for (; left && vleft--; i++, first = 0) {
2200 		unsigned long val;
2201 
2202 		if (write) {
2203 			bool neg;
2204 
2205 			left -= proc_skip_spaces(&kbuf);
2206 
2207 			err = proc_get_long(&kbuf, &left, &val, &neg,
2208 					     proc_wspace_sep,
2209 					     sizeof(proc_wspace_sep), NULL);
2210 			if (err)
2211 				break;
2212 			if (neg)
2213 				continue;
2214 			if ((min && val < *min) || (max && val > *max))
2215 				continue;
2216 			*i = val;
2217 		} else {
2218 			val = convdiv * (*i) / convmul;
2219 			if (!first) {
2220 				err = proc_put_char(&buffer, &left, '\t');
2221 				if (err)
2222 					break;
2223 			}
2224 			err = proc_put_long(&buffer, &left, val, false);
2225 			if (err)
2226 				break;
2227 		}
2228 	}
2229 
2230 	if (!write && !first && left && !err)
2231 		err = proc_put_char(&buffer, &left, '\n');
2232 	if (write && !err)
2233 		left -= proc_skip_spaces(&kbuf);
2234 free:
2235 	if (write) {
2236 		free_page(page);
2237 		if (first)
2238 			return err ? : -EINVAL;
2239 	}
2240 	*lenp -= left;
2241 	*ppos += *lenp;
2242 	return err;
2243 }
2244 
2245 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2246 				     void __user *buffer,
2247 				     size_t *lenp, loff_t *ppos,
2248 				     unsigned long convmul,
2249 				     unsigned long convdiv)
2250 {
2251 	return __do_proc_doulongvec_minmax(table->data, table, write,
2252 			buffer, lenp, ppos, convmul, convdiv);
2253 }
2254 
2255 /**
2256  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2257  * @table: the sysctl table
2258  * @write: %TRUE if this is a write to the sysctl file
2259  * @buffer: the user buffer
2260  * @lenp: the size of the user buffer
2261  * @ppos: file position
2262  *
2263  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2264  * values from/to the user buffer, treated as an ASCII string.
2265  *
2266  * This routine will ensure the values are within the range specified by
2267  * table->extra1 (min) and table->extra2 (max).
2268  *
2269  * Returns 0 on success.
2270  */
2271 int proc_doulongvec_minmax(struct ctl_table *table, int write,
2272 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2273 {
2274     return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2275 }
2276 
2277 /**
2278  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2279  * @table: the sysctl table
2280  * @write: %TRUE if this is a write to the sysctl file
2281  * @buffer: the user buffer
2282  * @lenp: the size of the user buffer
2283  * @ppos: file position
2284  *
2285  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2286  * values from/to the user buffer, treated as an ASCII string. The values
2287  * are treated as milliseconds, and converted to jiffies when they are stored.
2288  *
2289  * This routine will ensure the values are within the range specified by
2290  * table->extra1 (min) and table->extra2 (max).
2291  *
2292  * Returns 0 on success.
2293  */
2294 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2295 				      void __user *buffer,
2296 				      size_t *lenp, loff_t *ppos)
2297 {
2298     return do_proc_doulongvec_minmax(table, write, buffer,
2299 				     lenp, ppos, HZ, 1000l);
2300 }
2301 
2302 
2303 static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
2304 					 int *valp,
2305 					 int write, void *data)
2306 {
2307 	if (write) {
2308 		if (*lvalp > LONG_MAX / HZ)
2309 			return 1;
2310 		*valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2311 	} else {
2312 		int val = *valp;
2313 		unsigned long lval;
2314 		if (val < 0) {
2315 			*negp = true;
2316 			lval = (unsigned long)-val;
2317 		} else {
2318 			*negp = false;
2319 			lval = (unsigned long)val;
2320 		}
2321 		*lvalp = lval / HZ;
2322 	}
2323 	return 0;
2324 }
2325 
2326 static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
2327 						int *valp,
2328 						int write, void *data)
2329 {
2330 	if (write) {
2331 		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2332 			return 1;
2333 		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2334 	} else {
2335 		int val = *valp;
2336 		unsigned long lval;
2337 		if (val < 0) {
2338 			*negp = true;
2339 			lval = (unsigned long)-val;
2340 		} else {
2341 			*negp = false;
2342 			lval = (unsigned long)val;
2343 		}
2344 		*lvalp = jiffies_to_clock_t(lval);
2345 	}
2346 	return 0;
2347 }
2348 
/*
 * Convert between a user-visible value in milliseconds and an int jiffies
 * count.
 *
 * On write, the signed value described by *negp / *lvalp is passed through
 * msecs_to_jiffies() and stored in *valp unless the result exceeds INT_MAX.
 * On read, *valp is split into a sign (*negp) and a magnitude, and the
 * magnitude is passed through jiffies_to_msecs() into *lvalp.
 * @data is not used.
 *
 * Returns 0 on success, 1 if the written value is out of range.
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		unsigned long jif;

		/*
		 * msecs_to_jiffies() takes an unsigned int; refuse magnitudes
		 * that would be silently truncated on the way in.
		 */
		if (*lvalp > UINT_MAX)
			return 1;

		jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
		if (jif > INT_MAX)
			return 1;
		*valp = (int)jif;
	} else {
		int val = *valp;
		unsigned long lval;

		if (val < 0) {
			*negp = true;
			/* Widen before negating so INT_MIN yields 2^31. */
			lval = -(unsigned long)val;
		} else {
			*negp = false;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_msecs(lval);
	}
	return 0;
}
2373 
2374 /**
2375  * proc_dointvec_jiffies - read a vector of integers as seconds
2376  * @table: the sysctl table
2377  * @write: %TRUE if this is a write to the sysctl file
2378  * @buffer: the user buffer
2379  * @lenp: the size of the user buffer
2380  * @ppos: file position
2381  *
2382  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2383  * values from/to the user buffer, treated as an ASCII string.
2384  * The values read are assumed to be in seconds, and are converted into
2385  * jiffies.
2386  *
2387  * Returns 0 on success.
2388  */
2389 int proc_dointvec_jiffies(struct ctl_table *table, int write,
2390 			  void __user *buffer, size_t *lenp, loff_t *ppos)
2391 {
2392     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2393 		    	    do_proc_dointvec_jiffies_conv,NULL);
2394 }
2395 
2396 /**
2397  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2398  * @table: the sysctl table
2399  * @write: %TRUE if this is a write to the sysctl file
2400  * @buffer: the user buffer
2401  * @lenp: the size of the user buffer
2402  * @ppos: pointer to the file position
2403  *
2404  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2405  * values from/to the user buffer, treated as an ASCII string.
2406  * The values read are assumed to be in 1/USER_HZ seconds, and
2407  * are converted into jiffies.
2408  *
2409  * Returns 0 on success.
2410  */
2411 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2412 				 void __user *buffer, size_t *lenp, loff_t *ppos)
2413 {
2414     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2415 		    	    do_proc_dointvec_userhz_jiffies_conv,NULL);
2416 }
2417 
2418 /**
2419  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2420  * @table: the sysctl table
2421  * @write: %TRUE if this is a write to the sysctl file
2422  * @buffer: the user buffer
2423  * @lenp: the size of the user buffer
2424  * @ppos: file position
2425  * @ppos: the current position in the file
2426  *
2427  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2428  * values from/to the user buffer, treated as an ASCII string.
2429  * The values read are assumed to be in 1/1000 seconds, and
2430  * are converted into jiffies.
2431  *
2432  * Returns 0 on success.
2433  */
2434 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
2435 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2436 {
2437 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
2438 				do_proc_dointvec_ms_jiffies_conv, NULL);
2439 }
2440 
2441 static int proc_do_cad_pid(struct ctl_table *table, int write,
2442 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2443 {
2444 	struct pid *new_pid;
2445 	pid_t tmp;
2446 	int r;
2447 
2448 	tmp = pid_vnr(cad_pid);
2449 
2450 	r = __do_proc_dointvec(&tmp, table, write, buffer,
2451 			       lenp, ppos, NULL, NULL);
2452 	if (r || !write)
2453 		return r;
2454 
2455 	new_pid = find_get_pid(tmp);
2456 	if (!new_pid)
2457 		return -ESRCH;
2458 
2459 	put_pid(xchg(&cad_pid, new_pid));
2460 	return 0;
2461 }
2462 
2463 /**
2464  * proc_do_large_bitmap - read/write from/to a large bitmap
2465  * @table: the sysctl table
2466  * @write: %TRUE if this is a write to the sysctl file
2467  * @buffer: the user buffer
2468  * @lenp: the size of the user buffer
2469  * @ppos: file position
2470  *
2471  * The bitmap is stored at table->data and the bitmap length (in bits)
2472  * in table->maxlen.
2473  *
2474  * We use a range comma separated format (e.g. 1,3-4,10-10) so that
2475  * large bitmaps may be represented in a compact manner. Writing into
2476  * the file will clear the bitmap then update it with the given input.
2477  *
2478  * Returns 0 on success.
2479  */
2480 int proc_do_large_bitmap(struct ctl_table *table, int write,
2481 			 void __user *buffer, size_t *lenp, loff_t *ppos)
2482 {
2483 	int err = 0;
2484 	bool first = 1;
2485 	size_t left = *lenp;
2486 	unsigned long bitmap_len = table->maxlen;
2487 	unsigned long *bitmap = (unsigned long *) table->data;
2488 	unsigned long *tmp_bitmap = NULL;
2489 	char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
2490 
2491 	if (!bitmap_len || !left || (*ppos && !write)) {
2492 		*lenp = 0;
2493 		return 0;
2494 	}
2495 
2496 	if (write) {
2497 		unsigned long page = 0;
2498 		char *kbuf;
2499 
2500 		if (left > PAGE_SIZE - 1)
2501 			left = PAGE_SIZE - 1;
2502 
2503 		page = __get_free_page(GFP_TEMPORARY);
2504 		kbuf = (char *) page;
2505 		if (!kbuf)
2506 			return -ENOMEM;
2507 		if (copy_from_user(kbuf, buffer, left)) {
2508 			free_page(page);
2509 			return -EFAULT;
2510                 }
2511 		kbuf[left] = 0;
2512 
2513 		tmp_bitmap = kzalloc(BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long),
2514 				     GFP_KERNEL);
2515 		if (!tmp_bitmap) {
2516 			free_page(page);
2517 			return -ENOMEM;
2518 		}
2519 		proc_skip_char(&kbuf, &left, '\n');
2520 		while (!err && left) {
2521 			unsigned long val_a, val_b;
2522 			bool neg;
2523 
2524 			err = proc_get_long(&kbuf, &left, &val_a, &neg, tr_a,
2525 					     sizeof(tr_a), &c);
2526 			if (err)
2527 				break;
2528 			if (val_a >= bitmap_len || neg) {
2529 				err = -EINVAL;
2530 				break;
2531 			}
2532 
2533 			val_b = val_a;
2534 			if (left) {
2535 				kbuf++;
2536 				left--;
2537 			}
2538 
2539 			if (c == '-') {
2540 				err = proc_get_long(&kbuf, &left, &val_b,
2541 						     &neg, tr_b, sizeof(tr_b),
2542 						     &c);
2543 				if (err)
2544 					break;
2545 				if (val_b >= bitmap_len || neg ||
2546 				    val_a > val_b) {
2547 					err = -EINVAL;
2548 					break;
2549 				}
2550 				if (left) {
2551 					kbuf++;
2552 					left--;
2553 				}
2554 			}
2555 
2556 			bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
2557 			first = 0;
2558 			proc_skip_char(&kbuf, &left, '\n');
2559 		}
2560 		free_page(page);
2561 	} else {
2562 		unsigned long bit_a, bit_b = 0;
2563 
2564 		while (left) {
2565 			bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
2566 			if (bit_a >= bitmap_len)
2567 				break;
2568 			bit_b = find_next_zero_bit(bitmap, bitmap_len,
2569 						   bit_a + 1) - 1;
2570 
2571 			if (!first) {
2572 				err = proc_put_char(&buffer, &left, ',');
2573 				if (err)
2574 					break;
2575 			}
2576 			err = proc_put_long(&buffer, &left, bit_a, false);
2577 			if (err)
2578 				break;
2579 			if (bit_a != bit_b) {
2580 				err = proc_put_char(&buffer, &left, '-');
2581 				if (err)
2582 					break;
2583 				err = proc_put_long(&buffer, &left, bit_b, false);
2584 				if (err)
2585 					break;
2586 			}
2587 
2588 			first = 0; bit_b++;
2589 		}
2590 		if (!err)
2591 			err = proc_put_char(&buffer, &left, '\n');
2592 	}
2593 
2594 	if (!err) {
2595 		if (write) {
2596 			if (*ppos)
2597 				bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
2598 			else
2599 				bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
2600 		}
2601 		kfree(tmp_bitmap);
2602 		*lenp -= left;
2603 		*ppos += *lenp;
2604 		return 0;
2605 	} else {
2606 		kfree(tmp_bitmap);
2607 		return err;
2608 	}
2609 }
2610 
2611 #else /* CONFIG_PROC_SYSCTL */
2612 
2613 int proc_dostring(struct ctl_table *table, int write,
2614 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2615 {
2616 	return -ENOSYS;
2617 }
2618 
2619 int proc_dointvec(struct ctl_table *table, int write,
2620 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2621 {
2622 	return -ENOSYS;
2623 }
2624 
2625 int proc_dointvec_minmax(struct ctl_table *table, int write,
2626 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2627 {
2628 	return -ENOSYS;
2629 }
2630 
2631 int proc_dointvec_jiffies(struct ctl_table *table, int write,
2632 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2633 {
2634 	return -ENOSYS;
2635 }
2636 
2637 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2638 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2639 {
2640 	return -ENOSYS;
2641 }
2642 
2643 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
2644 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2645 {
2646 	return -ENOSYS;
2647 }
2648 
2649 int proc_doulongvec_minmax(struct ctl_table *table, int write,
2650 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2651 {
2652 	return -ENOSYS;
2653 }
2654 
2655 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2656 				      void __user *buffer,
2657 				      size_t *lenp, loff_t *ppos)
2658 {
2659     return -ENOSYS;
2660 }
2661 
2662 
2663 #endif /* CONFIG_PROC_SYSCTL */
2664 
/*
 * Each exported handler is defined twice, once in each CONFIG_PROC_SYSCTL
 * branch, so the exports are collected here instead of following every
 * definition -- exception granted :-)
 */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
2677