xref: /linux-6.15/kernel/sysctl.c (revision c4399016)
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20 
21 #include <linux/module.h>
22 #include <linux/mm.h>
23 #include <linux/swap.h>
24 #include <linux/slab.h>
25 #include <linux/sysctl.h>
26 #include <linux/proc_fs.h>
27 #include <linux/capability.h>
28 #include <linux/ctype.h>
29 #include <linux/utsname.h>
30 #include <linux/capability.h>
31 #include <linux/smp_lock.h>
32 #include <linux/init.h>
33 #include <linux/kernel.h>
34 #include <linux/kobject.h>
35 #include <linux/net.h>
36 #include <linux/sysrq.h>
37 #include <linux/highuid.h>
38 #include <linux/writeback.h>
39 #include <linux/hugetlb.h>
40 #include <linux/security.h>
41 #include <linux/initrd.h>
42 #include <linux/times.h>
43 #include <linux/limits.h>
44 #include <linux/dcache.h>
45 #include <linux/syscalls.h>
46 #include <linux/nfs_fs.h>
47 #include <linux/acpi.h>
48 
49 #include <asm/uaccess.h>
50 #include <asm/processor.h>
51 
52 extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
53                      void __user *buffer, size_t *lenp, loff_t *ppos);
54 
55 #ifdef CONFIG_X86
56 #include <asm/nmi.h>
57 #include <asm/stacktrace.h>
58 #endif
59 
60 #if defined(CONFIG_SYSCTL)
61 
62 /* External variables not in a header file. */
63 extern int C_A_D;
64 extern int sysctl_overcommit_memory;
65 extern int sysctl_overcommit_ratio;
66 extern int sysctl_panic_on_oom;
67 extern int max_threads;
68 extern int core_uses_pid;
69 extern int suid_dumpable;
70 extern char core_pattern[];
71 extern int pid_max;
72 extern int min_free_kbytes;
73 extern int printk_ratelimit_jiffies;
74 extern int printk_ratelimit_burst;
75 extern int pid_max_min, pid_max_max;
76 extern int sysctl_drop_caches;
77 extern int percpu_pagelist_fraction;
78 extern int compat_log;
79 extern int maps_protect;
80 extern int sysctl_stat_interval;
81 
/*
 * Range limits for the overflowuid/overflowgid entries.  Both the kernel
 * and the fs table hand these to proc_dointvec_minmax through
 * .extra1 (minimum) and .extra2 (maximum).
 */
static int minolduid = 0;
static int maxolduid = 65535;
/* Minimum handed to the vm "percpu_pagelist_fraction" entry via .extra1. */
static int min_percpu_pagelist_fract = 8;
86 
/* Backing storage for the read-only (0444) kernel "ngroups_max" entry. */
static int ngroups_max = NGROUPS_MAX;
88 
89 #ifdef CONFIG_KMOD
90 extern char modprobe_path[];
91 #endif
92 #ifdef CONFIG_CHR_DEV_SG
93 extern int sg_big_buff;
94 #endif
95 
96 #ifdef __sparc__
97 extern char reboot_command [];
98 extern int stop_a_enabled;
99 extern int scons_pwroff;
100 #endif
101 
102 #ifdef __hppa__
103 extern int pwrsw_enabled;
104 extern int unaligned_enabled;
105 #endif
106 
107 #ifdef CONFIG_S390
108 #ifdef CONFIG_MATHEMU
109 extern int sysctl_ieee_emulation_warnings;
110 #endif
111 extern int sysctl_userprocess_debug;
112 extern int spin_retry;
113 #endif
114 
115 extern int sysctl_hz_timer;
116 
117 #ifdef CONFIG_BSD_PROCESS_ACCT
118 extern int acct_parm[];
119 #endif
120 
121 #ifdef CONFIG_IA64
122 extern int no_unaligned_warning;
123 #endif
124 
125 #ifdef CONFIG_RT_MUTEXES
126 extern int max_lock_depth;
127 #endif
128 
129 #ifdef CONFIG_SYSCTL_SYSCALL
130 static int parse_table(int __user *, int, void __user *, size_t __user *,
131 		void __user *, size_t, ctl_table *);
132 #endif
133 
134 
135 #ifdef CONFIG_PROC_SYSCTL
136 static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
137 		  void __user *buffer, size_t *lenp, loff_t *ppos);
138 static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp,
139 			       void __user *buffer, size_t *lenp, loff_t *ppos);
140 #endif
141 
142 static ctl_table root_table[];
/*
 * Header for the built-in root table.  The positional initializer supplies
 * root_table and a list head that points back at itself.  The ctl_entry
 * member is the anchor that sysctl_head_next() starts from and stops at.
 */
static struct ctl_table_header root_table_header =
	{ root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) };
145 
146 static ctl_table kern_table[];
147 static ctl_table vm_table[];
148 static ctl_table fs_table[];
149 static ctl_table debug_table[];
150 static ctl_table dev_table[];
151 extern ctl_table random_table[];
152 #ifdef CONFIG_UNIX98_PTYS
153 extern ctl_table pty_table[];
154 #endif
155 #ifdef CONFIG_INOTIFY_USER
156 extern ctl_table inotify_table[];
157 #endif
158 
#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
/* Backing variable for the vm "legacy_va_layout" entry in vm_table. */
int sysctl_legacy_va_layout;
#endif
162 
163 
164 /* The default sysctl tables: */
165 
/*
 * Top-level directories.  Every entry here is a directory node: it has a
 * name, mode 0555 and a .child table, but no .data or handler of its own.
 * The "net" directory is only present when CONFIG_NET is set.
 */
static ctl_table root_table[] = {
	{
		.ctl_name	= CTL_KERN,
		.procname	= "kernel",
		.mode		= 0555,
		.child		= kern_table,
	},
	{
		.ctl_name	= CTL_VM,
		.procname	= "vm",
		.mode		= 0555,
		.child		= vm_table,
	},
#ifdef CONFIG_NET
	{
		.ctl_name	= CTL_NET,
		.procname	= "net",
		.mode		= 0555,
		.child		= net_table,
	},
#endif
	{
		.ctl_name	= CTL_FS,
		.procname	= "fs",
		.mode		= 0555,
		.child		= fs_table,
	},
	{
		.ctl_name	= CTL_DEBUG,
		.procname	= "debug",
		.mode		= 0555,
		.child		= debug_table,
	},
	{
		.ctl_name	= CTL_DEV,
		.procname	= "dev",
		.mode		= 0555,
		.child		= dev_table,
	},

	/* all-zero entry; presumably the end-of-table marker */
	{ .ctl_name = 0 }
};
208 
#ifdef CONFIG_SCHED_DEBUG
/*
 * Lower/upper bounds for the scheduler tunables in kern_table.  They are
 * handed to proc_dointvec_minmax through .extra1/.extra2, and that handler
 * reads its limits as int.  They must therefore be int objects: declared as
 * unsigned long they are misread on 64-bit big-endian machines, where the
 * first four bytes of the object are zero.
 */
static int min_sched_granularity_ns = 100000;		/* 100 usecs */
static int max_sched_granularity_ns = 1000000000;	/* 1 second */
static int min_wakeup_granularity_ns;			/* 0 usecs */
static int max_wakeup_granularity_ns = 1000000000;	/* 1 second */
#endif
215 
/*
 * Entries of the "kernel" directory (root_table names this array as the
 * .child of its "kernel" entry).
 *
 * Each leaf entry ties a name (.procname) to a variable (.data, .maxlen),
 * its permission bits (.mode) and a .proc_handler, plus a .strategy routine
 * where one is set.  Entries with a .child and no .data are
 * sub-directories.  Many entries exist only for particular configurations
 * or architectures; see the #ifdef around each one.
 */
static ctl_table kern_table[] = {
#ifdef CONFIG_SCHED_DEBUG
	/* scheduler tunables; .extra1/.extra2 carry the min/max bounds */
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "sched_granularity_ns",
		.data		= &sysctl_sched_granularity,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &min_sched_granularity_ns,
		.extra2		= &max_sched_granularity_ns,
	},
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "sched_wakeup_granularity_ns",
		.data		= &sysctl_sched_wakeup_granularity,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &min_wakeup_granularity_ns,
		.extra2		= &max_wakeup_granularity_ns,
	},
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "sched_batch_wakeup_granularity_ns",
		.data		= &sysctl_sched_batch_wakeup_granularity,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &min_wakeup_granularity_ns,
		.extra2		= &max_wakeup_granularity_ns,
	},
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "sched_stat_granularity_ns",
		.data		= &sysctl_sched_stat_granularity,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &min_wakeup_granularity_ns,
		.extra2		= &max_wakeup_granularity_ns,
	},
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "sched_runtime_limit_ns",
		.data		= &sysctl_sched_runtime_limit,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &min_sched_granularity_ns,
		.extra2		= &max_sched_granularity_ns,
	},
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "sched_child_runs_first",
		.data		= &sysctl_sched_child_runs_first,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "sched_features",
		.data		= &sysctl_sched_features,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#endif
	{
		.ctl_name	= KERN_PANIC,
		.procname	= "panic",
		.data		= &panic_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= KERN_CORE_USES_PID,
		.procname	= "core_uses_pid",
		.data		= &core_uses_pid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= KERN_CORE_PATTERN,
		.procname	= "core_pattern",
		.data		= core_pattern,
		.maxlen		= CORENAME_MAX_SIZE,
		.mode		= 0644,
		.proc_handler	= &proc_dostring,
		.strategy	= &sysctl_string,
	},
#ifdef CONFIG_PROC_SYSCTL
	/* uses the file-local proc_dointvec_taint handler declared above */
	{
		.ctl_name	= KERN_TAINTED,
		.procname	= "tainted",
		.data		= &tainted,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_taint,
	},
#endif
	{
		.ctl_name	= KERN_CAP_BSET,
		.procname	= "cap-bound",
		.data		= &cap_bset,
		.maxlen		= sizeof(kernel_cap_t),
		.mode		= 0600,
		.proc_handler	= &proc_dointvec_bset,
	},
#ifdef CONFIG_BLK_DEV_INITRD
	{
		.ctl_name	= KERN_REALROOTDEV,
		.procname	= "real-root-dev",
		.data		= &real_root_dev,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#endif
#ifdef __sparc__
	{
		.ctl_name	= KERN_SPARC_REBOOT,
		.procname	= "reboot-cmd",
		.data		= reboot_command,
		.maxlen		= 256,
		.mode		= 0644,
		.proc_handler	= &proc_dostring,
		.strategy	= &sysctl_string,
	},
	{
		.ctl_name	= KERN_SPARC_STOP_A,
		.procname	= "stop-a",
		.data		= &stop_a_enabled,
		.maxlen		= sizeof (int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= KERN_SPARC_SCONS_PWROFF,
		.procname	= "scons-poweroff",
		.data		= &scons_pwroff,
		.maxlen		= sizeof (int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#endif
#ifdef __hppa__
	{
		.ctl_name	= KERN_HPPA_PWRSW,
		.procname	= "soft-power",
		.data		= &pwrsw_enabled,
		.maxlen		= sizeof (int),
	 	.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= KERN_HPPA_UNALIGNED,
		.procname	= "unaligned-trap",
		.data		= &unaligned_enabled,
		.maxlen		= sizeof (int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#endif
	{
		.ctl_name	= KERN_CTLALTDEL,
		.procname	= "ctrl-alt-del",
		.data		= &C_A_D,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	/* .maxlen covers four ints starting at console_loglevel */
	{
		.ctl_name	= KERN_PRINTK,
		.procname	= "printk",
		.data		= &console_loglevel,
		.maxlen		= 4*sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#ifdef CONFIG_KMOD
	{
		.ctl_name	= KERN_MODPROBE,
		.procname	= "modprobe",
		.data		= &modprobe_path,
		.maxlen		= KMOD_PATH_LEN,
		.mode		= 0644,
		.proc_handler	= &proc_dostring,
		.strategy	= &sysctl_string,
	},
#endif
#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
	{
		.ctl_name	= KERN_HOTPLUG,
		.procname	= "hotplug",
		.data		= &uevent_helper,
		.maxlen		= UEVENT_HELPER_PATH_LEN,
		.mode		= 0644,
		.proc_handler	= &proc_dostring,
		.strategy	= &sysctl_string,
	},
#endif
#ifdef CONFIG_CHR_DEV_SG
	{
		.ctl_name	= KERN_SG_BIG_BUFF,
		.procname	= "sg-big-buff",
		.data		= &sg_big_buff,
		.maxlen		= sizeof (int),
		.mode		= 0444,
		.proc_handler	= &proc_dointvec,
	},
#endif
#ifdef CONFIG_BSD_PROCESS_ACCT
	/* .maxlen covers three ints of acct_parm[] */
	{
		.ctl_name	= KERN_ACCT,
		.procname	= "acct",
		.data		= &acct_parm,
		.maxlen		= 3*sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#endif
#ifdef CONFIG_MAGIC_SYSRQ
	{
		.ctl_name	= KERN_SYSRQ,
		.procname	= "sysrq",
		.data		= &__sysrq_enabled,
		.maxlen		= sizeof (int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#endif
#ifdef CONFIG_PROC_SYSCTL
	/* no backing variable (.data is NULL); handled by proc_do_cad_pid */
	{
		.ctl_name	= KERN_CADPID,
		.procname	= "cad_pid",
		.data		= NULL,
		.maxlen		= sizeof (int),
		.mode		= 0600,
		.proc_handler	= &proc_do_cad_pid,
	},
#endif
	{
		.ctl_name	= KERN_MAX_THREADS,
		.procname	= "threads-max",
		.data		= &max_threads,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	/* sub-directory; its table is defined outside this file */
	{
		.ctl_name	= KERN_RANDOM,
		.procname	= "random",
		.mode		= 0555,
		.child		= random_table,
	},
#ifdef CONFIG_UNIX98_PTYS
	/* sub-directory; its table is defined outside this file */
	{
		.ctl_name	= KERN_PTY,
		.procname	= "pty",
		.mode		= 0555,
		.child		= pty_table,
	},
#endif
	{
		.ctl_name	= KERN_OVERFLOWUID,
		.procname	= "overflowuid",
		.data		= &overflowuid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &minolduid,
		.extra2		= &maxolduid,
	},
	{
		.ctl_name	= KERN_OVERFLOWGID,
		.procname	= "overflowgid",
		.data		= &overflowgid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &minolduid,
		.extra2		= &maxolduid,
	},
#ifdef CONFIG_S390
#ifdef CONFIG_MATHEMU
	{
		.ctl_name	= KERN_IEEE_EMULATION_WARNINGS,
		.procname	= "ieee_emulation_warnings",
		.data		= &sysctl_ieee_emulation_warnings,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#endif
#ifdef CONFIG_NO_IDLE_HZ
	{
		.ctl_name       = KERN_HZ_TIMER,
		.procname       = "hz_timer",
		.data           = &sysctl_hz_timer,
		.maxlen         = sizeof(int),
		.mode           = 0644,
		.proc_handler   = &proc_dointvec,
	},
#endif
	{
		.ctl_name	= KERN_S390_USER_DEBUG_LOGGING,
		.procname	= "userprocess_debug",
		.data		= &sysctl_userprocess_debug,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#endif
	/* bounded by pid_max_min and pid_max_max */
	{
		.ctl_name	= KERN_PIDMAX,
		.procname	= "pid_max",
		.data		= &pid_max,
		.maxlen		= sizeof (int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= sysctl_intvec,
		.extra1		= &pid_max_min,
		.extra2		= &pid_max_max,
	},
	{
		.ctl_name	= KERN_PANIC_ON_OOPS,
		.procname	= "panic_on_oops",
		.data		= &panic_on_oops,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= KERN_PRINTK_RATELIMIT,
		.procname	= "printk_ratelimit",
		.data		= &printk_ratelimit_jiffies,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= KERN_PRINTK_RATELIMIT_BURST,
		.procname	= "printk_ratelimit_burst",
		.data		= &printk_ratelimit_burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= KERN_NGROUPS_MAX,
		.procname	= "ngroups_max",
		.data		= &ngroups_max,
		.maxlen		= sizeof (int),
		.mode		= 0444,
		.proc_handler	= &proc_dointvec,
	},
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
	{
		.ctl_name       = KERN_UNKNOWN_NMI_PANIC,
		.procname       = "unknown_nmi_panic",
		.data           = &unknown_nmi_panic,
		.maxlen         = sizeof (int),
		.mode           = 0644,
		.proc_handler   = &proc_dointvec,
	},
	{
		.ctl_name       = KERN_NMI_WATCHDOG,
		.procname       = "nmi_watchdog",
		.data           = &nmi_watchdog_enabled,
		.maxlen         = sizeof (int),
		.mode           = 0644,
		.proc_handler   = &proc_nmi_enabled,
	},
#endif
#if defined(CONFIG_X86)
	{
		.ctl_name	= KERN_PANIC_ON_NMI,
		.procname	= "panic_on_unrecovered_nmi",
		.data		= &panic_on_unrecovered_nmi,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= KERN_BOOTLOADER_TYPE,
		.procname	= "bootloader_type",
		.data		= &bootloader_type,
		.maxlen		= sizeof (int),
		.mode		= 0444,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "kstack_depth_to_print",
		.data		= &kstack_depth_to_print,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#endif
#if defined(CONFIG_MMU)
	{
		.ctl_name	= KERN_RANDOMIZE,
		.procname	= "randomize_va_space",
		.data		= &randomize_va_space,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#endif
#if defined(CONFIG_S390) && defined(CONFIG_SMP)
	{
		.ctl_name	= KERN_SPIN_RETRY,
		.procname	= "spin_retry",
		.data		= &spin_retry,
		.maxlen		= sizeof (int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#endif
#ifdef CONFIG_ACPI_SLEEP
	{
		.ctl_name	= KERN_ACPI_VIDEO_FLAGS,
		.procname	= "acpi_video_flags",
		.data		= &acpi_video_flags,
		.maxlen		= sizeof (unsigned long),
		.mode		= 0644,
		.proc_handler	= &proc_doulongvec_minmax,
	},
#endif
#ifdef CONFIG_IA64
	{
		.ctl_name	= KERN_IA64_UNALIGNED,
		.procname	= "ignore-unaligned-usertrap",
		.data		= &no_unaligned_warning,
		.maxlen		= sizeof (int),
	 	.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#endif
#ifdef CONFIG_COMPAT
	{
		.ctl_name	= KERN_COMPAT_LOG,
		.procname	= "compat-log",
		.data		= &compat_log,
		.maxlen		= sizeof (int),
	 	.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#endif
#ifdef CONFIG_RT_MUTEXES
	{
		.ctl_name	= KERN_MAX_LOCK_DEPTH,
		.procname	= "max_lock_depth",
		.data		= &max_lock_depth,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#endif
#ifdef CONFIG_PROC_FS
	{
		.ctl_name       = CTL_UNNUMBERED,
		.procname       = "maps_protect",
		.data           = &maps_protect,
		.maxlen         = sizeof(int),
		.mode           = 0644,
		.proc_handler   = &proc_dointvec,
	},
#endif

	/* all-zero entry; presumably the end-of-table marker */
	{ .ctl_name = 0 }
};
701 
/*
 * Shared limits for the range-checked entries in vm_table.  Each one is
 * referenced by address and serves as a one-element integer vector.
 */
static int one_hundred = 100;
static int zero = 0;
706 
707 
708 static ctl_table vm_table[] = {
709 	{
710 		.ctl_name	= VM_OVERCOMMIT_MEMORY,
711 		.procname	= "overcommit_memory",
712 		.data		= &sysctl_overcommit_memory,
713 		.maxlen		= sizeof(sysctl_overcommit_memory),
714 		.mode		= 0644,
715 		.proc_handler	= &proc_dointvec,
716 	},
717 	{
718 		.ctl_name	= VM_PANIC_ON_OOM,
719 		.procname	= "panic_on_oom",
720 		.data		= &sysctl_panic_on_oom,
721 		.maxlen		= sizeof(sysctl_panic_on_oom),
722 		.mode		= 0644,
723 		.proc_handler	= &proc_dointvec,
724 	},
725 	{
726 		.ctl_name	= VM_OVERCOMMIT_RATIO,
727 		.procname	= "overcommit_ratio",
728 		.data		= &sysctl_overcommit_ratio,
729 		.maxlen		= sizeof(sysctl_overcommit_ratio),
730 		.mode		= 0644,
731 		.proc_handler	= &proc_dointvec,
732 	},
733 	{
734 		.ctl_name	= VM_PAGE_CLUSTER,
735 		.procname	= "page-cluster",
736 		.data		= &page_cluster,
737 		.maxlen		= sizeof(int),
738 		.mode		= 0644,
739 		.proc_handler	= &proc_dointvec,
740 	},
741 	{
742 		.ctl_name	= VM_DIRTY_BACKGROUND,
743 		.procname	= "dirty_background_ratio",
744 		.data		= &dirty_background_ratio,
745 		.maxlen		= sizeof(dirty_background_ratio),
746 		.mode		= 0644,
747 		.proc_handler	= &proc_dointvec_minmax,
748 		.strategy	= &sysctl_intvec,
749 		.extra1		= &zero,
750 		.extra2		= &one_hundred,
751 	},
752 	{
753 		.ctl_name	= VM_DIRTY_RATIO,
754 		.procname	= "dirty_ratio",
755 		.data		= &vm_dirty_ratio,
756 		.maxlen		= sizeof(vm_dirty_ratio),
757 		.mode		= 0644,
758 		.proc_handler	= &proc_dointvec_minmax,
759 		.strategy	= &sysctl_intvec,
760 		.extra1		= &zero,
761 		.extra2		= &one_hundred,
762 	},
763 	{
764 		.ctl_name	= VM_DIRTY_WB_CS,
765 		.procname	= "dirty_writeback_centisecs",
766 		.data		= &dirty_writeback_interval,
767 		.maxlen		= sizeof(dirty_writeback_interval),
768 		.mode		= 0644,
769 		.proc_handler	= &dirty_writeback_centisecs_handler,
770 	},
771 	{
772 		.ctl_name	= VM_DIRTY_EXPIRE_CS,
773 		.procname	= "dirty_expire_centisecs",
774 		.data		= &dirty_expire_interval,
775 		.maxlen		= sizeof(dirty_expire_interval),
776 		.mode		= 0644,
777 		.proc_handler	= &proc_dointvec_userhz_jiffies,
778 	},
779 	{
780 		.ctl_name	= VM_NR_PDFLUSH_THREADS,
781 		.procname	= "nr_pdflush_threads",
782 		.data		= &nr_pdflush_threads,
783 		.maxlen		= sizeof nr_pdflush_threads,
784 		.mode		= 0444 /* read-only*/,
785 		.proc_handler	= &proc_dointvec,
786 	},
787 	{
788 		.ctl_name	= VM_SWAPPINESS,
789 		.procname	= "swappiness",
790 		.data		= &vm_swappiness,
791 		.maxlen		= sizeof(vm_swappiness),
792 		.mode		= 0644,
793 		.proc_handler	= &proc_dointvec_minmax,
794 		.strategy	= &sysctl_intvec,
795 		.extra1		= &zero,
796 		.extra2		= &one_hundred,
797 	},
798 #ifdef CONFIG_HUGETLB_PAGE
799 	 {
800 		.ctl_name	= VM_HUGETLB_PAGES,
801 		.procname	= "nr_hugepages",
802 		.data		= &max_huge_pages,
803 		.maxlen		= sizeof(unsigned long),
804 		.mode		= 0644,
805 		.proc_handler	= &hugetlb_sysctl_handler,
806 		.extra1		= (void *)&hugetlb_zero,
807 		.extra2		= (void *)&hugetlb_infinity,
808 	 },
809 	 {
810 		.ctl_name	= VM_HUGETLB_GROUP,
811 		.procname	= "hugetlb_shm_group",
812 		.data		= &sysctl_hugetlb_shm_group,
813 		.maxlen		= sizeof(gid_t),
814 		.mode		= 0644,
815 		.proc_handler	= &proc_dointvec,
816 	 },
817 #endif
818 	{
819 		.ctl_name	= VM_LOWMEM_RESERVE_RATIO,
820 		.procname	= "lowmem_reserve_ratio",
821 		.data		= &sysctl_lowmem_reserve_ratio,
822 		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
823 		.mode		= 0644,
824 		.proc_handler	= &lowmem_reserve_ratio_sysctl_handler,
825 		.strategy	= &sysctl_intvec,
826 	},
827 	{
828 		.ctl_name	= VM_DROP_PAGECACHE,
829 		.procname	= "drop_caches",
830 		.data		= &sysctl_drop_caches,
831 		.maxlen		= sizeof(int),
832 		.mode		= 0644,
833 		.proc_handler	= drop_caches_sysctl_handler,
834 		.strategy	= &sysctl_intvec,
835 	},
836 	{
837 		.ctl_name	= VM_MIN_FREE_KBYTES,
838 		.procname	= "min_free_kbytes",
839 		.data		= &min_free_kbytes,
840 		.maxlen		= sizeof(min_free_kbytes),
841 		.mode		= 0644,
842 		.proc_handler	= &min_free_kbytes_sysctl_handler,
843 		.strategy	= &sysctl_intvec,
844 		.extra1		= &zero,
845 	},
846 	{
847 		.ctl_name	= VM_PERCPU_PAGELIST_FRACTION,
848 		.procname	= "percpu_pagelist_fraction",
849 		.data		= &percpu_pagelist_fraction,
850 		.maxlen		= sizeof(percpu_pagelist_fraction),
851 		.mode		= 0644,
852 		.proc_handler	= &percpu_pagelist_fraction_sysctl_handler,
853 		.strategy	= &sysctl_intvec,
854 		.extra1		= &min_percpu_pagelist_fract,
855 	},
856 #ifdef CONFIG_MMU
857 	{
858 		.ctl_name	= VM_MAX_MAP_COUNT,
859 		.procname	= "max_map_count",
860 		.data		= &sysctl_max_map_count,
861 		.maxlen		= sizeof(sysctl_max_map_count),
862 		.mode		= 0644,
863 		.proc_handler	= &proc_dointvec
864 	},
865 #endif
866 	{
867 		.ctl_name	= VM_LAPTOP_MODE,
868 		.procname	= "laptop_mode",
869 		.data		= &laptop_mode,
870 		.maxlen		= sizeof(laptop_mode),
871 		.mode		= 0644,
872 		.proc_handler	= &proc_dointvec_jiffies,
873 		.strategy	= &sysctl_jiffies,
874 	},
875 	{
876 		.ctl_name	= VM_BLOCK_DUMP,
877 		.procname	= "block_dump",
878 		.data		= &block_dump,
879 		.maxlen		= sizeof(block_dump),
880 		.mode		= 0644,
881 		.proc_handler	= &proc_dointvec,
882 		.strategy	= &sysctl_intvec,
883 		.extra1		= &zero,
884 	},
885 	{
886 		.ctl_name	= VM_VFS_CACHE_PRESSURE,
887 		.procname	= "vfs_cache_pressure",
888 		.data		= &sysctl_vfs_cache_pressure,
889 		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
890 		.mode		= 0644,
891 		.proc_handler	= &proc_dointvec,
892 		.strategy	= &sysctl_intvec,
893 		.extra1		= &zero,
894 	},
895 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
896 	{
897 		.ctl_name	= VM_LEGACY_VA_LAYOUT,
898 		.procname	= "legacy_va_layout",
899 		.data		= &sysctl_legacy_va_layout,
900 		.maxlen		= sizeof(sysctl_legacy_va_layout),
901 		.mode		= 0644,
902 		.proc_handler	= &proc_dointvec,
903 		.strategy	= &sysctl_intvec,
904 		.extra1		= &zero,
905 	},
906 #endif
907 #ifdef CONFIG_NUMA
908 	{
909 		.ctl_name	= VM_ZONE_RECLAIM_MODE,
910 		.procname	= "zone_reclaim_mode",
911 		.data		= &zone_reclaim_mode,
912 		.maxlen		= sizeof(zone_reclaim_mode),
913 		.mode		= 0644,
914 		.proc_handler	= &proc_dointvec,
915 		.strategy	= &sysctl_intvec,
916 		.extra1		= &zero,
917 	},
918 	{
919 		.ctl_name	= VM_MIN_UNMAPPED,
920 		.procname	= "min_unmapped_ratio",
921 		.data		= &sysctl_min_unmapped_ratio,
922 		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
923 		.mode		= 0644,
924 		.proc_handler	= &sysctl_min_unmapped_ratio_sysctl_handler,
925 		.strategy	= &sysctl_intvec,
926 		.extra1		= &zero,
927 		.extra2		= &one_hundred,
928 	},
929 	{
930 		.ctl_name	= VM_MIN_SLAB,
931 		.procname	= "min_slab_ratio",
932 		.data		= &sysctl_min_slab_ratio,
933 		.maxlen		= sizeof(sysctl_min_slab_ratio),
934 		.mode		= 0644,
935 		.proc_handler	= &sysctl_min_slab_ratio_sysctl_handler,
936 		.strategy	= &sysctl_intvec,
937 		.extra1		= &zero,
938 		.extra2		= &one_hundred,
939 	},
940 #endif
941 #ifdef CONFIG_SMP
942 	{
943 		.ctl_name	= CTL_UNNUMBERED,
944 		.procname	= "stat_interval",
945 		.data		= &sysctl_stat_interval,
946 		.maxlen		= sizeof(sysctl_stat_interval),
947 		.mode		= 0644,
948 		.proc_handler	= &proc_dointvec_jiffies,
949 		.strategy	= &sysctl_jiffies,
950 	},
951 #endif
952 #ifdef CONFIG_SECURITY
953 	{
954 		.ctl_name	= CTL_UNNUMBERED,
955 		.procname	= "mmap_min_addr",
956 		.data		= &mmap_min_addr,
957 		.maxlen         = sizeof(unsigned long),
958 		.mode		= 0644,
959 		.proc_handler	= &proc_doulongvec_minmax,
960 	},
961 #ifdef CONFIG_NUMA
962 	{
963 		.ctl_name	= CTL_UNNUMBERED,
964 		.procname	= "numa_zonelist_order",
965 		.data		= &numa_zonelist_order,
966 		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
967 		.mode		= 0644,
968 		.proc_handler	= &numa_zonelist_order_handler,
969 		.strategy	= &sysctl_string,
970 	},
971 #endif
972 #endif
973 #if defined(CONFIG_X86_32) || \
974    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
975 	{
976 		.ctl_name	= VM_VDSO_ENABLED,
977 		.procname	= "vdso_enabled",
978 		.data		= &vdso_enabled,
979 		.maxlen		= sizeof(vdso_enabled),
980 		.mode		= 0644,
981 		.proc_handler	= &proc_dointvec,
982 		.strategy	= &sysctl_intvec,
983 		.extra1		= &zero,
984 	},
985 #endif
986 	{ .ctl_name = 0 }
987 };
988 
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
/*
 * Child table for the fs "binfmt_misc" directory entry in fs_table.
 * It holds nothing but the all-zero entry.
 */
static ctl_table binfmt_misc_table[] = {
	{ .ctl_name = 0 }
};
#endif
994 
/*
 * Entries of the "fs" directory (root_table names this array as the .child
 * of its "fs" entry).
 *
 * Several read-only (0444) entries expose a run of consecutive ints from a
 * statistics object; .maxlen gives the number of ints covered.
 */
static ctl_table fs_table[] = {
	/* first two ints of inodes_stat */
	{
		.ctl_name	= FS_NRINODE,
		.procname	= "inode-nr",
		.data		= &inodes_stat,
		.maxlen		= 2*sizeof(int),
		.mode		= 0444,
		.proc_handler	= &proc_dointvec,
	},
	/* seven ints starting at the same inodes_stat object */
	{
		.ctl_name	= FS_STATINODE,
		.procname	= "inode-state",
		.data		= &inodes_stat,
		.maxlen		= 7*sizeof(int),
		.mode		= 0444,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= FS_NRFILE,
		.procname	= "file-nr",
		.data		= &files_stat,
		.maxlen		= 3*sizeof(int),
		.mode		= 0444,
		.proc_handler	= &proc_nr_files,
	},
	{
		.ctl_name	= FS_MAXFILE,
		.procname	= "file-max",
		.data		= &files_stat.max_files,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= FS_DENTRY,
		.procname	= "dentry-state",
		.data		= &dentry_stat,
		.maxlen		= 6*sizeof(int),
		.mode		= 0444,
		.proc_handler	= &proc_dointvec,
	},
	/* range-checked against minolduid..maxolduid */
	{
		.ctl_name	= FS_OVERFLOWUID,
		.procname	= "overflowuid",
		.data		= &fs_overflowuid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &minolduid,
		.extra2		= &maxolduid,
	},
	{
		.ctl_name	= FS_OVERFLOWGID,
		.procname	= "overflowgid",
		.data		= &fs_overflowgid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &minolduid,
		.extra2		= &maxolduid,
	},
	{
		.ctl_name	= FS_LEASES,
		.procname	= "leases-enable",
		.data		= &leases_enable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#ifdef CONFIG_DNOTIFY
	{
		.ctl_name	= FS_DIR_NOTIFY,
		.procname	= "dir-notify-enable",
		.data		= &dir_notify_enable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#endif
#ifdef CONFIG_MMU
	{
		.ctl_name	= FS_LEASE_TIME,
		.procname	= "lease-break-time",
		.data		= &lease_break_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= FS_AIO_NR,
		.procname	= "aio-nr",
		.data		= &aio_nr,
		.maxlen		= sizeof(aio_nr),
		.mode		= 0444,
		.proc_handler	= &proc_doulongvec_minmax,
	},
	{
		.ctl_name	= FS_AIO_MAX_NR,
		.procname	= "aio-max-nr",
		.data		= &aio_max_nr,
		.maxlen		= sizeof(aio_max_nr),
		.mode		= 0644,
		.proc_handler	= &proc_doulongvec_minmax,
	},
#ifdef CONFIG_INOTIFY_USER
	/* sub-directory; its table is defined outside this file */
	{
		.ctl_name	= FS_INOTIFY,
		.procname	= "inotify",
		.mode		= 0555,
		.child		= inotify_table,
	},
#endif
#endif
	/* NOTE(review): KERN_-prefixed ctl_name inside the fs table - confirm intended */
	{
		.ctl_name	= KERN_SETUID_DUMPABLE,
		.procname	= "suid_dumpable",
		.data		= &suid_dumpable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "binfmt_misc",
		.mode		= 0555,
		.child		= binfmt_misc_table,
	},
#endif
	/* all-zero entry; presumably the end-of-table marker */
	{ .ctl_name = 0 }
};
1128 
/* Child table of the root "debug" directory; holds only the all-zero entry. */
static ctl_table debug_table[] = {
	{ .ctl_name = 0 }
};
1132 
/* Child table of the root "dev" directory; holds only the all-zero entry. */
static ctl_table dev_table[] = {
	{ .ctl_name = 0 }
};
1136 
/*
 * Held around updates of a table header's ->used and ->unregistering
 * fields and around walks of the header list in sysctl_head_next().
 */
static DEFINE_SPINLOCK(sysctl_lock);
1138 
1139 /* called under sysctl_lock */
1140 static int use_table(struct ctl_table_header *p)
1141 {
1142 	if (unlikely(p->unregistering))
1143 		return 0;
1144 	p->used++;
1145 	return 1;
1146 }
1147 
1148 /* called under sysctl_lock */
1149 static void unuse_table(struct ctl_table_header *p)
1150 {
1151 	if (!--p->used)
1152 		if (unlikely(p->unregistering))
1153 			complete(p->unregistering);
1154 }
1155 
/*
 * Take a table header off the header list, first waiting until nobody is
 * using it any more.
 *
 * Called under sysctl_lock.  If it has to wait, the lock is dropped for the
 * duration of the wait and taken again before the header is unlinked, so
 * the lock is held on return either way.
 */
static void start_unregistering(struct ctl_table_header *p)
{
	/*
	 * if p->used is 0, nobody will ever touch that entry again;
	 * we'll eliminate all paths to it before dropping sysctl_lock
	 */
	if (unlikely(p->used)) {
		struct completion wait;
		init_completion(&wait);
		/* once this is set, use_table() refuses new users of p */
		p->unregistering = &wait;
		spin_unlock(&sysctl_lock);
		/* signalled by unuse_table() when p->used drops to zero */
		wait_for_completion(&wait);
		spin_lock(&sysctl_lock);
	}
	/*
	 * do not remove from the list until nobody holds it; walking the
	 * list in do_sysctl() relies on that.
	 */
	list_del_init(&p->ctl_entry);
}
1177 
1178 void sysctl_head_finish(struct ctl_table_header *head)
1179 {
1180 	if (!head)
1181 		return;
1182 	spin_lock(&sysctl_lock);
1183 	unuse_table(head);
1184 	spin_unlock(&sysctl_lock);
1185 }
1186 
1187 struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
1188 {
1189 	struct ctl_table_header *head;
1190 	struct list_head *tmp;
1191 	spin_lock(&sysctl_lock);
1192 	if (prev) {
1193 		tmp = &prev->ctl_entry;
1194 		unuse_table(prev);
1195 		goto next;
1196 	}
1197 	tmp = &root_table_header.ctl_entry;
1198 	for (;;) {
1199 		head = list_entry(tmp, struct ctl_table_header, ctl_entry);
1200 
1201 		if (!use_table(head))
1202 			goto next;
1203 		spin_unlock(&sysctl_lock);
1204 		return head;
1205 	next:
1206 		tmp = tmp->next;
1207 		if (tmp == &root_table_header.ctl_entry)
1208 			break;
1209 	}
1210 	spin_unlock(&sysctl_lock);
1211 	return NULL;
1212 }
1213 
1214 #ifdef CONFIG_SYSCTL_SYSCALL
1215 int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
1216 	       void __user *newval, size_t newlen)
1217 {
1218 	struct ctl_table_header *head;
1219 	int error = -ENOTDIR;
1220 
1221 	if (nlen <= 0 || nlen >= CTL_MAXNAME)
1222 		return -ENOTDIR;
1223 	if (oldval) {
1224 		int old_len;
1225 		if (!oldlenp || get_user(old_len, oldlenp))
1226 			return -EFAULT;
1227 	}
1228 
1229 	for (head = sysctl_head_next(NULL); head;
1230 			head = sysctl_head_next(head)) {
1231 		error = parse_table(name, nlen, oldval, oldlenp,
1232 					newval, newlen, head->ctl_table);
1233 		if (error != -ENOTDIR) {
1234 			sysctl_head_finish(head);
1235 			break;
1236 		}
1237 	}
1238 	return error;
1239 }
1240 
1241 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
1242 {
1243 	struct __sysctl_args tmp;
1244 	int error;
1245 
1246 	if (copy_from_user(&tmp, args, sizeof(tmp)))
1247 		return -EFAULT;
1248 
1249 	lock_kernel();
1250 	error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
1251 			  tmp.newval, tmp.newlen);
1252 	unlock_kernel();
1253 	return error;
1254 }
1255 #endif /* CONFIG_SYSCTL_SYSCALL */
1256 
1257 /*
1258  * sysctl_perm does NOT grant the superuser all rights automatically, because
1259  * some sysctl variables are readonly even to root.
1260  */
1261 
1262 static int test_perm(int mode, int op)
1263 {
1264 	if (!current->euid)
1265 		mode >>= 6;
1266 	else if (in_egroup_p(0))
1267 		mode >>= 3;
1268 	if ((mode & op & 0007) == op)
1269 		return 0;
1270 	return -EACCES;
1271 }
1272 
1273 int sysctl_perm(ctl_table *table, int op)
1274 {
1275 	int error;
1276 	error = security_sysctl(table, op);
1277 	if (error)
1278 		return error;
1279 	return test_perm(table->mode, op);
1280 }
1281 
1282 #ifdef CONFIG_SYSCTL_SYSCALL
1283 static int parse_table(int __user *name, int nlen,
1284 		       void __user *oldval, size_t __user *oldlenp,
1285 		       void __user *newval, size_t newlen,
1286 		       ctl_table *table)
1287 {
1288 	int n;
1289 repeat:
1290 	if (!nlen)
1291 		return -ENOTDIR;
1292 	if (get_user(n, name))
1293 		return -EFAULT;
1294 	for ( ; table->ctl_name || table->procname; table++) {
1295 		if (!table->ctl_name)
1296 			continue;
1297 		if (n == table->ctl_name) {
1298 			int error;
1299 			if (table->child) {
1300 				if (sysctl_perm(table, 001))
1301 					return -EPERM;
1302 				name++;
1303 				nlen--;
1304 				table = table->child;
1305 				goto repeat;
1306 			}
1307 			error = do_sysctl_strategy(table, name, nlen,
1308 						   oldval, oldlenp,
1309 						   newval, newlen);
1310 			return error;
1311 		}
1312 	}
1313 	return -ENOTDIR;
1314 }
1315 
1316 /* Perform the actual read/write of a sysctl table entry. */
1317 int do_sysctl_strategy (ctl_table *table,
1318 			int __user *name, int nlen,
1319 			void __user *oldval, size_t __user *oldlenp,
1320 			void __user *newval, size_t newlen)
1321 {
1322 	int op = 0, rc;
1323 	size_t len;
1324 
1325 	if (oldval)
1326 		op |= 004;
1327 	if (newval)
1328 		op |= 002;
1329 	if (sysctl_perm(table, op))
1330 		return -EPERM;
1331 
1332 	if (table->strategy) {
1333 		rc = table->strategy(table, name, nlen, oldval, oldlenp,
1334 				     newval, newlen);
1335 		if (rc < 0)
1336 			return rc;
1337 		if (rc > 0)
1338 			return 0;
1339 	}
1340 
1341 	/* If there is no strategy routine, or if the strategy returns
1342 	 * zero, proceed with automatic r/w */
1343 	if (table->data && table->maxlen) {
1344 		if (oldval && oldlenp) {
1345 			if (get_user(len, oldlenp))
1346 				return -EFAULT;
1347 			if (len) {
1348 				if (len > table->maxlen)
1349 					len = table->maxlen;
1350 				if(copy_to_user(oldval, table->data, len))
1351 					return -EFAULT;
1352 				if(put_user(len, oldlenp))
1353 					return -EFAULT;
1354 			}
1355 		}
1356 		if (newval && newlen) {
1357 			len = newlen;
1358 			if (len > table->maxlen)
1359 				len = table->maxlen;
1360 			if(copy_from_user(table->data, newval, len))
1361 				return -EFAULT;
1362 		}
1363 	}
1364 	return 0;
1365 }
1366 #endif /* CONFIG_SYSCTL_SYSCALL */
1367 
1368 static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1369 {
1370 	for (; table->ctl_name || table->procname; table++) {
1371 		table->parent = parent;
1372 		if (table->child)
1373 			sysctl_set_parent(table, table->child);
1374 	}
1375 }
1376 
/*
 * Set the parent pointers throughout root_table (top-level entries get a
 * NULL parent).  Registered below as a core initcall; always returns 0.
 */
static __init int sysctl_init(void)
{
	sysctl_set_parent(NULL, root_table);
	return 0;
}

core_initcall(sysctl_init);
1384 
1385 /**
1386  * register_sysctl_table - register a sysctl hierarchy
1387  * @table: the top-level table structure
1388  *
 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
 * array. An entry with both a ctl_name of 0 and a %NULL procname terminates
 * the table.
1391  *
1392  * The members of the &ctl_table structure are used as follows:
1393  *
1394  * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
1395  *            must be unique within that level of sysctl
1396  *
1397  * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1398  *            enter a sysctl file
1399  *
1400  * data - a pointer to data for use by proc_handler
1401  *
1402  * maxlen - the maximum size in bytes of the data
1403  *
1404  * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1405  *
1406  * child - a pointer to the child sysctl table if this entry is a directory, or
1407  *         %NULL.
1408  *
1409  * proc_handler - the text handler routine (described below)
1410  *
1411  * strategy - the strategy routine (described below)
1412  *
1413  * de - for internal use by the sysctl routines
1414  *
1415  * extra1, extra2 - extra pointers usable by the proc handler routines
1416  *
1417  * Leaf nodes in the sysctl tree will be represented by a single file
1418  * under /proc; non-leaf nodes will be represented by directories.
1419  *
1420  * sysctl(2) can automatically manage read and write requests through
1421  * the sysctl table.  The data and maxlen fields of the ctl_table
1422  * struct enable minimal validation of the values being written to be
1423  * performed, and the mode field allows minimal authentication.
1424  *
1425  * More sophisticated management can be enabled by the provision of a
1426  * strategy routine with the table entry.  This will be called before
1427  * any automatic read or write of the data is performed.
1428  *
1429  * The strategy routine may return
1430  *
1431  * < 0 - Error occurred (error is passed to user process)
1432  *
1433  * 0   - OK - proceed with automatic read or write.
1434  *
1435  * > 0 - OK - read or write has been done by the strategy routine, so
1436  *       return immediately.
1437  *
1438  * There must be a proc_handler routine for any terminal nodes
1439  * mirrored under /proc/sys (non-terminals are handled by a built-in
1440  * directory handler).  Several default handlers are available to
1441  * cover common cases -
1442  *
1443  * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1444  * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1445  * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1446  *
1447  * It is the handler's job to read the input buffer from user memory
1448  * and process it. The handler should return 0 on success.
1449  *
1450  * This routine returns %NULL on a failure to register, and a pointer
1451  * to the table header on success.
1452  */
1453 struct ctl_table_header *register_sysctl_table(ctl_table * table)
1454 {
1455 	struct ctl_table_header *tmp;
1456 	tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
1457 	if (!tmp)
1458 		return NULL;
1459 	tmp->ctl_table = table;
1460 	INIT_LIST_HEAD(&tmp->ctl_entry);
1461 	tmp->used = 0;
1462 	tmp->unregistering = NULL;
1463 	sysctl_set_parent(NULL, table);
1464 	spin_lock(&sysctl_lock);
1465 	list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
1466 	spin_unlock(&sysctl_lock);
1467 	return tmp;
1468 }
1469 
1470 /**
1471  * unregister_sysctl_table - unregister a sysctl table hierarchy
1472  * @header: the header returned from register_sysctl_table
1473  *
1474  * Unregisters the sysctl table and all children. proc entries may not
1475  * actually be removed until they are no longer used by anyone.
1476  */
void unregister_sysctl_table(struct ctl_table_header * header)
{
	/* start_unregistering() may block in wait_for_completion() */
	might_sleep();
	spin_lock(&sysctl_lock);
	/* waits for remaining users, if any, then unlinks the header */
	start_unregistering(header);
	spin_unlock(&sysctl_lock);
	kfree(header);
}
1485 
1486 #else /* !CONFIG_SYSCTL */
/* Stub for the !CONFIG_SYSCTL branch: nothing is registered, NULL is returned. */
struct ctl_table_header *register_sysctl_table(ctl_table * table)
{
	return NULL;
}
1491 
/* Stub for the !CONFIG_SYSCTL branch: there is nothing to unregister. */
void unregister_sysctl_table(struct ctl_table_header * table)
{
}
1495 
1496 #endif /* CONFIG_SYSCTL */
1497 
1498 /*
1499  * /proc/sys support
1500  */
1501 
1502 #ifdef CONFIG_PROC_SYSCTL
1503 
1504 static int _proc_do_string(void* data, int maxlen, int write,
1505 			   struct file *filp, void __user *buffer,
1506 			   size_t *lenp, loff_t *ppos)
1507 {
1508 	size_t len;
1509 	char __user *p;
1510 	char c;
1511 
1512 	if (!data || !maxlen || !*lenp) {
1513 		*lenp = 0;
1514 		return 0;
1515 	}
1516 
1517 	if (write) {
1518 		len = 0;
1519 		p = buffer;
1520 		while (len < *lenp) {
1521 			if (get_user(c, p++))
1522 				return -EFAULT;
1523 			if (c == 0 || c == '\n')
1524 				break;
1525 			len++;
1526 		}
1527 		if (len >= maxlen)
1528 			len = maxlen-1;
1529 		if(copy_from_user(data, buffer, len))
1530 			return -EFAULT;
1531 		((char *) data)[len] = 0;
1532 		*ppos += *lenp;
1533 	} else {
1534 		len = strlen(data);
1535 		if (len > maxlen)
1536 			len = maxlen;
1537 
1538 		if (*ppos > len) {
1539 			*lenp = 0;
1540 			return 0;
1541 		}
1542 
1543 		data += *ppos;
1544 		len  -= *ppos;
1545 
1546 		if (len > *lenp)
1547 			len = *lenp;
1548 		if (len)
1549 			if(copy_to_user(buffer, data, len))
1550 				return -EFAULT;
1551 		if (len < *lenp) {
1552 			if(put_user('\n', ((char __user *) buffer) + len))
1553 				return -EFAULT;
1554 			len++;
1555 		}
1556 		*lenp = len;
1557 		*ppos += len;
1558 	}
1559 	return 0;
1560 }
1561 
1562 /**
1563  * proc_dostring - read a string sysctl
1564  * @table: the sysctl table
1565  * @write: %TRUE if this is a write to the sysctl file
1566  * @filp: the file structure
1567  * @buffer: the user buffer
1568  * @lenp: the size of the user buffer
1569  * @ppos: file position
1570  *
1571  * Reads/writes a string from/to the user buffer. If the kernel
1572  * buffer provided is not large enough to hold the string, the
1573  * string is truncated. The copied string is %NULL-terminated.
1574  * If the string is being read by the user process, it is copied
1575  * and a newline '\n' is added. It is truncated if the buffer is
1576  * not large enough.
1577  *
1578  * Returns 0 on success.
1579  */
1580 int proc_dostring(ctl_table *table, int write, struct file *filp,
1581 		  void __user *buffer, size_t *lenp, loff_t *ppos)
1582 {
1583 	return _proc_do_string(table->data, table->maxlen, write, filp,
1584 			       buffer, lenp, ppos);
1585 }
1586 
1587 
/*
 * Default conversion between the parsed (sign, magnitude) pair and the
 * stored int.
 *
 * @negp:  sign flag; read on write, set to -1 or 0 on read
 * @lvalp: magnitude; read on write, filled in on read
 * @valp:  the kernel integer being written or reported
 * @write: non-zero to store into *@valp, zero to report *@valp
 * @data:  unused
 *
 * Always returns 0.
 */
static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		*valp = *negp ? -*lvalp : *lvalp;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate after widening to unsigned: "-val" overflows
			 * for INT_MIN when evaluated as a signed int.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1606 
1607 static int __do_proc_dointvec(void *tbl_data, ctl_table *table,
1608 		  int write, struct file *filp, void __user *buffer,
1609 		  size_t *lenp, loff_t *ppos,
1610 		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1611 			      int write, void *data),
1612 		  void *data)
1613 {
1614 #define TMPBUFLEN 21
1615 	int *i, vleft, first=1, neg, val;
1616 	unsigned long lval;
1617 	size_t left, len;
1618 
1619 	char buf[TMPBUFLEN], *p;
1620 	char __user *s = buffer;
1621 
1622 	if (!tbl_data || !table->maxlen || !*lenp ||
1623 	    (*ppos && !write)) {
1624 		*lenp = 0;
1625 		return 0;
1626 	}
1627 
1628 	i = (int *) tbl_data;
1629 	vleft = table->maxlen / sizeof(*i);
1630 	left = *lenp;
1631 
1632 	if (!conv)
1633 		conv = do_proc_dointvec_conv;
1634 
1635 	for (; left && vleft--; i++, first=0) {
1636 		if (write) {
1637 			while (left) {
1638 				char c;
1639 				if (get_user(c, s))
1640 					return -EFAULT;
1641 				if (!isspace(c))
1642 					break;
1643 				left--;
1644 				s++;
1645 			}
1646 			if (!left)
1647 				break;
1648 			neg = 0;
1649 			len = left;
1650 			if (len > sizeof(buf) - 1)
1651 				len = sizeof(buf) - 1;
1652 			if (copy_from_user(buf, s, len))
1653 				return -EFAULT;
1654 			buf[len] = 0;
1655 			p = buf;
1656 			if (*p == '-' && left > 1) {
1657 				neg = 1;
1658 				p++;
1659 			}
1660 			if (*p < '0' || *p > '9')
1661 				break;
1662 
1663 			lval = simple_strtoul(p, &p, 0);
1664 
1665 			len = p-buf;
1666 			if ((len < left) && *p && !isspace(*p))
1667 				break;
1668 			if (neg)
1669 				val = -val;
1670 			s += len;
1671 			left -= len;
1672 
1673 			if (conv(&neg, &lval, i, 1, data))
1674 				break;
1675 		} else {
1676 			p = buf;
1677 			if (!first)
1678 				*p++ = '\t';
1679 
1680 			if (conv(&neg, &lval, i, 0, data))
1681 				break;
1682 
1683 			sprintf(p, "%s%lu", neg ? "-" : "", lval);
1684 			len = strlen(buf);
1685 			if (len > left)
1686 				len = left;
1687 			if(copy_to_user(s, buf, len))
1688 				return -EFAULT;
1689 			left -= len;
1690 			s += len;
1691 		}
1692 	}
1693 
1694 	if (!write && !first && left) {
1695 		if(put_user('\n', s))
1696 			return -EFAULT;
1697 		left--, s++;
1698 	}
1699 	if (write) {
1700 		while (left) {
1701 			char c;
1702 			if (get_user(c, s++))
1703 				return -EFAULT;
1704 			if (!isspace(c))
1705 				break;
1706 			left--;
1707 		}
1708 	}
1709 	if (write && first)
1710 		return -EINVAL;
1711 	*lenp -= left;
1712 	*ppos += *lenp;
1713 	return 0;
1714 #undef TMPBUFLEN
1715 }
1716 
1717 static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
1718 		  void __user *buffer, size_t *lenp, loff_t *ppos,
1719 		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1720 			      int write, void *data),
1721 		  void *data)
1722 {
1723 	return __do_proc_dointvec(table->data, table, write, filp,
1724 			buffer, lenp, ppos, conv, data);
1725 }
1726 
1727 /**
1728  * proc_dointvec - read a vector of integers
1729  * @table: the sysctl table
1730  * @write: %TRUE if this is a write to the sysctl file
1731  * @filp: the file structure
1732  * @buffer: the user buffer
1733  * @lenp: the size of the user buffer
1734  * @ppos: file position
1735  *
1736  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1737  * values from/to the user buffer, treated as an ASCII string.
1738  *
1739  * Returns 0 on success.
1740  */
1741 int proc_dointvec(ctl_table *table, int write, struct file *filp,
1742 		     void __user *buffer, size_t *lenp, loff_t *ppos)
1743 {
1744     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1745 		    	    NULL,NULL);
1746 }
1747 
/* How do_proc_dointvec_bset_conv() combines a written value with the old one */
#define OP_SET	0
#define OP_AND	1
#define OP_OR	2

/*
 * Conversion routine that stores a written value by assigning it
 * (OP_SET), and-ing it into (OP_AND) or or-ing it into (OP_OR) the
 * current value.  The operation is read from the int that @data points
 * to.  Reads report the stored value as a sign flag and a magnitude.
 *
 * Always returns 0.
 */
static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp,
				      int *valp,
				      int write, void *data)
{
	int op = *(int *)data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		switch(op) {
		case OP_SET:	*valp = val; break;
		case OP_AND:	*valp &= val; break;
		case OP_OR:	*valp |= val; break;
		}
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate after widening to unsigned: "-val" overflows
			 * for INT_MIN when evaluated as a signed int.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1776 
1777 /*
1778  *	init may raise the set.
1779  */
1780 
1781 int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
1782 			void __user *buffer, size_t *lenp, loff_t *ppos)
1783 {
1784 	int op;
1785 
1786 	if (write && !capable(CAP_SYS_MODULE)) {
1787 		return -EPERM;
1788 	}
1789 
1790 	op = is_init(current) ? OP_SET : OP_AND;
1791 	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1792 				do_proc_dointvec_bset_conv,&op);
1793 }
1794 
1795 /*
1796  *	Taint values can only be increased
1797  */
1798 static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp,
1799 			       void __user *buffer, size_t *lenp, loff_t *ppos)
1800 {
1801 	int op;
1802 
1803 	if (write && !capable(CAP_SYS_ADMIN))
1804 		return -EPERM;
1805 
1806 	op = OP_OR;
1807 	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1808 				do_proc_dointvec_bset_conv,&op);
1809 }
1810 
/* Optional limits for do_proc_dointvec_minmax_conv(); NULL means no limit. */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Conversion routine that rejects written values outside the limits in
 * the do_proc_dointvec_minmax_conv_param that @data points to.
 *
 * Returns 0 on success, or -EINVAL (leaving *@valp untouched) when a
 * written value is below *min or above *max.
 */
static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate after widening to unsigned: "-val" overflows
			 * for INT_MIN when evaluated as a signed int.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1839 
1840 /**
1841  * proc_dointvec_minmax - read a vector of integers with min/max values
1842  * @table: the sysctl table
1843  * @write: %TRUE if this is a write to the sysctl file
1844  * @filp: the file structure
1845  * @buffer: the user buffer
1846  * @lenp: the size of the user buffer
1847  * @ppos: file position
1848  *
1849  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1850  * values from/to the user buffer, treated as an ASCII string.
1851  *
1852  * This routine will ensure the values are within the range specified by
1853  * table->extra1 (min) and table->extra2 (max).
1854  *
1855  * Returns 0 on success.
1856  */
1857 int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
1858 		  void __user *buffer, size_t *lenp, loff_t *ppos)
1859 {
1860 	struct do_proc_dointvec_minmax_conv_param param = {
1861 		.min = (int *) table->extra1,
1862 		.max = (int *) table->extra2,
1863 	};
1864 	return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
1865 				do_proc_dointvec_minmax_conv, &param);
1866 }
1867 
1868 static int __do_proc_doulongvec_minmax(void *data, ctl_table *table, int write,
1869 				     struct file *filp,
1870 				     void __user *buffer,
1871 				     size_t *lenp, loff_t *ppos,
1872 				     unsigned long convmul,
1873 				     unsigned long convdiv)
1874 {
1875 #define TMPBUFLEN 21
1876 	unsigned long *i, *min, *max, val;
1877 	int vleft, first=1, neg;
1878 	size_t len, left;
1879 	char buf[TMPBUFLEN], *p;
1880 	char __user *s = buffer;
1881 
1882 	if (!data || !table->maxlen || !*lenp ||
1883 	    (*ppos && !write)) {
1884 		*lenp = 0;
1885 		return 0;
1886 	}
1887 
1888 	i = (unsigned long *) data;
1889 	min = (unsigned long *) table->extra1;
1890 	max = (unsigned long *) table->extra2;
1891 	vleft = table->maxlen / sizeof(unsigned long);
1892 	left = *lenp;
1893 
1894 	for (; left && vleft--; i++, min++, max++, first=0) {
1895 		if (write) {
1896 			while (left) {
1897 				char c;
1898 				if (get_user(c, s))
1899 					return -EFAULT;
1900 				if (!isspace(c))
1901 					break;
1902 				left--;
1903 				s++;
1904 			}
1905 			if (!left)
1906 				break;
1907 			neg = 0;
1908 			len = left;
1909 			if (len > TMPBUFLEN-1)
1910 				len = TMPBUFLEN-1;
1911 			if (copy_from_user(buf, s, len))
1912 				return -EFAULT;
1913 			buf[len] = 0;
1914 			p = buf;
1915 			if (*p == '-' && left > 1) {
1916 				neg = 1;
1917 				p++;
1918 			}
1919 			if (*p < '0' || *p > '9')
1920 				break;
1921 			val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
1922 			len = p-buf;
1923 			if ((len < left) && *p && !isspace(*p))
1924 				break;
1925 			if (neg)
1926 				val = -val;
1927 			s += len;
1928 			left -= len;
1929 
1930 			if(neg)
1931 				continue;
1932 			if ((min && val < *min) || (max && val > *max))
1933 				continue;
1934 			*i = val;
1935 		} else {
1936 			p = buf;
1937 			if (!first)
1938 				*p++ = '\t';
1939 			sprintf(p, "%lu", convdiv * (*i) / convmul);
1940 			len = strlen(buf);
1941 			if (len > left)
1942 				len = left;
1943 			if(copy_to_user(s, buf, len))
1944 				return -EFAULT;
1945 			left -= len;
1946 			s += len;
1947 		}
1948 	}
1949 
1950 	if (!write && !first && left) {
1951 		if(put_user('\n', s))
1952 			return -EFAULT;
1953 		left--, s++;
1954 	}
1955 	if (write) {
1956 		while (left) {
1957 			char c;
1958 			if (get_user(c, s++))
1959 				return -EFAULT;
1960 			if (!isspace(c))
1961 				break;
1962 			left--;
1963 		}
1964 	}
1965 	if (write && first)
1966 		return -EINVAL;
1967 	*lenp -= left;
1968 	*ppos += *lenp;
1969 	return 0;
1970 #undef TMPBUFLEN
1971 }
1972 
1973 static int do_proc_doulongvec_minmax(ctl_table *table, int write,
1974 				     struct file *filp,
1975 				     void __user *buffer,
1976 				     size_t *lenp, loff_t *ppos,
1977 				     unsigned long convmul,
1978 				     unsigned long convdiv)
1979 {
1980 	return __do_proc_doulongvec_minmax(table->data, table, write,
1981 			filp, buffer, lenp, ppos, convmul, convdiv);
1982 }
1983 
1984 /**
1985  * proc_doulongvec_minmax - read a vector of long integers with min/max values
1986  * @table: the sysctl table
1987  * @write: %TRUE if this is a write to the sysctl file
1988  * @filp: the file structure
1989  * @buffer: the user buffer
1990  * @lenp: the size of the user buffer
1991  * @ppos: file position
1992  *
1993  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1994  * values from/to the user buffer, treated as an ASCII string.
1995  *
1996  * This routine will ensure the values are within the range specified by
1997  * table->extra1 (min) and table->extra2 (max).
1998  *
1999  * Returns 0 on success.
2000  */
2001 int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2002 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2003 {
2004     return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l);
2005 }
2006 
2007 /**
2008  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2009  * @table: the sysctl table
2010  * @write: %TRUE if this is a write to the sysctl file
2011  * @filp: the file structure
2012  * @buffer: the user buffer
2013  * @lenp: the size of the user buffer
2014  * @ppos: file position
2015  *
2016  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2017  * values from/to the user buffer, treated as an ASCII string. The values
2018  * are treated as milliseconds, and converted to jiffies when they are stored.
2019  *
2020  * This routine will ensure the values are within the range specified by
2021  * table->extra1 (min) and table->extra2 (max).
2022  *
2023  * Returns 0 on success.
2024  */
2025 int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2026 				      struct file *filp,
2027 				      void __user *buffer,
2028 				      size_t *lenp, loff_t *ppos)
2029 {
2030     return do_proc_doulongvec_minmax(table, write, filp, buffer,
2031 				     lenp, ppos, HZ, 1000l);
2032 }
2033 
2034 
2035 static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
2036 					 int *valp,
2037 					 int write, void *data)
2038 {
2039 	if (write) {
2040 		if (*lvalp > LONG_MAX / HZ)
2041 			return 1;
2042 		*valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2043 	} else {
2044 		int val = *valp;
2045 		unsigned long lval;
2046 		if (val < 0) {
2047 			*negp = -1;
2048 			lval = (unsigned long)-val;
2049 		} else {
2050 			*negp = 0;
2051 			lval = (unsigned long)val;
2052 		}
2053 		*lvalp = lval / HZ;
2054 	}
2055 	return 0;
2056 }
2057 
2058 static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
2059 						int *valp,
2060 						int write, void *data)
2061 {
2062 	if (write) {
2063 		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2064 			return 1;
2065 		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2066 	} else {
2067 		int val = *valp;
2068 		unsigned long lval;
2069 		if (val < 0) {
2070 			*negp = -1;
2071 			lval = (unsigned long)-val;
2072 		} else {
2073 			*negp = 0;
2074 			lval = (unsigned long)val;
2075 		}
2076 		*lvalp = jiffies_to_clock_t(lval);
2077 	}
2078 	return 0;
2079 }
2080 
/*
 * Conversion routine for values given in milliseconds and stored in
 * jiffies, using msecs_to_jiffies() and jiffies_to_msecs().
 *
 * Always returns 0.
 */
static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		*valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
	} else {
		int val = *valp;
		unsigned long lval;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate after widening to unsigned: "-val" overflows
			 * for INT_MIN when evaluated as a signed int.
			 */
			lval = -(unsigned long)val;
		} else {
			*negp = 0;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_msecs(lval);
	}
	return 0;
}
2101 
2102 /**
2103  * proc_dointvec_jiffies - read a vector of integers as seconds
2104  * @table: the sysctl table
2105  * @write: %TRUE if this is a write to the sysctl file
2106  * @filp: the file structure
2107  * @buffer: the user buffer
2108  * @lenp: the size of the user buffer
2109  * @ppos: file position
2110  *
2111  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2112  * values from/to the user buffer, treated as an ASCII string.
2113  * The values read are assumed to be in seconds, and are converted into
2114  * jiffies.
2115  *
2116  * Returns 0 on success.
2117  */
2118 int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2119 			  void __user *buffer, size_t *lenp, loff_t *ppos)
2120 {
2121     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2122 		    	    do_proc_dointvec_jiffies_conv,NULL);
2123 }
2124 
2125 /**
2126  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2127  * @table: the sysctl table
2128  * @write: %TRUE if this is a write to the sysctl file
2129  * @filp: the file structure
2130  * @buffer: the user buffer
2131  * @lenp: the size of the user buffer
2132  * @ppos: pointer to the file position
2133  *
2134  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2135  * values from/to the user buffer, treated as an ASCII string.
2136  * The values read are assumed to be in 1/USER_HZ seconds, and
2137  * are converted into jiffies.
2138  *
2139  * Returns 0 on success.
2140  */
2141 int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2142 				 void __user *buffer, size_t *lenp, loff_t *ppos)
2143 {
2144     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2145 		    	    do_proc_dointvec_userhz_jiffies_conv,NULL);
2146 }
2147 
2148 /**
2149  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2150  * @table: the sysctl table
2151  * @write: %TRUE if this is a write to the sysctl file
2152  * @filp: the file structure
2153  * @buffer: the user buffer
2154  * @lenp: the size of the user buffer
2155  * @ppos: file position
2156  * @ppos: the current position in the file
2157  *
2158  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2159  * values from/to the user buffer, treated as an ASCII string.
2160  * The values read are assumed to be in 1/1000 seconds, and
2161  * are converted into jiffies.
2162  *
2163  * Returns 0 on success.
2164  */
2165 int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2166 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2167 {
2168 	return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
2169 				do_proc_dointvec_ms_jiffies_conv, NULL);
2170 }
2171 
2172 static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
2173 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2174 {
2175 	struct pid *new_pid;
2176 	pid_t tmp;
2177 	int r;
2178 
2179 	tmp = pid_nr(cad_pid);
2180 
2181 	r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
2182 			       lenp, ppos, NULL, NULL);
2183 	if (r || !write)
2184 		return r;
2185 
2186 	new_pid = find_get_pid(tmp);
2187 	if (!new_pid)
2188 		return -ESRCH;
2189 
2190 	put_pid(xchg(&cad_pid, new_pid));
2191 	return 0;
2192 }
2193 
#else /* !CONFIG_PROC_SYSCTL */
2195 
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dostring(ctl_table *table, int write, struct file *filp,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2201 
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dointvec(ctl_table *table, int write, struct file *filp,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2207 
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
			void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2213 
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2219 
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2225 
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2231 
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
			     void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2237 
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2243 
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
				      struct file *filp,
				      void __user *buffer,
				      size_t *lenp, loff_t *ppos)
{
    return -ENOSYS;
}
2251 
2252 
#endif /* CONFIG_PROC_SYSCTL */
2254 
2255 
2256 #ifdef CONFIG_SYSCTL_SYSCALL
2257 /*
2258  * General sysctl support routines
2259  */
2260 
2261 /* The generic string strategy routine: */
2262 int sysctl_string(ctl_table *table, int __user *name, int nlen,
2263 		  void __user *oldval, size_t __user *oldlenp,
2264 		  void __user *newval, size_t newlen)
2265 {
2266 	if (!table->data || !table->maxlen)
2267 		return -ENOTDIR;
2268 
2269 	if (oldval && oldlenp) {
2270 		size_t bufsize;
2271 		if (get_user(bufsize, oldlenp))
2272 			return -EFAULT;
2273 		if (bufsize) {
2274 			size_t len = strlen(table->data), copied;
2275 
2276 			/* This shouldn't trigger for a well-formed sysctl */
2277 			if (len > table->maxlen)
2278 				len = table->maxlen;
2279 
2280 			/* Copy up to a max of bufsize-1 bytes of the string */
2281 			copied = (len >= bufsize) ? bufsize - 1 : len;
2282 
2283 			if (copy_to_user(oldval, table->data, copied) ||
2284 			    put_user(0, (char __user *)(oldval + copied)))
2285 				return -EFAULT;
2286 			if (put_user(len, oldlenp))
2287 				return -EFAULT;
2288 		}
2289 	}
2290 	if (newval && newlen) {
2291 		size_t len = newlen;
2292 		if (len > table->maxlen)
2293 			len = table->maxlen;
2294 		if(copy_from_user(table->data, newval, len))
2295 			return -EFAULT;
2296 		if (len == table->maxlen)
2297 			len--;
2298 		((char *) table->data)[len] = 0;
2299 	}
2300 	return 1;
2301 }
2302 
2303 /*
2304  * This function makes sure that all of the integers in the vector
2305  * are between the minimum and maximum values given in the arrays
2306  * table->extra1 and table->extra2, respectively.
2307  */
2308 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2309 		void __user *oldval, size_t __user *oldlenp,
2310 		void __user *newval, size_t newlen)
2311 {
2312 
2313 	if (newval && newlen) {
2314 		int __user *vec = (int __user *) newval;
2315 		int *min = (int *) table->extra1;
2316 		int *max = (int *) table->extra2;
2317 		size_t length;
2318 		int i;
2319 
2320 		if (newlen % sizeof(int) != 0)
2321 			return -EINVAL;
2322 
2323 		if (!table->extra1 && !table->extra2)
2324 			return 0;
2325 
2326 		if (newlen > table->maxlen)
2327 			newlen = table->maxlen;
2328 		length = newlen / sizeof(int);
2329 
2330 		for (i = 0; i < length; i++) {
2331 			int value;
2332 			if (get_user(value, vec + i))
2333 				return -EFAULT;
2334 			if (min && value < min[i])
2335 				return -EINVAL;
2336 			if (max && value > max[i])
2337 				return -EINVAL;
2338 		}
2339 	}
2340 	return 0;
2341 }
2342 
2343 /* Strategy function to convert jiffies to seconds */
2344 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2345 		void __user *oldval, size_t __user *oldlenp,
2346 		void __user *newval, size_t newlen)
2347 {
2348 	if (oldval && oldlenp) {
2349 		size_t olen;
2350 
2351 		if (get_user(olen, oldlenp))
2352 			return -EFAULT;
2353 		if (olen) {
2354 			int val;
2355 
2356 			if (olen < sizeof(int))
2357 				return -EINVAL;
2358 
2359 			val = *(int *)(table->data) / HZ;
2360 			if (put_user(val, (int __user *)oldval))
2361 				return -EFAULT;
2362 			if (put_user(sizeof(int), oldlenp))
2363 				return -EFAULT;
2364 		}
2365 	}
2366 	if (newval && newlen) {
2367 		int new;
2368 		if (newlen != sizeof(int))
2369 			return -EINVAL;
2370 		if (get_user(new, (int __user *)newval))
2371 			return -EFAULT;
2372 		*(int *)(table->data) = new*HZ;
2373 	}
2374 	return 1;
2375 }
2376 
2377 /* Strategy function to convert jiffies to seconds */
2378 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2379 		void __user *oldval, size_t __user *oldlenp,
2380 		void __user *newval, size_t newlen)
2381 {
2382 	if (oldval && oldlenp) {
2383 		size_t olen;
2384 
2385 		if (get_user(olen, oldlenp))
2386 			return -EFAULT;
2387 		if (olen) {
2388 			int val;
2389 
2390 			if (olen < sizeof(int))
2391 				return -EINVAL;
2392 
2393 			val = jiffies_to_msecs(*(int *)(table->data));
2394 			if (put_user(val, (int __user *)oldval))
2395 				return -EFAULT;
2396 			if (put_user(sizeof(int), oldlenp))
2397 				return -EFAULT;
2398 		}
2399 	}
2400 	if (newval && newlen) {
2401 		int new;
2402 		if (newlen != sizeof(int))
2403 			return -EINVAL;
2404 		if (get_user(new, (int __user *)newval))
2405 			return -EFAULT;
2406 		*(int *)(table->data) = msecs_to_jiffies(new);
2407 	}
2408 	return 1;
2409 }
2410 
2411 
2412 
2413 #else /* CONFIG_SYSCTL_SYSCALL */
2414 
2415 
2416 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
2417 {
2418 	static int msg_count;
2419 	struct __sysctl_args tmp;
2420 	int name[CTL_MAXNAME];
2421 	int i;
2422 
2423 	/* Read in the sysctl name for better debug message logging */
2424 	if (copy_from_user(&tmp, args, sizeof(tmp)))
2425 		return -EFAULT;
2426 	if (tmp.nlen <= 0 || tmp.nlen >= CTL_MAXNAME)
2427 		return -ENOTDIR;
2428 	for (i = 0; i < tmp.nlen; i++)
2429 		if (get_user(name[i], tmp.name + i))
2430 			return -EFAULT;
2431 
2432 	/* Ignore accesses to kernel.version */
2433 	if ((tmp.nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION))
2434 		goto out;
2435 
2436 	if (msg_count < 5) {
2437 		msg_count++;
2438 		printk(KERN_INFO
2439 			"warning: process `%s' used the removed sysctl "
2440 			"system call with ", current->comm);
2441 		for (i = 0; i < tmp.nlen; i++)
2442 			printk("%d.", name[i]);
2443 		printk("\n");
2444 	}
2445 out:
2446 	return -ENOSYS;
2447 }
2448 
2449 int sysctl_string(ctl_table *table, int __user *name, int nlen,
2450 		  void __user *oldval, size_t __user *oldlenp,
2451 		  void __user *newval, size_t newlen)
2452 {
2453 	return -ENOSYS;
2454 }
2455 
2456 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2457 		void __user *oldval, size_t __user *oldlenp,
2458 		void __user *newval, size_t newlen)
2459 {
2460 	return -ENOSYS;
2461 }
2462 
2463 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2464 		void __user *oldval, size_t __user *oldlenp,
2465 		void __user *newval, size_t newlen)
2466 {
2467 	return -ENOSYS;
2468 }
2469 
2470 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2471 		void __user *oldval, size_t __user *oldlenp,
2472 		void __user *newval, size_t newlen)
2473 {
2474 	return -ENOSYS;
2475 }
2476 
2477 #endif /* CONFIG_SYSCTL_SYSCALL */
2478 
/*
 * Symbol exports, collected in one place.
 *
 * The usual convention is to export a symbol right after its
 * definition.  Handlers in this file are defined once per side of a
 * config conditional, so following that convention would mean writing
 * each export twice; they are listed once here instead.
 */

/* Table registration */
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(unregister_sysctl_table);

/* proc_* handlers */
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);

/* sysctl_* strategy routines */
EXPORT_SYMBOL(sysctl_string);
EXPORT_SYMBOL(sysctl_intvec);
EXPORT_SYMBOL(sysctl_jiffies);
EXPORT_SYMBOL(sysctl_ms_jiffies);
2497