xref: /linux-6.15/kernel/sysctl.c (revision 757dea93)
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20 
21 #include <linux/module.h>
22 #include <linux/mm.h>
23 #include <linux/swap.h>
24 #include <linux/slab.h>
25 #include <linux/sysctl.h>
26 #include <linux/proc_fs.h>
27 #include <linux/capability.h>
28 #include <linux/ctype.h>
29 #include <linux/utsname.h>
30 #include <linux/capability.h>
31 #include <linux/smp_lock.h>
32 #include <linux/init.h>
33 #include <linux/kernel.h>
34 #include <linux/kobject.h>
35 #include <linux/net.h>
36 #include <linux/sysrq.h>
37 #include <linux/highuid.h>
38 #include <linux/writeback.h>
39 #include <linux/hugetlb.h>
40 #include <linux/security.h>
41 #include <linux/initrd.h>
42 #include <linux/times.h>
43 #include <linux/limits.h>
44 #include <linux/dcache.h>
45 #include <linux/syscalls.h>
46 #include <linux/nfs_fs.h>
47 #include <linux/acpi.h>
48 
49 #include <asm/uaccess.h>
50 #include <asm/processor.h>
51 
52 extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
53                      void __user *buffer, size_t *lenp, loff_t *ppos);
54 
55 #ifdef CONFIG_X86
56 #include <asm/nmi.h>
57 #include <asm/stacktrace.h>
58 #endif
59 
60 #if defined(CONFIG_SYSCTL)
61 
62 /* External variables not in a header file. */
extern int C_A_D;			/* kern_table: "ctrl-alt-del" */
extern int sysctl_overcommit_memory;	/* vm_table: "overcommit_memory" */
extern int sysctl_overcommit_ratio;	/* vm_table: "overcommit_ratio" */
extern int sysctl_panic_on_oom;		/* vm_table: "panic_on_oom" */
extern int max_threads;			/* kern_table: "threads-max" */
extern int core_uses_pid;		/* kern_table: "core_uses_pid" */
extern int suid_dumpable;		/* fs_table: "suid_dumpable" */
extern char core_pattern[];		/* kern_table: "core_pattern" */
extern int pid_max;			/* kern_table: "pid_max" */
extern int min_free_kbytes;		/* vm_table: "min_free_kbytes" */
extern int printk_ratelimit_jiffies;	/* kern_table: "printk_ratelimit" */
extern int printk_ratelimit_burst;	/* kern_table: "printk_ratelimit_burst" */
extern int pid_max_min;			/* lower bound (extra1) for "pid_max" */
extern int pid_max_max;			/* upper bound (extra2) for "pid_max" */
extern int sysctl_drop_caches;		/* vm_table: "drop_caches" */
extern int percpu_pagelist_fraction;	/* vm_table: "percpu_pagelist_fraction" */
extern int compat_log;			/* kern_table: "compat-log" */
extern int maps_protect;		/* kern_table: "maps_protect" */
80 
81 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
82 static int maxolduid = 65535;
83 static int minolduid;
84 static int min_percpu_pagelist_fract = 8;
85 
86 static int ngroups_max = NGROUPS_MAX;
87 
/*
 * Variables that only exist for particular configurations or
 * architectures.  Each one is referenced from kern_table below.
 */
#ifdef CONFIG_KMOD
extern char modprobe_path[];		/* "modprobe" */
#endif

#ifdef CONFIG_CHR_DEV_SG
extern int sg_big_buff;			/* "sg-big-buff" */
#endif

#ifdef __sparc__
extern char reboot_command[];		/* "reboot-cmd" */
extern int stop_a_enabled;		/* "stop-a" */
extern int scons_pwroff;		/* "scons-poweroff" */
#endif

#ifdef __hppa__
extern int pwrsw_enabled;		/* "soft-power" */
extern int unaligned_enabled;		/* "unaligned-trap" */
#endif

#ifdef CONFIG_S390
#ifdef CONFIG_MATHEMU
extern int sysctl_ieee_emulation_warnings;	/* "ieee_emulation_warnings" */
#endif
extern int sysctl_userprocess_debug;	/* "userprocess_debug" */
extern int spin_retry;			/* "spin_retry" */
#endif

/* Declared unconditionally; only used by the "hz_timer" entry. */
extern int sysctl_hz_timer;

#ifdef CONFIG_BSD_PROCESS_ACCT
extern int acct_parm[];			/* "acct" */
#endif

#ifdef CONFIG_IA64
extern int no_unaligned_warning;	/* "ignore-unaligned-usertrap" */
#endif

#ifdef CONFIG_RT_MUTEXES
extern int max_lock_depth;		/* "max_lock_depth" */
#endif
127 
128 #ifdef CONFIG_SYSCTL_SYSCALL
129 static int parse_table(int __user *, int, void __user *, size_t __user *,
130 		void __user *, size_t, ctl_table *);
131 #endif
132 
133 
134 #ifdef CONFIG_PROC_SYSCTL
135 static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
136 		  void __user *buffer, size_t *lenp, loff_t *ppos);
137 static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp,
138 			       void __user *buffer, size_t *lenp, loff_t *ppos);
139 #endif
140 
141 static ctl_table root_table[];
142 static struct ctl_table_header root_table_header =
143 	{ root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) };
144 
145 static ctl_table kern_table[];
146 static ctl_table vm_table[];
147 static ctl_table fs_table[];
148 static ctl_table debug_table[];
149 static ctl_table dev_table[];
150 extern ctl_table random_table[];
151 #ifdef CONFIG_UNIX98_PTYS
152 extern ctl_table pty_table[];
153 #endif
154 #ifdef CONFIG_INOTIFY_USER
155 extern ctl_table inotify_table[];
156 #endif
157 
158 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
/* Backing store for the vm_table "legacy_va_layout" entry. */
int sysctl_legacy_va_layout;
160 #endif
161 
162 
163 /* The default sysctl tables: */
164 
165 static ctl_table root_table[] = {
166 	{
167 		.ctl_name	= CTL_KERN,
168 		.procname	= "kernel",
169 		.mode		= 0555,
170 		.child		= kern_table,
171 	},
172 	{
173 		.ctl_name	= CTL_VM,
174 		.procname	= "vm",
175 		.mode		= 0555,
176 		.child		= vm_table,
177 	},
178 #ifdef CONFIG_NET
179 	{
180 		.ctl_name	= CTL_NET,
181 		.procname	= "net",
182 		.mode		= 0555,
183 		.child		= net_table,
184 	},
185 #endif
186 	{
187 		.ctl_name	= CTL_FS,
188 		.procname	= "fs",
189 		.mode		= 0555,
190 		.child		= fs_table,
191 	},
192 	{
193 		.ctl_name	= CTL_DEBUG,
194 		.procname	= "debug",
195 		.mode		= 0555,
196 		.child		= debug_table,
197 	},
198 	{
199 		.ctl_name	= CTL_DEV,
200 		.procname	= "dev",
201 		.mode		= 0555,
202 		.child		= dev_table,
203 	},
204 
205 	{ .ctl_name = 0 }
206 };
207 
208 static ctl_table kern_table[] = {
209 	{
210 		.ctl_name	= KERN_PANIC,
211 		.procname	= "panic",
212 		.data		= &panic_timeout,
213 		.maxlen		= sizeof(int),
214 		.mode		= 0644,
215 		.proc_handler	= &proc_dointvec,
216 	},
217 	{
218 		.ctl_name	= KERN_CORE_USES_PID,
219 		.procname	= "core_uses_pid",
220 		.data		= &core_uses_pid,
221 		.maxlen		= sizeof(int),
222 		.mode		= 0644,
223 		.proc_handler	= &proc_dointvec,
224 	},
225 	{
226 		.ctl_name	= KERN_CORE_PATTERN,
227 		.procname	= "core_pattern",
228 		.data		= core_pattern,
229 		.maxlen		= 128,
230 		.mode		= 0644,
231 		.proc_handler	= &proc_dostring,
232 		.strategy	= &sysctl_string,
233 	},
234 #ifdef CONFIG_PROC_SYSCTL
235 	{
236 		.ctl_name	= KERN_TAINTED,
237 		.procname	= "tainted",
238 		.data		= &tainted,
239 		.maxlen		= sizeof(int),
240 		.mode		= 0644,
241 		.proc_handler	= &proc_dointvec_taint,
242 	},
243 #endif
244 	{
245 		.ctl_name	= KERN_CAP_BSET,
246 		.procname	= "cap-bound",
247 		.data		= &cap_bset,
248 		.maxlen		= sizeof(kernel_cap_t),
249 		.mode		= 0600,
250 		.proc_handler	= &proc_dointvec_bset,
251 	},
252 #ifdef CONFIG_BLK_DEV_INITRD
253 	{
254 		.ctl_name	= KERN_REALROOTDEV,
255 		.procname	= "real-root-dev",
256 		.data		= &real_root_dev,
257 		.maxlen		= sizeof(int),
258 		.mode		= 0644,
259 		.proc_handler	= &proc_dointvec,
260 	},
261 #endif
262 #ifdef __sparc__
263 	{
264 		.ctl_name	= KERN_SPARC_REBOOT,
265 		.procname	= "reboot-cmd",
266 		.data		= reboot_command,
267 		.maxlen		= 256,
268 		.mode		= 0644,
269 		.proc_handler	= &proc_dostring,
270 		.strategy	= &sysctl_string,
271 	},
272 	{
273 		.ctl_name	= KERN_SPARC_STOP_A,
274 		.procname	= "stop-a",
275 		.data		= &stop_a_enabled,
276 		.maxlen		= sizeof (int),
277 		.mode		= 0644,
278 		.proc_handler	= &proc_dointvec,
279 	},
280 	{
281 		.ctl_name	= KERN_SPARC_SCONS_PWROFF,
282 		.procname	= "scons-poweroff",
283 		.data		= &scons_pwroff,
284 		.maxlen		= sizeof (int),
285 		.mode		= 0644,
286 		.proc_handler	= &proc_dointvec,
287 	},
288 #endif
289 #ifdef __hppa__
290 	{
291 		.ctl_name	= KERN_HPPA_PWRSW,
292 		.procname	= "soft-power",
293 		.data		= &pwrsw_enabled,
294 		.maxlen		= sizeof (int),
295 	 	.mode		= 0644,
296 		.proc_handler	= &proc_dointvec,
297 	},
298 	{
299 		.ctl_name	= KERN_HPPA_UNALIGNED,
300 		.procname	= "unaligned-trap",
301 		.data		= &unaligned_enabled,
302 		.maxlen		= sizeof (int),
303 		.mode		= 0644,
304 		.proc_handler	= &proc_dointvec,
305 	},
306 #endif
307 	{
308 		.ctl_name	= KERN_CTLALTDEL,
309 		.procname	= "ctrl-alt-del",
310 		.data		= &C_A_D,
311 		.maxlen		= sizeof(int),
312 		.mode		= 0644,
313 		.proc_handler	= &proc_dointvec,
314 	},
315 	{
316 		.ctl_name	= KERN_PRINTK,
317 		.procname	= "printk",
318 		.data		= &console_loglevel,
319 		.maxlen		= 4*sizeof(int),
320 		.mode		= 0644,
321 		.proc_handler	= &proc_dointvec,
322 	},
323 #ifdef CONFIG_KMOD
324 	{
325 		.ctl_name	= KERN_MODPROBE,
326 		.procname	= "modprobe",
327 		.data		= &modprobe_path,
328 		.maxlen		= KMOD_PATH_LEN,
329 		.mode		= 0644,
330 		.proc_handler	= &proc_dostring,
331 		.strategy	= &sysctl_string,
332 	},
333 #endif
334 #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
335 	{
336 		.ctl_name	= KERN_HOTPLUG,
337 		.procname	= "hotplug",
338 		.data		= &uevent_helper,
339 		.maxlen		= UEVENT_HELPER_PATH_LEN,
340 		.mode		= 0644,
341 		.proc_handler	= &proc_dostring,
342 		.strategy	= &sysctl_string,
343 	},
344 #endif
345 #ifdef CONFIG_CHR_DEV_SG
346 	{
347 		.ctl_name	= KERN_SG_BIG_BUFF,
348 		.procname	= "sg-big-buff",
349 		.data		= &sg_big_buff,
350 		.maxlen		= sizeof (int),
351 		.mode		= 0444,
352 		.proc_handler	= &proc_dointvec,
353 	},
354 #endif
355 #ifdef CONFIG_BSD_PROCESS_ACCT
356 	{
357 		.ctl_name	= KERN_ACCT,
358 		.procname	= "acct",
359 		.data		= &acct_parm,
360 		.maxlen		= 3*sizeof(int),
361 		.mode		= 0644,
362 		.proc_handler	= &proc_dointvec,
363 	},
364 #endif
365 #ifdef CONFIG_MAGIC_SYSRQ
366 	{
367 		.ctl_name	= KERN_SYSRQ,
368 		.procname	= "sysrq",
369 		.data		= &__sysrq_enabled,
370 		.maxlen		= sizeof (int),
371 		.mode		= 0644,
372 		.proc_handler	= &proc_dointvec,
373 	},
374 #endif
375 #ifdef CONFIG_PROC_SYSCTL
376 	{
377 		.ctl_name	= KERN_CADPID,
378 		.procname	= "cad_pid",
379 		.data		= NULL,
380 		.maxlen		= sizeof (int),
381 		.mode		= 0600,
382 		.proc_handler	= &proc_do_cad_pid,
383 	},
384 #endif
385 	{
386 		.ctl_name	= KERN_MAX_THREADS,
387 		.procname	= "threads-max",
388 		.data		= &max_threads,
389 		.maxlen		= sizeof(int),
390 		.mode		= 0644,
391 		.proc_handler	= &proc_dointvec,
392 	},
393 	{
394 		.ctl_name	= KERN_RANDOM,
395 		.procname	= "random",
396 		.mode		= 0555,
397 		.child		= random_table,
398 	},
399 #ifdef CONFIG_UNIX98_PTYS
400 	{
401 		.ctl_name	= KERN_PTY,
402 		.procname	= "pty",
403 		.mode		= 0555,
404 		.child		= pty_table,
405 	},
406 #endif
407 	{
408 		.ctl_name	= KERN_OVERFLOWUID,
409 		.procname	= "overflowuid",
410 		.data		= &overflowuid,
411 		.maxlen		= sizeof(int),
412 		.mode		= 0644,
413 		.proc_handler	= &proc_dointvec_minmax,
414 		.strategy	= &sysctl_intvec,
415 		.extra1		= &minolduid,
416 		.extra2		= &maxolduid,
417 	},
418 	{
419 		.ctl_name	= KERN_OVERFLOWGID,
420 		.procname	= "overflowgid",
421 		.data		= &overflowgid,
422 		.maxlen		= sizeof(int),
423 		.mode		= 0644,
424 		.proc_handler	= &proc_dointvec_minmax,
425 		.strategy	= &sysctl_intvec,
426 		.extra1		= &minolduid,
427 		.extra2		= &maxolduid,
428 	},
429 #ifdef CONFIG_S390
430 #ifdef CONFIG_MATHEMU
431 	{
432 		.ctl_name	= KERN_IEEE_EMULATION_WARNINGS,
433 		.procname	= "ieee_emulation_warnings",
434 		.data		= &sysctl_ieee_emulation_warnings,
435 		.maxlen		= sizeof(int),
436 		.mode		= 0644,
437 		.proc_handler	= &proc_dointvec,
438 	},
439 #endif
440 #ifdef CONFIG_NO_IDLE_HZ
441 	{
442 		.ctl_name       = KERN_HZ_TIMER,
443 		.procname       = "hz_timer",
444 		.data           = &sysctl_hz_timer,
445 		.maxlen         = sizeof(int),
446 		.mode           = 0644,
447 		.proc_handler   = &proc_dointvec,
448 	},
449 #endif
450 	{
451 		.ctl_name	= KERN_S390_USER_DEBUG_LOGGING,
452 		.procname	= "userprocess_debug",
453 		.data		= &sysctl_userprocess_debug,
454 		.maxlen		= sizeof(int),
455 		.mode		= 0644,
456 		.proc_handler	= &proc_dointvec,
457 	},
458 #endif
459 	{
460 		.ctl_name	= KERN_PIDMAX,
461 		.procname	= "pid_max",
462 		.data		= &pid_max,
463 		.maxlen		= sizeof (int),
464 		.mode		= 0644,
465 		.proc_handler	= &proc_dointvec_minmax,
466 		.strategy	= sysctl_intvec,
467 		.extra1		= &pid_max_min,
468 		.extra2		= &pid_max_max,
469 	},
470 	{
471 		.ctl_name	= KERN_PANIC_ON_OOPS,
472 		.procname	= "panic_on_oops",
473 		.data		= &panic_on_oops,
474 		.maxlen		= sizeof(int),
475 		.mode		= 0644,
476 		.proc_handler	= &proc_dointvec,
477 	},
478 	{
479 		.ctl_name	= KERN_PRINTK_RATELIMIT,
480 		.procname	= "printk_ratelimit",
481 		.data		= &printk_ratelimit_jiffies,
482 		.maxlen		= sizeof(int),
483 		.mode		= 0644,
484 		.proc_handler	= &proc_dointvec_jiffies,
485 		.strategy	= &sysctl_jiffies,
486 	},
487 	{
488 		.ctl_name	= KERN_PRINTK_RATELIMIT_BURST,
489 		.procname	= "printk_ratelimit_burst",
490 		.data		= &printk_ratelimit_burst,
491 		.maxlen		= sizeof(int),
492 		.mode		= 0644,
493 		.proc_handler	= &proc_dointvec,
494 	},
495 	{
496 		.ctl_name	= KERN_NGROUPS_MAX,
497 		.procname	= "ngroups_max",
498 		.data		= &ngroups_max,
499 		.maxlen		= sizeof (int),
500 		.mode		= 0444,
501 		.proc_handler	= &proc_dointvec,
502 	},
503 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
504 	{
505 		.ctl_name       = KERN_UNKNOWN_NMI_PANIC,
506 		.procname       = "unknown_nmi_panic",
507 		.data           = &unknown_nmi_panic,
508 		.maxlen         = sizeof (int),
509 		.mode           = 0644,
510 		.proc_handler   = &proc_dointvec,
511 	},
512 	{
513 		.ctl_name       = KERN_NMI_WATCHDOG,
514 		.procname       = "nmi_watchdog",
515 		.data           = &nmi_watchdog_enabled,
516 		.maxlen         = sizeof (int),
517 		.mode           = 0644,
518 		.proc_handler   = &proc_nmi_enabled,
519 	},
520 #endif
521 #if defined(CONFIG_X86)
522 	{
523 		.ctl_name	= KERN_PANIC_ON_NMI,
524 		.procname	= "panic_on_unrecovered_nmi",
525 		.data		= &panic_on_unrecovered_nmi,
526 		.maxlen		= sizeof(int),
527 		.mode		= 0644,
528 		.proc_handler	= &proc_dointvec,
529 	},
530 	{
531 		.ctl_name	= KERN_BOOTLOADER_TYPE,
532 		.procname	= "bootloader_type",
533 		.data		= &bootloader_type,
534 		.maxlen		= sizeof (int),
535 		.mode		= 0444,
536 		.proc_handler	= &proc_dointvec,
537 	},
538 	{
539 		.ctl_name	= CTL_UNNUMBERED,
540 		.procname	= "kstack_depth_to_print",
541 		.data		= &kstack_depth_to_print,
542 		.maxlen		= sizeof(int),
543 		.mode		= 0644,
544 		.proc_handler	= &proc_dointvec,
545 	},
546 #endif
547 #if defined(CONFIG_MMU)
548 	{
549 		.ctl_name	= KERN_RANDOMIZE,
550 		.procname	= "randomize_va_space",
551 		.data		= &randomize_va_space,
552 		.maxlen		= sizeof(int),
553 		.mode		= 0644,
554 		.proc_handler	= &proc_dointvec,
555 	},
556 #endif
557 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
558 	{
559 		.ctl_name	= KERN_SPIN_RETRY,
560 		.procname	= "spin_retry",
561 		.data		= &spin_retry,
562 		.maxlen		= sizeof (int),
563 		.mode		= 0644,
564 		.proc_handler	= &proc_dointvec,
565 	},
566 #endif
567 #ifdef CONFIG_ACPI_SLEEP
568 	{
569 		.ctl_name	= KERN_ACPI_VIDEO_FLAGS,
570 		.procname	= "acpi_video_flags",
571 		.data		= &acpi_video_flags,
572 		.maxlen		= sizeof (unsigned long),
573 		.mode		= 0644,
574 		.proc_handler	= &proc_doulongvec_minmax,
575 	},
576 #endif
577 #ifdef CONFIG_IA64
578 	{
579 		.ctl_name	= KERN_IA64_UNALIGNED,
580 		.procname	= "ignore-unaligned-usertrap",
581 		.data		= &no_unaligned_warning,
582 		.maxlen		= sizeof (int),
583 	 	.mode		= 0644,
584 		.proc_handler	= &proc_dointvec,
585 	},
586 #endif
587 #ifdef CONFIG_COMPAT
588 	{
589 		.ctl_name	= KERN_COMPAT_LOG,
590 		.procname	= "compat-log",
591 		.data		= &compat_log,
592 		.maxlen		= sizeof (int),
593 	 	.mode		= 0644,
594 		.proc_handler	= &proc_dointvec,
595 	},
596 #endif
597 #ifdef CONFIG_RT_MUTEXES
598 	{
599 		.ctl_name	= KERN_MAX_LOCK_DEPTH,
600 		.procname	= "max_lock_depth",
601 		.data		= &max_lock_depth,
602 		.maxlen		= sizeof(int),
603 		.mode		= 0644,
604 		.proc_handler	= &proc_dointvec,
605 	},
606 #endif
607 #ifdef CONFIG_PROC_FS
608 	{
609 		.ctl_name       = CTL_UNNUMBERED,
610 		.procname       = "maps_protect",
611 		.data           = &maps_protect,
612 		.maxlen         = sizeof(int),
613 		.mode           = 0644,
614 		.proc_handler   = &proc_dointvec,
615 	},
616 #endif
617 
618 	{ .ctl_name = 0 }
619 };
620 
621 /* Constants for minimum and maximum testing in vm_table.
622    We use these as one-element integer vectors. */
static int zero;		/* used as .extra1 (lower bound) */
static int one_hundred = 100;	/* used as .extra2 (upper bound) */
625 
626 
627 static ctl_table vm_table[] = {
628 	{
629 		.ctl_name	= VM_OVERCOMMIT_MEMORY,
630 		.procname	= "overcommit_memory",
631 		.data		= &sysctl_overcommit_memory,
632 		.maxlen		= sizeof(sysctl_overcommit_memory),
633 		.mode		= 0644,
634 		.proc_handler	= &proc_dointvec,
635 	},
636 	{
637 		.ctl_name	= VM_PANIC_ON_OOM,
638 		.procname	= "panic_on_oom",
639 		.data		= &sysctl_panic_on_oom,
640 		.maxlen		= sizeof(sysctl_panic_on_oom),
641 		.mode		= 0644,
642 		.proc_handler	= &proc_dointvec,
643 	},
644 	{
645 		.ctl_name	= VM_OVERCOMMIT_RATIO,
646 		.procname	= "overcommit_ratio",
647 		.data		= &sysctl_overcommit_ratio,
648 		.maxlen		= sizeof(sysctl_overcommit_ratio),
649 		.mode		= 0644,
650 		.proc_handler	= &proc_dointvec,
651 	},
652 	{
653 		.ctl_name	= VM_PAGE_CLUSTER,
654 		.procname	= "page-cluster",
655 		.data		= &page_cluster,
656 		.maxlen		= sizeof(int),
657 		.mode		= 0644,
658 		.proc_handler	= &proc_dointvec,
659 	},
660 	{
661 		.ctl_name	= VM_DIRTY_BACKGROUND,
662 		.procname	= "dirty_background_ratio",
663 		.data		= &dirty_background_ratio,
664 		.maxlen		= sizeof(dirty_background_ratio),
665 		.mode		= 0644,
666 		.proc_handler	= &proc_dointvec_minmax,
667 		.strategy	= &sysctl_intvec,
668 		.extra1		= &zero,
669 		.extra2		= &one_hundred,
670 	},
671 	{
672 		.ctl_name	= VM_DIRTY_RATIO,
673 		.procname	= "dirty_ratio",
674 		.data		= &vm_dirty_ratio,
675 		.maxlen		= sizeof(vm_dirty_ratio),
676 		.mode		= 0644,
677 		.proc_handler	= &proc_dointvec_minmax,
678 		.strategy	= &sysctl_intvec,
679 		.extra1		= &zero,
680 		.extra2		= &one_hundred,
681 	},
682 	{
683 		.ctl_name	= VM_DIRTY_WB_CS,
684 		.procname	= "dirty_writeback_centisecs",
685 		.data		= &dirty_writeback_interval,
686 		.maxlen		= sizeof(dirty_writeback_interval),
687 		.mode		= 0644,
688 		.proc_handler	= &dirty_writeback_centisecs_handler,
689 	},
690 	{
691 		.ctl_name	= VM_DIRTY_EXPIRE_CS,
692 		.procname	= "dirty_expire_centisecs",
693 		.data		= &dirty_expire_interval,
694 		.maxlen		= sizeof(dirty_expire_interval),
695 		.mode		= 0644,
696 		.proc_handler	= &proc_dointvec_userhz_jiffies,
697 	},
698 	{
699 		.ctl_name	= VM_NR_PDFLUSH_THREADS,
700 		.procname	= "nr_pdflush_threads",
701 		.data		= &nr_pdflush_threads,
702 		.maxlen		= sizeof nr_pdflush_threads,
703 		.mode		= 0444 /* read-only*/,
704 		.proc_handler	= &proc_dointvec,
705 	},
706 	{
707 		.ctl_name	= VM_SWAPPINESS,
708 		.procname	= "swappiness",
709 		.data		= &vm_swappiness,
710 		.maxlen		= sizeof(vm_swappiness),
711 		.mode		= 0644,
712 		.proc_handler	= &proc_dointvec_minmax,
713 		.strategy	= &sysctl_intvec,
714 		.extra1		= &zero,
715 		.extra2		= &one_hundred,
716 	},
717 #ifdef CONFIG_HUGETLB_PAGE
718 	 {
719 		.ctl_name	= VM_HUGETLB_PAGES,
720 		.procname	= "nr_hugepages",
721 		.data		= &max_huge_pages,
722 		.maxlen		= sizeof(unsigned long),
723 		.mode		= 0644,
724 		.proc_handler	= &hugetlb_sysctl_handler,
725 		.extra1		= (void *)&hugetlb_zero,
726 		.extra2		= (void *)&hugetlb_infinity,
727 	 },
728 	 {
729 		.ctl_name	= VM_HUGETLB_GROUP,
730 		.procname	= "hugetlb_shm_group",
731 		.data		= &sysctl_hugetlb_shm_group,
732 		.maxlen		= sizeof(gid_t),
733 		.mode		= 0644,
734 		.proc_handler	= &proc_dointvec,
735 	 },
736 #endif
737 	{
738 		.ctl_name	= VM_LOWMEM_RESERVE_RATIO,
739 		.procname	= "lowmem_reserve_ratio",
740 		.data		= &sysctl_lowmem_reserve_ratio,
741 		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
742 		.mode		= 0644,
743 		.proc_handler	= &lowmem_reserve_ratio_sysctl_handler,
744 		.strategy	= &sysctl_intvec,
745 	},
746 	{
747 		.ctl_name	= VM_DROP_PAGECACHE,
748 		.procname	= "drop_caches",
749 		.data		= &sysctl_drop_caches,
750 		.maxlen		= sizeof(int),
751 		.mode		= 0644,
752 		.proc_handler	= drop_caches_sysctl_handler,
753 		.strategy	= &sysctl_intvec,
754 	},
755 	{
756 		.ctl_name	= VM_MIN_FREE_KBYTES,
757 		.procname	= "min_free_kbytes",
758 		.data		= &min_free_kbytes,
759 		.maxlen		= sizeof(min_free_kbytes),
760 		.mode		= 0644,
761 		.proc_handler	= &min_free_kbytes_sysctl_handler,
762 		.strategy	= &sysctl_intvec,
763 		.extra1		= &zero,
764 	},
765 	{
766 		.ctl_name	= VM_PERCPU_PAGELIST_FRACTION,
767 		.procname	= "percpu_pagelist_fraction",
768 		.data		= &percpu_pagelist_fraction,
769 		.maxlen		= sizeof(percpu_pagelist_fraction),
770 		.mode		= 0644,
771 		.proc_handler	= &percpu_pagelist_fraction_sysctl_handler,
772 		.strategy	= &sysctl_intvec,
773 		.extra1		= &min_percpu_pagelist_fract,
774 	},
775 #ifdef CONFIG_MMU
776 	{
777 		.ctl_name	= VM_MAX_MAP_COUNT,
778 		.procname	= "max_map_count",
779 		.data		= &sysctl_max_map_count,
780 		.maxlen		= sizeof(sysctl_max_map_count),
781 		.mode		= 0644,
782 		.proc_handler	= &proc_dointvec
783 	},
784 #endif
785 	{
786 		.ctl_name	= VM_LAPTOP_MODE,
787 		.procname	= "laptop_mode",
788 		.data		= &laptop_mode,
789 		.maxlen		= sizeof(laptop_mode),
790 		.mode		= 0644,
791 		.proc_handler	= &proc_dointvec_jiffies,
792 		.strategy	= &sysctl_jiffies,
793 	},
794 	{
795 		.ctl_name	= VM_BLOCK_DUMP,
796 		.procname	= "block_dump",
797 		.data		= &block_dump,
798 		.maxlen		= sizeof(block_dump),
799 		.mode		= 0644,
800 		.proc_handler	= &proc_dointvec,
801 		.strategy	= &sysctl_intvec,
802 		.extra1		= &zero,
803 	},
804 	{
805 		.ctl_name	= VM_VFS_CACHE_PRESSURE,
806 		.procname	= "vfs_cache_pressure",
807 		.data		= &sysctl_vfs_cache_pressure,
808 		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
809 		.mode		= 0644,
810 		.proc_handler	= &proc_dointvec,
811 		.strategy	= &sysctl_intvec,
812 		.extra1		= &zero,
813 	},
814 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
815 	{
816 		.ctl_name	= VM_LEGACY_VA_LAYOUT,
817 		.procname	= "legacy_va_layout",
818 		.data		= &sysctl_legacy_va_layout,
819 		.maxlen		= sizeof(sysctl_legacy_va_layout),
820 		.mode		= 0644,
821 		.proc_handler	= &proc_dointvec,
822 		.strategy	= &sysctl_intvec,
823 		.extra1		= &zero,
824 	},
825 #endif
826 #ifdef CONFIG_NUMA
827 	{
828 		.ctl_name	= VM_ZONE_RECLAIM_MODE,
829 		.procname	= "zone_reclaim_mode",
830 		.data		= &zone_reclaim_mode,
831 		.maxlen		= sizeof(zone_reclaim_mode),
832 		.mode		= 0644,
833 		.proc_handler	= &proc_dointvec,
834 		.strategy	= &sysctl_intvec,
835 		.extra1		= &zero,
836 	},
837 	{
838 		.ctl_name	= VM_MIN_UNMAPPED,
839 		.procname	= "min_unmapped_ratio",
840 		.data		= &sysctl_min_unmapped_ratio,
841 		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
842 		.mode		= 0644,
843 		.proc_handler	= &sysctl_min_unmapped_ratio_sysctl_handler,
844 		.strategy	= &sysctl_intvec,
845 		.extra1		= &zero,
846 		.extra2		= &one_hundred,
847 	},
848 	{
849 		.ctl_name	= VM_MIN_SLAB,
850 		.procname	= "min_slab_ratio",
851 		.data		= &sysctl_min_slab_ratio,
852 		.maxlen		= sizeof(sysctl_min_slab_ratio),
853 		.mode		= 0644,
854 		.proc_handler	= &sysctl_min_slab_ratio_sysctl_handler,
855 		.strategy	= &sysctl_intvec,
856 		.extra1		= &zero,
857 		.extra2		= &one_hundred,
858 	},
859 #endif
860 #if defined(CONFIG_X86_32) || \
861    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
862 	{
863 		.ctl_name	= VM_VDSO_ENABLED,
864 		.procname	= "vdso_enabled",
865 		.data		= &vdso_enabled,
866 		.maxlen		= sizeof(vdso_enabled),
867 		.mode		= 0644,
868 		.proc_handler	= &proc_dointvec,
869 		.strategy	= &sysctl_intvec,
870 		.extra1		= &zero,
871 	},
872 #endif
873 	{ .ctl_name = 0 }
874 };
875 
876 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
877 static ctl_table binfmt_misc_table[] = {
878 	{ .ctl_name = 0 }
879 };
880 #endif
881 
882 static ctl_table fs_table[] = {
883 	{
884 		.ctl_name	= FS_NRINODE,
885 		.procname	= "inode-nr",
886 		.data		= &inodes_stat,
887 		.maxlen		= 2*sizeof(int),
888 		.mode		= 0444,
889 		.proc_handler	= &proc_dointvec,
890 	},
891 	{
892 		.ctl_name	= FS_STATINODE,
893 		.procname	= "inode-state",
894 		.data		= &inodes_stat,
895 		.maxlen		= 7*sizeof(int),
896 		.mode		= 0444,
897 		.proc_handler	= &proc_dointvec,
898 	},
899 	{
900 		.ctl_name	= FS_NRFILE,
901 		.procname	= "file-nr",
902 		.data		= &files_stat,
903 		.maxlen		= 3*sizeof(int),
904 		.mode		= 0444,
905 		.proc_handler	= &proc_nr_files,
906 	},
907 	{
908 		.ctl_name	= FS_MAXFILE,
909 		.procname	= "file-max",
910 		.data		= &files_stat.max_files,
911 		.maxlen		= sizeof(int),
912 		.mode		= 0644,
913 		.proc_handler	= &proc_dointvec,
914 	},
915 	{
916 		.ctl_name	= FS_DENTRY,
917 		.procname	= "dentry-state",
918 		.data		= &dentry_stat,
919 		.maxlen		= 6*sizeof(int),
920 		.mode		= 0444,
921 		.proc_handler	= &proc_dointvec,
922 	},
923 	{
924 		.ctl_name	= FS_OVERFLOWUID,
925 		.procname	= "overflowuid",
926 		.data		= &fs_overflowuid,
927 		.maxlen		= sizeof(int),
928 		.mode		= 0644,
929 		.proc_handler	= &proc_dointvec_minmax,
930 		.strategy	= &sysctl_intvec,
931 		.extra1		= &minolduid,
932 		.extra2		= &maxolduid,
933 	},
934 	{
935 		.ctl_name	= FS_OVERFLOWGID,
936 		.procname	= "overflowgid",
937 		.data		= &fs_overflowgid,
938 		.maxlen		= sizeof(int),
939 		.mode		= 0644,
940 		.proc_handler	= &proc_dointvec_minmax,
941 		.strategy	= &sysctl_intvec,
942 		.extra1		= &minolduid,
943 		.extra2		= &maxolduid,
944 	},
945 	{
946 		.ctl_name	= FS_LEASES,
947 		.procname	= "leases-enable",
948 		.data		= &leases_enable,
949 		.maxlen		= sizeof(int),
950 		.mode		= 0644,
951 		.proc_handler	= &proc_dointvec,
952 	},
953 #ifdef CONFIG_DNOTIFY
954 	{
955 		.ctl_name	= FS_DIR_NOTIFY,
956 		.procname	= "dir-notify-enable",
957 		.data		= &dir_notify_enable,
958 		.maxlen		= sizeof(int),
959 		.mode		= 0644,
960 		.proc_handler	= &proc_dointvec,
961 	},
962 #endif
963 #ifdef CONFIG_MMU
964 	{
965 		.ctl_name	= FS_LEASE_TIME,
966 		.procname	= "lease-break-time",
967 		.data		= &lease_break_time,
968 		.maxlen		= sizeof(int),
969 		.mode		= 0644,
970 		.proc_handler	= &proc_dointvec,
971 	},
972 	{
973 		.ctl_name	= FS_AIO_NR,
974 		.procname	= "aio-nr",
975 		.data		= &aio_nr,
976 		.maxlen		= sizeof(aio_nr),
977 		.mode		= 0444,
978 		.proc_handler	= &proc_doulongvec_minmax,
979 	},
980 	{
981 		.ctl_name	= FS_AIO_MAX_NR,
982 		.procname	= "aio-max-nr",
983 		.data		= &aio_max_nr,
984 		.maxlen		= sizeof(aio_max_nr),
985 		.mode		= 0644,
986 		.proc_handler	= &proc_doulongvec_minmax,
987 	},
988 #ifdef CONFIG_INOTIFY_USER
989 	{
990 		.ctl_name	= FS_INOTIFY,
991 		.procname	= "inotify",
992 		.mode		= 0555,
993 		.child		= inotify_table,
994 	},
995 #endif
996 #endif
997 	{
998 		.ctl_name	= KERN_SETUID_DUMPABLE,
999 		.procname	= "suid_dumpable",
1000 		.data		= &suid_dumpable,
1001 		.maxlen		= sizeof(int),
1002 		.mode		= 0644,
1003 		.proc_handler	= &proc_dointvec,
1004 	},
1005 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1006 	{
1007 		.ctl_name	= CTL_UNNUMBERED,
1008 		.procname	= "binfmt_misc",
1009 		.mode		= 0555,
1010 		.child		= binfmt_misc_table,
1011 	},
1012 #endif
1013 	{ .ctl_name = 0 }
1014 };
1015 
1016 static ctl_table debug_table[] = {
1017 	{ .ctl_name = 0 }
1018 };
1019 
1020 static ctl_table dev_table[] = {
1021 	{ .ctl_name = 0 }
1022 };
1023 
/*
 * Held around the use-count helpers (use_table/unuse_table), the list
 * walk in sysctl_head_next() and list removal in start_unregistering().
 */
static DEFINE_SPINLOCK(sysctl_lock);
1025 
1026 /* called under sysctl_lock */
1027 static int use_table(struct ctl_table_header *p)
1028 {
1029 	if (unlikely(p->unregistering))
1030 		return 0;
1031 	p->used++;
1032 	return 1;
1033 }
1034 
1035 /* called under sysctl_lock */
1036 static void unuse_table(struct ctl_table_header *p)
1037 {
1038 	if (!--p->used)
1039 		if (unlikely(p->unregistering))
1040 			complete(p->unregistering);
1041 }
1042 
1043 /* called under sysctl_lock, will reacquire if has to wait */
1044 static void start_unregistering(struct ctl_table_header *p)
1045 {
1046 	/*
1047 	 * if p->used is 0, nobody will ever touch that entry again;
1048 	 * we'll eliminate all paths to it before dropping sysctl_lock
1049 	 */
1050 	if (unlikely(p->used)) {
1051 		struct completion wait;
1052 		init_completion(&wait);
1053 		p->unregistering = &wait;
1054 		spin_unlock(&sysctl_lock);
1055 		wait_for_completion(&wait);
1056 		spin_lock(&sysctl_lock);
1057 	}
1058 	/*
1059 	 * do not remove from the list until nobody holds it; walking the
1060 	 * list in do_sysctl() relies on that.
1061 	 */
1062 	list_del_init(&p->ctl_entry);
1063 }
1064 
1065 void sysctl_head_finish(struct ctl_table_header *head)
1066 {
1067 	if (!head)
1068 		return;
1069 	spin_lock(&sysctl_lock);
1070 	unuse_table(head);
1071 	spin_unlock(&sysctl_lock);
1072 }
1073 
1074 struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
1075 {
1076 	struct ctl_table_header *head;
1077 	struct list_head *tmp;
1078 	spin_lock(&sysctl_lock);
1079 	if (prev) {
1080 		tmp = &prev->ctl_entry;
1081 		unuse_table(prev);
1082 		goto next;
1083 	}
1084 	tmp = &root_table_header.ctl_entry;
1085 	for (;;) {
1086 		head = list_entry(tmp, struct ctl_table_header, ctl_entry);
1087 
1088 		if (!use_table(head))
1089 			goto next;
1090 		spin_unlock(&sysctl_lock);
1091 		return head;
1092 	next:
1093 		tmp = tmp->next;
1094 		if (tmp == &root_table_header.ctl_entry)
1095 			break;
1096 	}
1097 	spin_unlock(&sysctl_lock);
1098 	return NULL;
1099 }
1100 
1101 #ifdef CONFIG_SYSCTL_SYSCALL
1102 int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
1103 	       void __user *newval, size_t newlen)
1104 {
1105 	struct ctl_table_header *head;
1106 	int error = -ENOTDIR;
1107 
1108 	if (nlen <= 0 || nlen >= CTL_MAXNAME)
1109 		return -ENOTDIR;
1110 	if (oldval) {
1111 		int old_len;
1112 		if (!oldlenp || get_user(old_len, oldlenp))
1113 			return -EFAULT;
1114 	}
1115 
1116 	for (head = sysctl_head_next(NULL); head;
1117 			head = sysctl_head_next(head)) {
1118 		error = parse_table(name, nlen, oldval, oldlenp,
1119 					newval, newlen, head->ctl_table);
1120 		if (error != -ENOTDIR) {
1121 			sysctl_head_finish(head);
1122 			break;
1123 		}
1124 	}
1125 	return error;
1126 }
1127 
1128 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
1129 {
1130 	struct __sysctl_args tmp;
1131 	int error;
1132 
1133 	if (copy_from_user(&tmp, args, sizeof(tmp)))
1134 		return -EFAULT;
1135 
1136 	lock_kernel();
1137 	error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
1138 			  tmp.newval, tmp.newlen);
1139 	unlock_kernel();
1140 	return error;
1141 }
1142 #endif /* CONFIG_SYSCTL_SYSCALL */
1143 
1144 /*
1145  * sysctl_perm does NOT grant the superuser all rights automatically, because
1146  * some sysctl variables are readonly even to root.
1147  */
1148 
1149 static int test_perm(int mode, int op)
1150 {
1151 	if (!current->euid)
1152 		mode >>= 6;
1153 	else if (in_egroup_p(0))
1154 		mode >>= 3;
1155 	if ((mode & op & 0007) == op)
1156 		return 0;
1157 	return -EACCES;
1158 }
1159 
1160 int sysctl_perm(ctl_table *table, int op)
1161 {
1162 	int error;
1163 	error = security_sysctl(table, op);
1164 	if (error)
1165 		return error;
1166 	return test_perm(table->mode, op);
1167 }
1168 
1169 #ifdef CONFIG_SYSCTL_SYSCALL
1170 static int parse_table(int __user *name, int nlen,
1171 		       void __user *oldval, size_t __user *oldlenp,
1172 		       void __user *newval, size_t newlen,
1173 		       ctl_table *table)
1174 {
1175 	int n;
1176 repeat:
1177 	if (!nlen)
1178 		return -ENOTDIR;
1179 	if (get_user(n, name))
1180 		return -EFAULT;
1181 	for ( ; table->ctl_name || table->procname; table++) {
1182 		if (!table->ctl_name)
1183 			continue;
1184 		if (n == table->ctl_name) {
1185 			int error;
1186 			if (table->child) {
1187 				if (sysctl_perm(table, 001))
1188 					return -EPERM;
1189 				name++;
1190 				nlen--;
1191 				table = table->child;
1192 				goto repeat;
1193 			}
1194 			error = do_sysctl_strategy(table, name, nlen,
1195 						   oldval, oldlenp,
1196 						   newval, newlen);
1197 			return error;
1198 		}
1199 	}
1200 	return -ENOTDIR;
1201 }
1202 
1203 /* Perform the actual read/write of a sysctl table entry. */
1204 int do_sysctl_strategy (ctl_table *table,
1205 			int __user *name, int nlen,
1206 			void __user *oldval, size_t __user *oldlenp,
1207 			void __user *newval, size_t newlen)
1208 {
1209 	int op = 0, rc;
1210 	size_t len;
1211 
1212 	if (oldval)
1213 		op |= 004;
1214 	if (newval)
1215 		op |= 002;
1216 	if (sysctl_perm(table, op))
1217 		return -EPERM;
1218 
1219 	if (table->strategy) {
1220 		rc = table->strategy(table, name, nlen, oldval, oldlenp,
1221 				     newval, newlen);
1222 		if (rc < 0)
1223 			return rc;
1224 		if (rc > 0)
1225 			return 0;
1226 	}
1227 
1228 	/* If there is no strategy routine, or if the strategy returns
1229 	 * zero, proceed with automatic r/w */
1230 	if (table->data && table->maxlen) {
1231 		if (oldval && oldlenp) {
1232 			if (get_user(len, oldlenp))
1233 				return -EFAULT;
1234 			if (len) {
1235 				if (len > table->maxlen)
1236 					len = table->maxlen;
1237 				if(copy_to_user(oldval, table->data, len))
1238 					return -EFAULT;
1239 				if(put_user(len, oldlenp))
1240 					return -EFAULT;
1241 			}
1242 		}
1243 		if (newval && newlen) {
1244 			len = newlen;
1245 			if (len > table->maxlen)
1246 				len = table->maxlen;
1247 			if(copy_from_user(table->data, newval, len))
1248 				return -EFAULT;
1249 		}
1250 	}
1251 	return 0;
1252 }
1253 #endif /* CONFIG_SYSCTL_SYSCALL */
1254 
1255 static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1256 {
1257 	for (; table->ctl_name || table->procname; table++) {
1258 		table->parent = parent;
1259 		if (table->child)
1260 			sysctl_set_parent(table, table->child);
1261 	}
1262 }
1263 
1264 static __init int sysctl_init(void)
1265 {
1266 	sysctl_set_parent(NULL, root_table);
1267 	return 0;
1268 }
1269 
1270 core_initcall(sysctl_init);
1271 
1272 /**
1273  * register_sysctl_table - register a sysctl hierarchy
1274  * @table: the top-level table structure
1275  *
 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
 * array. An entry with both a ctl_name of 0 and a %NULL procname terminates
 * the table.
1278  *
1279  * The members of the &ctl_table structure are used as follows:
1280  *
1281  * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
1282  *            must be unique within that level of sysctl
1283  *
1284  * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1285  *            enter a sysctl file
1286  *
1287  * data - a pointer to data for use by proc_handler
1288  *
1289  * maxlen - the maximum size in bytes of the data
1290  *
1291  * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1292  *
1293  * child - a pointer to the child sysctl table if this entry is a directory, or
1294  *         %NULL.
1295  *
1296  * proc_handler - the text handler routine (described below)
1297  *
1298  * strategy - the strategy routine (described below)
1299  *
1300  * de - for internal use by the sysctl routines
1301  *
1302  * extra1, extra2 - extra pointers usable by the proc handler routines
1303  *
1304  * Leaf nodes in the sysctl tree will be represented by a single file
1305  * under /proc; non-leaf nodes will be represented by directories.
1306  *
1307  * sysctl(2) can automatically manage read and write requests through
1308  * the sysctl table.  The data and maxlen fields of the ctl_table
1309  * struct enable minimal validation of the values being written to be
1310  * performed, and the mode field allows minimal authentication.
1311  *
1312  * More sophisticated management can be enabled by the provision of a
1313  * strategy routine with the table entry.  This will be called before
1314  * any automatic read or write of the data is performed.
1315  *
1316  * The strategy routine may return
1317  *
1318  * < 0 - Error occurred (error is passed to user process)
1319  *
1320  * 0   - OK - proceed with automatic read or write.
1321  *
1322  * > 0 - OK - read or write has been done by the strategy routine, so
1323  *       return immediately.
1324  *
1325  * There must be a proc_handler routine for any terminal nodes
1326  * mirrored under /proc/sys (non-terminals are handled by a built-in
1327  * directory handler).  Several default handlers are available to
1328  * cover common cases -
1329  *
1330  * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1331  * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1332  * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1333  *
1334  * It is the handler's job to read the input buffer from user memory
1335  * and process it. The handler should return 0 on success.
1336  *
1337  * This routine returns %NULL on a failure to register, and a pointer
1338  * to the table header on success.
1339  */
1340 struct ctl_table_header *register_sysctl_table(ctl_table * table)
1341 {
1342 	struct ctl_table_header *tmp;
1343 	tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
1344 	if (!tmp)
1345 		return NULL;
1346 	tmp->ctl_table = table;
1347 	INIT_LIST_HEAD(&tmp->ctl_entry);
1348 	tmp->used = 0;
1349 	tmp->unregistering = NULL;
1350 	sysctl_set_parent(NULL, table);
1351 	spin_lock(&sysctl_lock);
1352 	list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
1353 	spin_unlock(&sysctl_lock);
1354 	return tmp;
1355 }
1356 
1357 /**
1358  * unregister_sysctl_table - unregister a sysctl table hierarchy
1359  * @header: the header returned from register_sysctl_table
1360  *
1361  * Unregisters the sysctl table and all children. proc entries may not
1362  * actually be removed until they are no longer used by anyone.
1363  */
1364 void unregister_sysctl_table(struct ctl_table_header * header)
1365 {
1366 	might_sleep();
1367 	spin_lock(&sysctl_lock);
1368 	start_unregistering(header);
1369 	spin_unlock(&sysctl_lock);
1370 	kfree(header);
1371 }
1372 
1373 #else /* !CONFIG_SYSCTL */
1374 struct ctl_table_header *register_sysctl_table(ctl_table * table)
1375 {
1376 	return NULL;
1377 }
1378 
/* Built without CONFIG_SYSCTL: there is never anything to unregister. */
void unregister_sysctl_table(struct ctl_table_header *table)
{
}
1382 
1383 #endif /* CONFIG_SYSCTL */
1384 
1385 /*
1386  * /proc/sys support
1387  */
1388 
1389 #ifdef CONFIG_PROC_SYSCTL
1390 
1391 static int _proc_do_string(void* data, int maxlen, int write,
1392 			   struct file *filp, void __user *buffer,
1393 			   size_t *lenp, loff_t *ppos)
1394 {
1395 	size_t len;
1396 	char __user *p;
1397 	char c;
1398 
1399 	if (!data || !maxlen || !*lenp) {
1400 		*lenp = 0;
1401 		return 0;
1402 	}
1403 
1404 	if (write) {
1405 		len = 0;
1406 		p = buffer;
1407 		while (len < *lenp) {
1408 			if (get_user(c, p++))
1409 				return -EFAULT;
1410 			if (c == 0 || c == '\n')
1411 				break;
1412 			len++;
1413 		}
1414 		if (len >= maxlen)
1415 			len = maxlen-1;
1416 		if(copy_from_user(data, buffer, len))
1417 			return -EFAULT;
1418 		((char *) data)[len] = 0;
1419 		*ppos += *lenp;
1420 	} else {
1421 		len = strlen(data);
1422 		if (len > maxlen)
1423 			len = maxlen;
1424 
1425 		if (*ppos > len) {
1426 			*lenp = 0;
1427 			return 0;
1428 		}
1429 
1430 		data += *ppos;
1431 		len  -= *ppos;
1432 
1433 		if (len > *lenp)
1434 			len = *lenp;
1435 		if (len)
1436 			if(copy_to_user(buffer, data, len))
1437 				return -EFAULT;
1438 		if (len < *lenp) {
1439 			if(put_user('\n', ((char __user *) buffer) + len))
1440 				return -EFAULT;
1441 			len++;
1442 		}
1443 		*lenp = len;
1444 		*ppos += len;
1445 	}
1446 	return 0;
1447 }
1448 
1449 /**
1450  * proc_dostring - read a string sysctl
1451  * @table: the sysctl table
1452  * @write: %TRUE if this is a write to the sysctl file
1453  * @filp: the file structure
1454  * @buffer: the user buffer
1455  * @lenp: the size of the user buffer
1456  * @ppos: file position
1457  *
1458  * Reads/writes a string from/to the user buffer. If the kernel
1459  * buffer provided is not large enough to hold the string, the
 * string is truncated. The copied string is NUL-terminated.
1461  * If the string is being read by the user process, it is copied
1462  * and a newline '\n' is added. It is truncated if the buffer is
1463  * not large enough.
1464  *
1465  * Returns 0 on success.
1466  */
1467 int proc_dostring(ctl_table *table, int write, struct file *filp,
1468 		  void __user *buffer, size_t *lenp, loff_t *ppos)
1469 {
1470 	return _proc_do_string(table->data, table->maxlen, write, filp,
1471 			       buffer, lenp, ppos);
1472 }
1473 
1474 
/*
 * Default conversion between the parsed (sign, magnitude) pair used by the
 * integer vector parser and the stored int.
 *
 * @negp:  sign flag; read on write, set on read (non-zero means negative)
 * @lvalp: magnitude; read on write, set on read
 * @valp:  the kernel integer being written or read
 * @write: non-zero to store into *valp, zero to decompose *valp
 * @data:  unused
 *
 * Always returns 0.
 */
static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		*valp = *negp ? -*lvalp : *lvalp;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows for
			 * INT_MIN, and would yield a sign-extended magnitude
			 * where unsigned long is wider than int.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1493 
1494 static int __do_proc_dointvec(void *tbl_data, ctl_table *table,
1495 		  int write, struct file *filp, void __user *buffer,
1496 		  size_t *lenp, loff_t *ppos,
1497 		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1498 			      int write, void *data),
1499 		  void *data)
1500 {
1501 #define TMPBUFLEN 21
1502 	int *i, vleft, first=1, neg, val;
1503 	unsigned long lval;
1504 	size_t left, len;
1505 
1506 	char buf[TMPBUFLEN], *p;
1507 	char __user *s = buffer;
1508 
1509 	if (!tbl_data || !table->maxlen || !*lenp ||
1510 	    (*ppos && !write)) {
1511 		*lenp = 0;
1512 		return 0;
1513 	}
1514 
1515 	i = (int *) tbl_data;
1516 	vleft = table->maxlen / sizeof(*i);
1517 	left = *lenp;
1518 
1519 	if (!conv)
1520 		conv = do_proc_dointvec_conv;
1521 
1522 	for (; left && vleft--; i++, first=0) {
1523 		if (write) {
1524 			while (left) {
1525 				char c;
1526 				if (get_user(c, s))
1527 					return -EFAULT;
1528 				if (!isspace(c))
1529 					break;
1530 				left--;
1531 				s++;
1532 			}
1533 			if (!left)
1534 				break;
1535 			neg = 0;
1536 			len = left;
1537 			if (len > sizeof(buf) - 1)
1538 				len = sizeof(buf) - 1;
1539 			if (copy_from_user(buf, s, len))
1540 				return -EFAULT;
1541 			buf[len] = 0;
1542 			p = buf;
1543 			if (*p == '-' && left > 1) {
1544 				neg = 1;
1545 				p++;
1546 			}
1547 			if (*p < '0' || *p > '9')
1548 				break;
1549 
1550 			lval = simple_strtoul(p, &p, 0);
1551 
1552 			len = p-buf;
1553 			if ((len < left) && *p && !isspace(*p))
1554 				break;
1555 			if (neg)
1556 				val = -val;
1557 			s += len;
1558 			left -= len;
1559 
1560 			if (conv(&neg, &lval, i, 1, data))
1561 				break;
1562 		} else {
1563 			p = buf;
1564 			if (!first)
1565 				*p++ = '\t';
1566 
1567 			if (conv(&neg, &lval, i, 0, data))
1568 				break;
1569 
1570 			sprintf(p, "%s%lu", neg ? "-" : "", lval);
1571 			len = strlen(buf);
1572 			if (len > left)
1573 				len = left;
1574 			if(copy_to_user(s, buf, len))
1575 				return -EFAULT;
1576 			left -= len;
1577 			s += len;
1578 		}
1579 	}
1580 
1581 	if (!write && !first && left) {
1582 		if(put_user('\n', s))
1583 			return -EFAULT;
1584 		left--, s++;
1585 	}
1586 	if (write) {
1587 		while (left) {
1588 			char c;
1589 			if (get_user(c, s++))
1590 				return -EFAULT;
1591 			if (!isspace(c))
1592 				break;
1593 			left--;
1594 		}
1595 	}
1596 	if (write && first)
1597 		return -EINVAL;
1598 	*lenp -= left;
1599 	*ppos += *lenp;
1600 	return 0;
1601 #undef TMPBUFLEN
1602 }
1603 
1604 static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
1605 		  void __user *buffer, size_t *lenp, loff_t *ppos,
1606 		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1607 			      int write, void *data),
1608 		  void *data)
1609 {
1610 	return __do_proc_dointvec(table->data, table, write, filp,
1611 			buffer, lenp, ppos, conv, data);
1612 }
1613 
1614 /**
1615  * proc_dointvec - read a vector of integers
1616  * @table: the sysctl table
1617  * @write: %TRUE if this is a write to the sysctl file
1618  * @filp: the file structure
1619  * @buffer: the user buffer
1620  * @lenp: the size of the user buffer
1621  * @ppos: file position
1622  *
1623  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1624  * values from/to the user buffer, treated as an ASCII string.
1625  *
1626  * Returns 0 on success.
1627  */
1628 int proc_dointvec(ctl_table *table, int write, struct file *filp,
1629 		     void __user *buffer, size_t *lenp, loff_t *ppos)
1630 {
1631     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1632 		    	    NULL,NULL);
1633 }
1634 
/* How a written value is combined with the stored one. */
#define OP_SET	0
#define OP_AND	1
#define OP_OR	2

/*
 * Conversion callback that assigns, ANDs or ORs the written value into the
 * stored int.
 *
 * @negp:  sign flag; read on write, set on read (non-zero means negative)
 * @lvalp: magnitude; read on write, set on read
 * @valp:  the kernel integer being updated or read
 * @write: non-zero to update *valp, zero to decompose *valp
 * @data:  points to an int holding one of OP_SET, OP_AND or OP_OR
 *
 * Always returns 0.  An operation code other than the three above leaves
 * *valp untouched.
 */
static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp,
				      int *valp,
				      int write, void *data)
{
	int op = *(int *)data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		switch(op) {
		case OP_SET:	*valp = val; break;
		case OP_AND:	*valp &= val; break;
		case OP_OR:	*valp |= val; break;
		}
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows for
			 * INT_MIN, and would yield a sign-extended magnitude
			 * where unsigned long is wider than int.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1663 
1664 /*
1665  *	init may raise the set.
1666  */
1667 
1668 int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
1669 			void __user *buffer, size_t *lenp, loff_t *ppos)
1670 {
1671 	int op;
1672 
1673 	if (write && !capable(CAP_SYS_MODULE)) {
1674 		return -EPERM;
1675 	}
1676 
1677 	op = is_init(current) ? OP_SET : OP_AND;
1678 	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1679 				do_proc_dointvec_bset_conv,&op);
1680 }
1681 
1682 /*
1683  *	Taint values can only be increased
1684  */
1685 static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp,
1686 			       void __user *buffer, size_t *lenp, loff_t *ppos)
1687 {
1688 	int op;
1689 
1690 	if (write && !capable(CAP_SYS_ADMIN))
1691 		return -EPERM;
1692 
1693 	op = OP_OR;
1694 	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1695 				do_proc_dointvec_bset_conv,&op);
1696 }
1697 
/* Optional bounds for do_proc_dointvec_minmax_conv(); NULL means no bound. */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Conversion callback that range-checks written values.
 *
 * @negp:  sign flag; read on write, set on read (non-zero means negative)
 * @lvalp: magnitude; read on write, set on read
 * @valp:  the kernel integer being written or read
 * @write: non-zero to store into *valp, zero to decompose *valp
 * @data:  points to a struct do_proc_dointvec_minmax_conv_param
 *
 * Returns 0 on success, or -EINVAL (leaving *valp unchanged) when a written
 * value lies outside the supplied bounds.
 */
static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows for
			 * INT_MIN, and would yield a sign-extended magnitude
			 * where unsigned long is wider than int.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1726 
1727 /**
1728  * proc_dointvec_minmax - read a vector of integers with min/max values
1729  * @table: the sysctl table
1730  * @write: %TRUE if this is a write to the sysctl file
1731  * @filp: the file structure
1732  * @buffer: the user buffer
1733  * @lenp: the size of the user buffer
1734  * @ppos: file position
1735  *
1736  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1737  * values from/to the user buffer, treated as an ASCII string.
1738  *
1739  * This routine will ensure the values are within the range specified by
1740  * table->extra1 (min) and table->extra2 (max).
1741  *
1742  * Returns 0 on success.
1743  */
1744 int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
1745 		  void __user *buffer, size_t *lenp, loff_t *ppos)
1746 {
1747 	struct do_proc_dointvec_minmax_conv_param param = {
1748 		.min = (int *) table->extra1,
1749 		.max = (int *) table->extra2,
1750 	};
1751 	return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
1752 				do_proc_dointvec_minmax_conv, &param);
1753 }
1754 
1755 static int __do_proc_doulongvec_minmax(void *data, ctl_table *table, int write,
1756 				     struct file *filp,
1757 				     void __user *buffer,
1758 				     size_t *lenp, loff_t *ppos,
1759 				     unsigned long convmul,
1760 				     unsigned long convdiv)
1761 {
1762 #define TMPBUFLEN 21
1763 	unsigned long *i, *min, *max, val;
1764 	int vleft, first=1, neg;
1765 	size_t len, left;
1766 	char buf[TMPBUFLEN], *p;
1767 	char __user *s = buffer;
1768 
1769 	if (!data || !table->maxlen || !*lenp ||
1770 	    (*ppos && !write)) {
1771 		*lenp = 0;
1772 		return 0;
1773 	}
1774 
1775 	i = (unsigned long *) data;
1776 	min = (unsigned long *) table->extra1;
1777 	max = (unsigned long *) table->extra2;
1778 	vleft = table->maxlen / sizeof(unsigned long);
1779 	left = *lenp;
1780 
1781 	for (; left && vleft--; i++, min++, max++, first=0) {
1782 		if (write) {
1783 			while (left) {
1784 				char c;
1785 				if (get_user(c, s))
1786 					return -EFAULT;
1787 				if (!isspace(c))
1788 					break;
1789 				left--;
1790 				s++;
1791 			}
1792 			if (!left)
1793 				break;
1794 			neg = 0;
1795 			len = left;
1796 			if (len > TMPBUFLEN-1)
1797 				len = TMPBUFLEN-1;
1798 			if (copy_from_user(buf, s, len))
1799 				return -EFAULT;
1800 			buf[len] = 0;
1801 			p = buf;
1802 			if (*p == '-' && left > 1) {
1803 				neg = 1;
1804 				p++;
1805 			}
1806 			if (*p < '0' || *p > '9')
1807 				break;
1808 			val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
1809 			len = p-buf;
1810 			if ((len < left) && *p && !isspace(*p))
1811 				break;
1812 			if (neg)
1813 				val = -val;
1814 			s += len;
1815 			left -= len;
1816 
1817 			if(neg)
1818 				continue;
1819 			if ((min && val < *min) || (max && val > *max))
1820 				continue;
1821 			*i = val;
1822 		} else {
1823 			p = buf;
1824 			if (!first)
1825 				*p++ = '\t';
1826 			sprintf(p, "%lu", convdiv * (*i) / convmul);
1827 			len = strlen(buf);
1828 			if (len > left)
1829 				len = left;
1830 			if(copy_to_user(s, buf, len))
1831 				return -EFAULT;
1832 			left -= len;
1833 			s += len;
1834 		}
1835 	}
1836 
1837 	if (!write && !first && left) {
1838 		if(put_user('\n', s))
1839 			return -EFAULT;
1840 		left--, s++;
1841 	}
1842 	if (write) {
1843 		while (left) {
1844 			char c;
1845 			if (get_user(c, s++))
1846 				return -EFAULT;
1847 			if (!isspace(c))
1848 				break;
1849 			left--;
1850 		}
1851 	}
1852 	if (write && first)
1853 		return -EINVAL;
1854 	*lenp -= left;
1855 	*ppos += *lenp;
1856 	return 0;
1857 #undef TMPBUFLEN
1858 }
1859 
1860 static int do_proc_doulongvec_minmax(ctl_table *table, int write,
1861 				     struct file *filp,
1862 				     void __user *buffer,
1863 				     size_t *lenp, loff_t *ppos,
1864 				     unsigned long convmul,
1865 				     unsigned long convdiv)
1866 {
1867 	return __do_proc_doulongvec_minmax(table->data, table, write,
1868 			filp, buffer, lenp, ppos, convmul, convdiv);
1869 }
1870 
1871 /**
1872  * proc_doulongvec_minmax - read a vector of long integers with min/max values
1873  * @table: the sysctl table
1874  * @write: %TRUE if this is a write to the sysctl file
1875  * @filp: the file structure
1876  * @buffer: the user buffer
1877  * @lenp: the size of the user buffer
1878  * @ppos: file position
1879  *
1880  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1881  * values from/to the user buffer, treated as an ASCII string.
1882  *
1883  * This routine will ensure the values are within the range specified by
1884  * table->extra1 (min) and table->extra2 (max).
1885  *
1886  * Returns 0 on success.
1887  */
1888 int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
1889 			   void __user *buffer, size_t *lenp, loff_t *ppos)
1890 {
1891     return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l);
1892 }
1893 
1894 /**
1895  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
1896  * @table: the sysctl table
1897  * @write: %TRUE if this is a write to the sysctl file
1898  * @filp: the file structure
1899  * @buffer: the user buffer
1900  * @lenp: the size of the user buffer
1901  * @ppos: file position
1902  *
1903  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1904  * values from/to the user buffer, treated as an ASCII string. The values
1905  * are treated as milliseconds, and converted to jiffies when they are stored.
1906  *
1907  * This routine will ensure the values are within the range specified by
1908  * table->extra1 (min) and table->extra2 (max).
1909  *
1910  * Returns 0 on success.
1911  */
1912 int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
1913 				      struct file *filp,
1914 				      void __user *buffer,
1915 				      size_t *lenp, loff_t *ppos)
1916 {
1917     return do_proc_doulongvec_minmax(table, write, filp, buffer,
1918 				     lenp, ppos, HZ, 1000l);
1919 }
1920 
1921 
1922 static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
1923 					 int *valp,
1924 					 int write, void *data)
1925 {
1926 	if (write) {
1927 		if (*lvalp > LONG_MAX / HZ)
1928 			return 1;
1929 		*valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
1930 	} else {
1931 		int val = *valp;
1932 		unsigned long lval;
1933 		if (val < 0) {
1934 			*negp = -1;
1935 			lval = (unsigned long)-val;
1936 		} else {
1937 			*negp = 0;
1938 			lval = (unsigned long)val;
1939 		}
1940 		*lvalp = lval / HZ;
1941 	}
1942 	return 0;
1943 }
1944 
1945 static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
1946 						int *valp,
1947 						int write, void *data)
1948 {
1949 	if (write) {
1950 		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
1951 			return 1;
1952 		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
1953 	} else {
1954 		int val = *valp;
1955 		unsigned long lval;
1956 		if (val < 0) {
1957 			*negp = -1;
1958 			lval = (unsigned long)-val;
1959 		} else {
1960 			*negp = 0;
1961 			lval = (unsigned long)val;
1962 		}
1963 		*lvalp = jiffies_to_clock_t(lval);
1964 	}
1965 	return 0;
1966 }
1967 
/*
 * Conversion callback for values expressed in milliseconds and stored in
 * jiffies.
 *
 * @negp:  sign flag; read on write, set on read (non-zero means negative)
 * @lvalp: magnitude in milliseconds; read on write, set on read
 * @valp:  the kernel integer (in jiffies) being written or read
 * @write: non-zero to store into *valp, zero to decompose *valp
 * @data:  unused
 *
 * Always returns 0.
 */
static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		*valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
	} else {
		int val = *valp;
		unsigned long lval;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows for
			 * INT_MIN, and would yield a sign-extended magnitude
			 * where unsigned long is wider than int.
			 */
			lval = -(unsigned long)val;
		} else {
			*negp = 0;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_msecs(lval);
	}
	return 0;
}
1988 
1989 /**
1990  * proc_dointvec_jiffies - read a vector of integers as seconds
1991  * @table: the sysctl table
1992  * @write: %TRUE if this is a write to the sysctl file
1993  * @filp: the file structure
1994  * @buffer: the user buffer
1995  * @lenp: the size of the user buffer
1996  * @ppos: file position
1997  *
1998  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1999  * values from/to the user buffer, treated as an ASCII string.
2000  * The values read are assumed to be in seconds, and are converted into
2001  * jiffies.
2002  *
2003  * Returns 0 on success.
2004  */
2005 int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2006 			  void __user *buffer, size_t *lenp, loff_t *ppos)
2007 {
2008     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2009 		    	    do_proc_dointvec_jiffies_conv,NULL);
2010 }
2011 
2012 /**
2013  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2014  * @table: the sysctl table
2015  * @write: %TRUE if this is a write to the sysctl file
2016  * @filp: the file structure
2017  * @buffer: the user buffer
2018  * @lenp: the size of the user buffer
2019  * @ppos: pointer to the file position
2020  *
2021  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2022  * values from/to the user buffer, treated as an ASCII string.
2023  * The values read are assumed to be in 1/USER_HZ seconds, and
2024  * are converted into jiffies.
2025  *
2026  * Returns 0 on success.
2027  */
2028 int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2029 				 void __user *buffer, size_t *lenp, loff_t *ppos)
2030 {
2031     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2032 		    	    do_proc_dointvec_userhz_jiffies_conv,NULL);
2033 }
2034 
2035 /**
2036  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2037  * @table: the sysctl table
2038  * @write: %TRUE if this is a write to the sysctl file
2039  * @filp: the file structure
2040  * @buffer: the user buffer
2041  * @lenp: the size of the user buffer
 * @ppos: file position
2044  *
2045  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2046  * values from/to the user buffer, treated as an ASCII string.
2047  * The values read are assumed to be in 1/1000 seconds, and
2048  * are converted into jiffies.
2049  *
2050  * Returns 0 on success.
2051  */
2052 int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2053 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2054 {
2055 	return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
2056 				do_proc_dointvec_ms_jiffies_conv, NULL);
2057 }
2058 
2059 static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
2060 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2061 {
2062 	struct pid *new_pid;
2063 	pid_t tmp;
2064 	int r;
2065 
2066 	tmp = pid_nr(cad_pid);
2067 
2068 	r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
2069 			       lenp, ppos, NULL, NULL);
2070 	if (r || !write)
2071 		return r;
2072 
2073 	new_pid = find_get_pid(tmp);
2074 	if (!new_pid)
2075 		return -ESRCH;
2076 
2077 	put_pid(xchg(&cad_pid, new_pid));
2078 	return 0;
2079 }
2080 
2081 #else /* CONFIG_PROC_FS */
2082 
2083 int proc_dostring(ctl_table *table, int write, struct file *filp,
2084 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2085 {
2086 	return -ENOSYS;
2087 }
2088 
2089 int proc_dointvec(ctl_table *table, int write, struct file *filp,
2090 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2091 {
2092 	return -ENOSYS;
2093 }
2094 
2095 int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
2096 			void __user *buffer, size_t *lenp, loff_t *ppos)
2097 {
2098 	return -ENOSYS;
2099 }
2100 
2101 int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
2102 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2103 {
2104 	return -ENOSYS;
2105 }
2106 
2107 int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2108 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2109 {
2110 	return -ENOSYS;
2111 }
2112 
2113 int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2114 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2115 {
2116 	return -ENOSYS;
2117 }
2118 
2119 int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2120 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2121 {
2122 	return -ENOSYS;
2123 }
2124 
2125 int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2126 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2127 {
2128 	return -ENOSYS;
2129 }
2130 
2131 int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2132 				      struct file *filp,
2133 				      void __user *buffer,
2134 				      size_t *lenp, loff_t *ppos)
2135 {
2136     return -ENOSYS;
2137 }
2138 
2139 
2140 #endif /* CONFIG_PROC_FS */
2141 
2142 
2143 #ifdef CONFIG_SYSCTL_SYSCALL
2144 /*
2145  * General sysctl support routines
2146  */
2147 
2148 /* The generic string strategy routine: */
2149 int sysctl_string(ctl_table *table, int __user *name, int nlen,
2150 		  void __user *oldval, size_t __user *oldlenp,
2151 		  void __user *newval, size_t newlen)
2152 {
2153 	if (!table->data || !table->maxlen)
2154 		return -ENOTDIR;
2155 
2156 	if (oldval && oldlenp) {
2157 		size_t bufsize;
2158 		if (get_user(bufsize, oldlenp))
2159 			return -EFAULT;
2160 		if (bufsize) {
2161 			size_t len = strlen(table->data), copied;
2162 
2163 			/* This shouldn't trigger for a well-formed sysctl */
2164 			if (len > table->maxlen)
2165 				len = table->maxlen;
2166 
2167 			/* Copy up to a max of bufsize-1 bytes of the string */
2168 			copied = (len >= bufsize) ? bufsize - 1 : len;
2169 
2170 			if (copy_to_user(oldval, table->data, copied) ||
2171 			    put_user(0, (char __user *)(oldval + copied)))
2172 				return -EFAULT;
2173 			if (put_user(len, oldlenp))
2174 				return -EFAULT;
2175 		}
2176 	}
2177 	if (newval && newlen) {
2178 		size_t len = newlen;
2179 		if (len > table->maxlen)
2180 			len = table->maxlen;
2181 		if(copy_from_user(table->data, newval, len))
2182 			return -EFAULT;
2183 		if (len == table->maxlen)
2184 			len--;
2185 		((char *) table->data)[len] = 0;
2186 	}
2187 	return 1;
2188 }
2189 
2190 /*
2191  * This function makes sure that all of the integers in the vector
2192  * are between the minimum and maximum values given in the arrays
2193  * table->extra1 and table->extra2, respectively.
2194  */
2195 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2196 		void __user *oldval, size_t __user *oldlenp,
2197 		void __user *newval, size_t newlen)
2198 {
2199 
2200 	if (newval && newlen) {
2201 		int __user *vec = (int __user *) newval;
2202 		int *min = (int *) table->extra1;
2203 		int *max = (int *) table->extra2;
2204 		size_t length;
2205 		int i;
2206 
2207 		if (newlen % sizeof(int) != 0)
2208 			return -EINVAL;
2209 
2210 		if (!table->extra1 && !table->extra2)
2211 			return 0;
2212 
2213 		if (newlen > table->maxlen)
2214 			newlen = table->maxlen;
2215 		length = newlen / sizeof(int);
2216 
2217 		for (i = 0; i < length; i++) {
2218 			int value;
2219 			if (get_user(value, vec + i))
2220 				return -EFAULT;
2221 			if (min && value < min[i])
2222 				return -EINVAL;
2223 			if (max && value > max[i])
2224 				return -EINVAL;
2225 		}
2226 	}
2227 	return 0;
2228 }
2229 
2230 /* Strategy function to convert jiffies to seconds */
2231 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2232 		void __user *oldval, size_t __user *oldlenp,
2233 		void __user *newval, size_t newlen)
2234 {
2235 	if (oldval && oldlenp) {
2236 		size_t olen;
2237 
2238 		if (get_user(olen, oldlenp))
2239 			return -EFAULT;
2240 		if (olen) {
2241 			int val;
2242 
2243 			if (olen < sizeof(int))
2244 				return -EINVAL;
2245 
2246 			val = *(int *)(table->data) / HZ;
2247 			if (put_user(val, (int __user *)oldval))
2248 				return -EFAULT;
2249 			if (put_user(sizeof(int), oldlenp))
2250 				return -EFAULT;
2251 		}
2252 	}
2253 	if (newval && newlen) {
2254 		int new;
2255 		if (newlen != sizeof(int))
2256 			return -EINVAL;
2257 		if (get_user(new, (int __user *)newval))
2258 			return -EFAULT;
2259 		*(int *)(table->data) = new*HZ;
2260 	}
2261 	return 1;
2262 }
2263 
2264 /* Strategy function to convert jiffies to seconds */
2265 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2266 		void __user *oldval, size_t __user *oldlenp,
2267 		void __user *newval, size_t newlen)
2268 {
2269 	if (oldval && oldlenp) {
2270 		size_t olen;
2271 
2272 		if (get_user(olen, oldlenp))
2273 			return -EFAULT;
2274 		if (olen) {
2275 			int val;
2276 
2277 			if (olen < sizeof(int))
2278 				return -EINVAL;
2279 
2280 			val = jiffies_to_msecs(*(int *)(table->data));
2281 			if (put_user(val, (int __user *)oldval))
2282 				return -EFAULT;
2283 			if (put_user(sizeof(int), oldlenp))
2284 				return -EFAULT;
2285 		}
2286 	}
2287 	if (newval && newlen) {
2288 		int new;
2289 		if (newlen != sizeof(int))
2290 			return -EINVAL;
2291 		if (get_user(new, (int __user *)newval))
2292 			return -EFAULT;
2293 		*(int *)(table->data) = msecs_to_jiffies(new);
2294 	}
2295 	return 1;
2296 }
2297 
2298 
2299 
2300 #else /* CONFIG_SYSCTL_SYSCALL */
2301 
2302 
2303 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
2304 {
2305 	static int msg_count;
2306 	struct __sysctl_args tmp;
2307 	int name[CTL_MAXNAME];
2308 	int i;
2309 
2310 	/* Read in the sysctl name for better debug message logging */
2311 	if (copy_from_user(&tmp, args, sizeof(tmp)))
2312 		return -EFAULT;
2313 	if (tmp.nlen <= 0 || tmp.nlen >= CTL_MAXNAME)
2314 		return -ENOTDIR;
2315 	for (i = 0; i < tmp.nlen; i++)
2316 		if (get_user(name[i], tmp.name + i))
2317 			return -EFAULT;
2318 
2319 	/* Ignore accesses to kernel.version */
2320 	if ((tmp.nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION))
2321 		goto out;
2322 
2323 	if (msg_count < 5) {
2324 		msg_count++;
2325 		printk(KERN_INFO
2326 			"warning: process `%s' used the removed sysctl "
2327 			"system call with ", current->comm);
2328 		for (i = 0; i < tmp.nlen; i++)
2329 			printk("%d.", name[i]);
2330 		printk("\n");
2331 	}
2332 out:
2333 	return -ENOSYS;
2334 }
2335 
2336 int sysctl_string(ctl_table *table, int __user *name, int nlen,
2337 		  void __user *oldval, size_t __user *oldlenp,
2338 		  void __user *newval, size_t newlen)
2339 {
2340 	return -ENOSYS;
2341 }
2342 
2343 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2344 		void __user *oldval, size_t __user *oldlenp,
2345 		void __user *newval, size_t newlen)
2346 {
2347 	return -ENOSYS;
2348 }
2349 
2350 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2351 		void __user *oldval, size_t __user *oldlenp,
2352 		void __user *newval, size_t newlen)
2353 {
2354 	return -ENOSYS;
2355 }
2356 
2357 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2358 		void __user *oldval, size_t __user *oldlenp,
2359 		void __user *newval, size_t newlen)
2360 {
2361 	return -ENOSYS;
2362 }
2363 
2364 #endif /* CONFIG_SYSCTL_SYSCALL */
2365 
/*
 * Exports are collected in one place instead of following each definition:
 * some of these symbols are defined twice (once per configuration branch),
 * and repeating the export after every copy would be pointless.
 */

/* proc_* symbols */
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);

/* sysctl_* strategy functions */
EXPORT_SYMBOL(sysctl_string);
EXPORT_SYMBOL(sysctl_intvec);
EXPORT_SYMBOL(sysctl_jiffies);
EXPORT_SYMBOL(sysctl_ms_jiffies);

/* table registration */
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(unregister_sysctl_table);
2384