xref: /openbmc/linux/kernel/sysctl.c (revision ebf8889bd1fe3615991ff4494635d237280652a2)
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20 
21 #include <linux/module.h>
22 #include <linux/mm.h>
23 #include <linux/swap.h>
24 #include <linux/slab.h>
25 #include <linux/sysctl.h>
26 #include <linux/proc_fs.h>
27 #include <linux/capability.h>
28 #include <linux/ctype.h>
29 #include <linux/utsname.h>
30 #include <linux/smp_lock.h>
31 #include <linux/fs.h>
32 #include <linux/init.h>
33 #include <linux/kernel.h>
34 #include <linux/kobject.h>
35 #include <linux/net.h>
36 #include <linux/sysrq.h>
37 #include <linux/highuid.h>
38 #include <linux/writeback.h>
39 #include <linux/hugetlb.h>
40 #include <linux/security.h>
41 #include <linux/initrd.h>
42 #include <linux/times.h>
43 #include <linux/limits.h>
44 #include <linux/dcache.h>
45 #include <linux/syscalls.h>
46 #include <linux/nfs_fs.h>
47 #include <linux/acpi.h>
48 #include <linux/reboot.h>
49 
50 #include <asm/uaccess.h>
51 #include <asm/processor.h>
52 
53 #ifdef CONFIG_X86
54 #include <asm/nmi.h>
55 #include <asm/stacktrace.h>
56 #endif
57 
58 #if defined(CONFIG_SYSCTL)
59 
60 /* External variables not in a header file. */
/*
 * Each of these is referenced further down as the .data pointer (or, for
 * pid_max_min/pid_max_max, the .extra1/.extra2 bound) of a ctl_table
 * entry.  They are declared here only so those initializers compile.
 */
61 extern int C_A_D;
62 extern int print_fatal_signals;
63 extern int sysctl_overcommit_memory;
64 extern int sysctl_overcommit_ratio;
65 extern int sysctl_panic_on_oom;
66 extern int max_threads;
67 extern int core_uses_pid;
68 extern int suid_dumpable;
69 extern char core_pattern[];
70 extern int pid_max;
71 extern int min_free_kbytes;
72 extern int printk_ratelimit_jiffies;
73 extern int printk_ratelimit_burst;
74 extern int pid_max_min, pid_max_max;
75 extern int sysctl_drop_caches;
76 extern int percpu_pagelist_fraction;
77 extern int compat_log;
78 extern int maps_protect;
79 extern int sysctl_stat_interval;
80 extern int audit_argv_kb;
81 
82 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
83 static int maxolduid = 65535;
/* No initializer: static storage duration, so the lower bound is 0. */
84 static int minolduid;
/* Lower bound (.extra1) of the vm "percpu_pagelist_fraction" entry. */
85 static int min_percpu_pagelist_fract = 8;
86 
/* Backing storage for the read-only (mode 0444) "ngroups_max" entry. */
87 static int ngroups_max = NGROUPS_MAX;
88 
/*
 * Config- and architecture-specific externs used as .data targets in
 * kern_table.  Most are declared under the same condition that guards the
 * table entry using them.  Two are declared more broadly than they are
 * used: sysctl_hz_timer is declared unconditionally although its entry
 * sits inside CONFIG_S390 && CONFIG_NO_IDLE_HZ, and spin_retry is
 * declared for any CONFIG_S390 build although its entry also requires
 * CONFIG_SMP.
 */
89 #ifdef CONFIG_KMOD
90 extern char modprobe_path[];
91 #endif
92 #ifdef CONFIG_CHR_DEV_SG
93 extern int sg_big_buff;
94 #endif
95 
96 #ifdef __sparc__
97 extern char reboot_command [];
98 extern int stop_a_enabled;
99 extern int scons_pwroff;
100 #endif
101 
102 #ifdef __hppa__
103 extern int pwrsw_enabled;
104 extern int unaligned_enabled;
105 #endif
106 
107 #ifdef CONFIG_S390
108 #ifdef CONFIG_MATHEMU
109 extern int sysctl_ieee_emulation_warnings;
110 #endif
111 extern int sysctl_userprocess_debug;
112 extern int spin_retry;
113 #endif
114 
115 extern int sysctl_hz_timer;
116 
117 #ifdef CONFIG_BSD_PROCESS_ACCT
118 extern int acct_parm[];
119 #endif
120 
121 #ifdef CONFIG_IA64
122 extern int no_unaligned_warning;
123 #endif
124 
125 #ifdef CONFIG_RT_MUTEXES
126 extern int max_lock_depth;
127 #endif
128 
/*
 * Forward declarations of file-local helpers whose definitions are not in
 * this part of the file.  parse_table() is only declared when the binary
 * sysctl(2) interface (CONFIG_SYSCTL_SYSCALL) is built.
 */
129 #ifdef CONFIG_SYSCTL_SYSCALL
130 static int parse_table(int __user *, int, void __user *, size_t __user *,
131 		void __user *, size_t, ctl_table *);
132 #endif
133 
134 
/*
 * /proc handlers referenced from kern_table: proc_do_cad_pid backs the
 * "cad_pid" entry and proc_dointvec_taint backs the "tainted" entry.
 * Both entries are guarded by CONFIG_PROC_SYSCTL as well.
 */
135 #ifdef CONFIG_PROC_SYSCTL
136 static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
137 		  void __user *buffer, size_t *lenp, loff_t *ppos);
138 static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp,
139 			       void __user *buffer, size_t *lenp, loff_t *ppos);
140 #endif
141 
/*
 * Tentative declaration so root_table_header can point at root_table
 * before the table itself is defined further down.
 */
142 static ctl_table root_table[];
/*
 * Header for the built-in root table.  Positional initializer: the table
 * pointer first, then the ctl_entry list head initialised via
 * LIST_HEAD_INIT on itself (presumably an empty list -- macro defined
 * elsewhere).
 */
143 static struct ctl_table_header root_table_header =
144 	{ root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) };
145 
/* Sub-tables defined later in this file; used as .child of root_table. */
146 static ctl_table kern_table[];
147 static ctl_table vm_table[];
148 static ctl_table fs_table[];
149 static ctl_table debug_table[];
150 static ctl_table dev_table[];
/* Sub-tables declared extern here; used as .child of entries below. */
151 extern ctl_table random_table[];
152 #ifdef CONFIG_UNIX98_PTYS
153 extern ctl_table pty_table[];
154 #endif
155 #ifdef CONFIG_INOTIFY_USER
156 extern ctl_table inotify_table[];
157 #endif
158 
/*
 * Defined (not merely declared) here with external linkage; exposed as
 * the vm "legacy_va_layout" entry under the same #ifdef.
 */
159 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
160 int sysctl_legacy_va_layout;
161 #endif
162 
/*
 * Declared unconditionally, but only referenced by kern_table entries
 * under CONFIG_PROVE_LOCKING and CONFIG_LOCK_STAT respectively.
 */
163 extern int prove_locking;
164 extern int lock_stat;
165 
166 /* The default sysctl tables: */
167 
/*
 * Top-level sysctl directories.  Every entry is a pure directory node:
 * mode 0555, no .data or .proc_handler, and a .child pointing at the
 * sub-table that holds the actual tunables.  The "net" directory is only
 * present when CONFIG_NET is set.  The table ends with an entry whose
 * ctl_name is 0 (presumably the sentinel that table walkers stop on).
 */
168 static ctl_table root_table[] = {
169 	{
170 		.ctl_name	= CTL_KERN,
171 		.procname	= "kernel",
172 		.mode		= 0555,
173 		.child		= kern_table,
174 	},
175 	{
176 		.ctl_name	= CTL_VM,
177 		.procname	= "vm",
178 		.mode		= 0555,
179 		.child		= vm_table,
180 	},
181 #ifdef CONFIG_NET
182 	{
183 		.ctl_name	= CTL_NET,
184 		.procname	= "net",
185 		.mode		= 0555,
186 		.child		= net_table,
187 	},
188 #endif
189 	{
190 		.ctl_name	= CTL_FS,
191 		.procname	= "fs",
192 		.mode		= 0555,
193 		.child		= fs_table,
194 	},
195 	{
196 		.ctl_name	= CTL_DEBUG,
197 		.procname	= "debug",
198 		.mode		= 0555,
199 		.child		= debug_table,
200 	},
201 	{
202 		.ctl_name	= CTL_DEV,
203 		.procname	= "dev",
204 		.mode		= 0555,
205 		.child		= dev_table,
206 	},
207 /*
208  * NOTE: do not add new entries to this table unless you have read
209  * Documentation/sysctl/ctl_unnumbered.txt
210  */
211 	{ .ctl_name = 0 }
212 };
213 
214 #ifdef CONFIG_SCHED_DEBUG
/*
 * Lower/upper bounds (.extra1/.extra2) for the scheduler tunables in
 * kern_table.
 *
 * These must be plain 'int': every entry pointing at them uses
 * proc_dointvec_minmax / sysctl_intvec, which read the bounds through an
 * 'int *'.  Declaring them 'unsigned long' only works by accident on
 * little-endian machines; on a 64-bit big-endian machine the handler
 * would read the upper (zero) half, see min == max == 0 and reject every
 * write.  All values fit in a 32-bit int (max is 1e9 < INT_MAX).
 */
static int min_sched_granularity_ns = 100000;		/* 100 usecs */
static int max_sched_granularity_ns = 1000000000;	/* 1 second */
static int min_wakeup_granularity_ns;			/* 0 usecs */
static int max_wakeup_granularity_ns = 1000000000;	/* 1 second */
219 #endif
220 
/*
 * Entries of the "kernel" directory (child of root_table's CTL_KERN
 * node).  Each entry names a file (.procname), the variable backing it
 * (.data/.maxlen), its permission bits (.mode) and the handler used for
 * /proc access (.proc_handler); some also set .strategy and optional
 * .extra1/.extra2 bounds.  Many entries are compiled in only for a
 * given config option or architecture.  Entries using CTL_UNNUMBERED
 * have no dedicated KERN_* number (see the NOTE at the end of the table).
 */
221 static ctl_table kern_table[] = {
222 #ifdef CONFIG_SCHED_DEBUG
223 	{
224 		.ctl_name	= CTL_UNNUMBERED,
225 		.procname	= "sched_min_granularity_ns",
226 		.data		= &sysctl_sched_min_granularity,
227 		.maxlen		= sizeof(unsigned int),
228 		.mode		= 0644,
229 		.proc_handler	= &proc_dointvec_minmax,
230 		.strategy	= &sysctl_intvec,
231 		.extra1		= &min_sched_granularity_ns,
232 		.extra2		= &max_sched_granularity_ns,
233 	},
234 	{
235 		.ctl_name	= CTL_UNNUMBERED,
236 		.procname	= "sched_latency_ns",
237 		.data		= &sysctl_sched_latency,
238 		.maxlen		= sizeof(unsigned int),
239 		.mode		= 0644,
240 		.proc_handler	= &proc_dointvec_minmax,
241 		.strategy	= &sysctl_intvec,
242 		.extra1		= &min_sched_granularity_ns,
243 		.extra2		= &max_sched_granularity_ns,
244 	},
245 	{
246 		.ctl_name	= CTL_UNNUMBERED,
247 		.procname	= "sched_wakeup_granularity_ns",
248 		.data		= &sysctl_sched_wakeup_granularity,
249 		.maxlen		= sizeof(unsigned int),
250 		.mode		= 0644,
251 		.proc_handler	= &proc_dointvec_minmax,
252 		.strategy	= &sysctl_intvec,
253 		.extra1		= &min_wakeup_granularity_ns,
254 		.extra2		= &max_wakeup_granularity_ns,
255 	},
256 	{
257 		.ctl_name	= CTL_UNNUMBERED,
258 		.procname	= "sched_batch_wakeup_granularity_ns",
259 		.data		= &sysctl_sched_batch_wakeup_granularity,
260 		.maxlen		= sizeof(unsigned int),
261 		.mode		= 0644,
262 		.proc_handler	= &proc_dointvec_minmax,
263 		.strategy	= &sysctl_intvec,
264 		.extra1		= &min_wakeup_granularity_ns,
265 		.extra2		= &max_wakeup_granularity_ns,
266 	},
267 	{
268 		.ctl_name	= CTL_UNNUMBERED,
269 		.procname	= "sched_stat_granularity_ns",
270 		.data		= &sysctl_sched_stat_granularity,
271 		.maxlen		= sizeof(unsigned int),
272 		.mode		= 0644,
273 		.proc_handler	= &proc_dointvec_minmax,
274 		.strategy	= &sysctl_intvec,
275 		.extra1		= &min_wakeup_granularity_ns,
276 		.extra2		= &max_wakeup_granularity_ns,
277 	},
278 	{
279 		.ctl_name	= CTL_UNNUMBERED,
280 		.procname	= "sched_runtime_limit_ns",
281 		.data		= &sysctl_sched_runtime_limit,
282 		.maxlen		= sizeof(unsigned int),
283 		.mode		= 0644,
284 		.proc_handler	= &proc_dointvec_minmax,
285 		.strategy	= &sysctl_intvec,
286 		.extra1		= &min_sched_granularity_ns,
287 		.extra2		= &max_sched_granularity_ns,
288 	},
289 	{
290 		.ctl_name	= CTL_UNNUMBERED,
291 		.procname	= "sched_child_runs_first",
292 		.data		= &sysctl_sched_child_runs_first,
293 		.maxlen		= sizeof(unsigned int),
294 		.mode		= 0644,
295 		.proc_handler	= &proc_dointvec,
296 	},
297 	{
298 		.ctl_name	= CTL_UNNUMBERED,
299 		.procname	= "sched_features",
300 		.data		= &sysctl_sched_features,
301 		.maxlen		= sizeof(unsigned int),
302 		.mode		= 0644,
303 		.proc_handler	= &proc_dointvec,
304 	},
305 #endif
	/*
	 * Unlike the sched_* entries above, this one sits outside the
	 * CONFIG_SCHED_DEBUG block and is therefore always present.
	 */
306 	{
307 		.ctl_name	= CTL_UNNUMBERED,
308 		.procname	= "sched_compat_yield",
309 		.data		= &sysctl_sched_compat_yield,
310 		.maxlen		= sizeof(unsigned int),
311 		.mode		= 0644,
312 		.proc_handler	= &proc_dointvec,
313 	},
314 #ifdef CONFIG_PROVE_LOCKING
315 	{
316 		.ctl_name	= CTL_UNNUMBERED,
317 		.procname	= "prove_locking",
318 		.data		= &prove_locking,
319 		.maxlen		= sizeof(int),
320 		.mode		= 0644,
321 		.proc_handler	= &proc_dointvec,
322 	},
323 #endif
324 #ifdef CONFIG_LOCK_STAT
325 	{
326 		.ctl_name	= CTL_UNNUMBERED,
327 		.procname	= "lock_stat",
328 		.data		= &lock_stat,
329 		.maxlen		= sizeof(int),
330 		.mode		= 0644,
331 		.proc_handler	= &proc_dointvec,
332 	},
333 #endif
334 	{
335 		.ctl_name	= KERN_PANIC,
336 		.procname	= "panic",
337 		.data		= &panic_timeout,
338 		.maxlen		= sizeof(int),
339 		.mode		= 0644,
340 		.proc_handler	= &proc_dointvec,
341 	},
342 	{
343 		.ctl_name	= KERN_CORE_USES_PID,
344 		.procname	= "core_uses_pid",
345 		.data		= &core_uses_pid,
346 		.maxlen		= sizeof(int),
347 		.mode		= 0644,
348 		.proc_handler	= &proc_dointvec,
349 	},
350 #ifdef CONFIG_AUDITSYSCALL
351 	{
352 		.ctl_name	= CTL_UNNUMBERED,
353 		.procname	= "audit_argv_kb",
354 		.data		= &audit_argv_kb,
355 		.maxlen		= sizeof(int),
356 		.mode		= 0644,
357 		.proc_handler	= &proc_dointvec,
358 	},
359 #endif
360 	{
361 		.ctl_name	= KERN_CORE_PATTERN,
362 		.procname	= "core_pattern",
363 		.data		= core_pattern,
364 		.maxlen		= CORENAME_MAX_SIZE,
365 		.mode		= 0644,
366 		.proc_handler	= &proc_dostring,
367 		.strategy	= &sysctl_string,
368 	},
369 #ifdef CONFIG_PROC_SYSCTL
370 	{
371 		.ctl_name	= KERN_TAINTED,
372 		.procname	= "tainted",
373 		.data		= &tainted,
374 		.maxlen		= sizeof(int),
375 		.mode		= 0644,
376 		.proc_handler	= &proc_dointvec_taint,
377 	},
378 #endif
	/* Mode 0600: no access for group/other. */
379 	{
380 		.ctl_name	= KERN_CAP_BSET,
381 		.procname	= "cap-bound",
382 		.data		= &cap_bset,
383 		.maxlen		= sizeof(kernel_cap_t),
384 		.mode		= 0600,
385 		.proc_handler	= &proc_dointvec_bset,
386 	},
387 #ifdef CONFIG_BLK_DEV_INITRD
388 	{
389 		.ctl_name	= KERN_REALROOTDEV,
390 		.procname	= "real-root-dev",
391 		.data		= &real_root_dev,
392 		.maxlen		= sizeof(int),
393 		.mode		= 0644,
394 		.proc_handler	= &proc_dointvec,
395 	},
396 #endif
397 	{
398 		.ctl_name	= CTL_UNNUMBERED,
399 		.procname	= "print-fatal-signals",
400 		.data		= &print_fatal_signals,
401 		.maxlen		= sizeof(int),
402 		.mode		= 0644,
403 		.proc_handler	= &proc_dointvec,
404 	},
405 #ifdef __sparc__
406 	{
407 		.ctl_name	= KERN_SPARC_REBOOT,
408 		.procname	= "reboot-cmd",
409 		.data		= reboot_command,
410 		.maxlen		= 256,
411 		.mode		= 0644,
412 		.proc_handler	= &proc_dostring,
413 		.strategy	= &sysctl_string,
414 	},
415 	{
416 		.ctl_name	= KERN_SPARC_STOP_A,
417 		.procname	= "stop-a",
418 		.data		= &stop_a_enabled,
419 		.maxlen		= sizeof (int),
420 		.mode		= 0644,
421 		.proc_handler	= &proc_dointvec,
422 	},
423 	{
424 		.ctl_name	= KERN_SPARC_SCONS_PWROFF,
425 		.procname	= "scons-poweroff",
426 		.data		= &scons_pwroff,
427 		.maxlen		= sizeof (int),
428 		.mode		= 0644,
429 		.proc_handler	= &proc_dointvec,
430 	},
431 #endif
432 #ifdef __hppa__
433 	{
434 		.ctl_name	= KERN_HPPA_PWRSW,
435 		.procname	= "soft-power",
436 		.data		= &pwrsw_enabled,
437 		.maxlen		= sizeof (int),
438 	 	.mode		= 0644,
439 		.proc_handler	= &proc_dointvec,
440 	},
441 	{
442 		.ctl_name	= KERN_HPPA_UNALIGNED,
443 		.procname	= "unaligned-trap",
444 		.data		= &unaligned_enabled,
445 		.maxlen		= sizeof (int),
446 		.mode		= 0644,
447 		.proc_handler	= &proc_dointvec,
448 	},
449 #endif
450 	{
451 		.ctl_name	= KERN_CTLALTDEL,
452 		.procname	= "ctrl-alt-del",
453 		.data		= &C_A_D,
454 		.maxlen		= sizeof(int),
455 		.mode		= 0644,
456 		.proc_handler	= &proc_dointvec,
457 	},
	/*
	 * maxlen spans four ints, so console_loglevel is treated as the
	 * first element of a 4-int vector.
	 */
458 	{
459 		.ctl_name	= KERN_PRINTK,
460 		.procname	= "printk",
461 		.data		= &console_loglevel,
462 		.maxlen		= 4*sizeof(int),
463 		.mode		= 0644,
464 		.proc_handler	= &proc_dointvec,
465 	},
466 #ifdef CONFIG_KMOD
467 	{
468 		.ctl_name	= KERN_MODPROBE,
469 		.procname	= "modprobe",
470 		.data		= &modprobe_path,
471 		.maxlen		= KMOD_PATH_LEN,
472 		.mode		= 0644,
473 		.proc_handler	= &proc_dostring,
474 		.strategy	= &sysctl_string,
475 	},
476 #endif
477 #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
478 	{
479 		.ctl_name	= KERN_HOTPLUG,
480 		.procname	= "hotplug",
481 		.data		= &uevent_helper,
482 		.maxlen		= UEVENT_HELPER_PATH_LEN,
483 		.mode		= 0644,
484 		.proc_handler	= &proc_dostring,
485 		.strategy	= &sysctl_string,
486 	},
487 #endif
488 #ifdef CONFIG_CHR_DEV_SG
489 	{
490 		.ctl_name	= KERN_SG_BIG_BUFF,
491 		.procname	= "sg-big-buff",
492 		.data		= &sg_big_buff,
493 		.maxlen		= sizeof (int),
494 		.mode		= 0444,
495 		.proc_handler	= &proc_dointvec,
496 	},
497 #endif
498 #ifdef CONFIG_BSD_PROCESS_ACCT
	/* maxlen spans three ints of the acct_parm[] array. */
499 	{
500 		.ctl_name	= KERN_ACCT,
501 		.procname	= "acct",
502 		.data		= &acct_parm,
503 		.maxlen		= 3*sizeof(int),
504 		.mode		= 0644,
505 		.proc_handler	= &proc_dointvec,
506 	},
507 #endif
508 #ifdef CONFIG_MAGIC_SYSRQ
509 	{
510 		.ctl_name	= KERN_SYSRQ,
511 		.procname	= "sysrq",
512 		.data		= &__sysrq_enabled,
513 		.maxlen		= sizeof (int),
514 		.mode		= 0644,
515 		.proc_handler	= &proc_dointvec,
516 	},
517 #endif
518 #ifdef CONFIG_PROC_SYSCTL
	/*
	 * No backing variable (.data is NULL): the dedicated
	 * proc_do_cad_pid handler presumably produces and consumes the
	 * value itself -- its definition is not in this part of the file.
	 */
519 	{
520 		.ctl_name	= KERN_CADPID,
521 		.procname	= "cad_pid",
522 		.data		= NULL,
523 		.maxlen		= sizeof (int),
524 		.mode		= 0600,
525 		.proc_handler	= &proc_do_cad_pid,
526 	},
527 #endif
528 	{
529 		.ctl_name	= KERN_MAX_THREADS,
530 		.procname	= "threads-max",
531 		.data		= &max_threads,
532 		.maxlen		= sizeof(int),
533 		.mode		= 0644,
534 		.proc_handler	= &proc_dointvec,
535 	},
	/* Directory node: its entries live in the extern random_table. */
536 	{
537 		.ctl_name	= KERN_RANDOM,
538 		.procname	= "random",
539 		.mode		= 0555,
540 		.child		= random_table,
541 	},
542 #ifdef CONFIG_UNIX98_PTYS
	/* Directory node: its entries live in the extern pty_table. */
543 	{
544 		.ctl_name	= KERN_PTY,
545 		.procname	= "pty",
546 		.mode		= 0555,
547 		.child		= pty_table,
548 	},
549 #endif
550 	{
551 		.ctl_name	= KERN_OVERFLOWUID,
552 		.procname	= "overflowuid",
553 		.data		= &overflowuid,
554 		.maxlen		= sizeof(int),
555 		.mode		= 0644,
556 		.proc_handler	= &proc_dointvec_minmax,
557 		.strategy	= &sysctl_intvec,
558 		.extra1		= &minolduid,
559 		.extra2		= &maxolduid,
560 	},
561 	{
562 		.ctl_name	= KERN_OVERFLOWGID,
563 		.procname	= "overflowgid",
564 		.data		= &overflowgid,
565 		.maxlen		= sizeof(int),
566 		.mode		= 0644,
567 		.proc_handler	= &proc_dointvec_minmax,
568 		.strategy	= &sysctl_intvec,
569 		.extra1		= &minolduid,
570 		.extra2		= &maxolduid,
571 	},
572 #ifdef CONFIG_S390
573 #ifdef CONFIG_MATHEMU
574 	{
575 		.ctl_name	= KERN_IEEE_EMULATION_WARNINGS,
576 		.procname	= "ieee_emulation_warnings",
577 		.data		= &sysctl_ieee_emulation_warnings,
578 		.maxlen		= sizeof(int),
579 		.mode		= 0644,
580 		.proc_handler	= &proc_dointvec,
581 	},
582 #endif
583 #ifdef CONFIG_NO_IDLE_HZ
584 	{
585 		.ctl_name       = KERN_HZ_TIMER,
586 		.procname       = "hz_timer",
587 		.data           = &sysctl_hz_timer,
588 		.maxlen         = sizeof(int),
589 		.mode           = 0644,
590 		.proc_handler   = &proc_dointvec,
591 	},
592 #endif
593 	{
594 		.ctl_name	= KERN_S390_USER_DEBUG_LOGGING,
595 		.procname	= "userprocess_debug",
596 		.data		= &sysctl_userprocess_debug,
597 		.maxlen		= sizeof(int),
598 		.mode		= 0644,
599 		.proc_handler	= &proc_dointvec,
600 	},
601 #endif
602 	{
603 		.ctl_name	= KERN_PIDMAX,
604 		.procname	= "pid_max",
605 		.data		= &pid_max,
606 		.maxlen		= sizeof (int),
607 		.mode		= 0644,
608 		.proc_handler	= &proc_dointvec_minmax,
609 		.strategy	= sysctl_intvec,
610 		.extra1		= &pid_max_min,
611 		.extra2		= &pid_max_max,
612 	},
613 	{
614 		.ctl_name	= KERN_PANIC_ON_OOPS,
615 		.procname	= "panic_on_oops",
616 		.data		= &panic_on_oops,
617 		.maxlen		= sizeof(int),
618 		.mode		= 0644,
619 		.proc_handler	= &proc_dointvec,
620 	},
621 	{
622 		.ctl_name	= KERN_PRINTK_RATELIMIT,
623 		.procname	= "printk_ratelimit",
624 		.data		= &printk_ratelimit_jiffies,
625 		.maxlen		= sizeof(int),
626 		.mode		= 0644,
627 		.proc_handler	= &proc_dointvec_jiffies,
628 		.strategy	= &sysctl_jiffies,
629 	},
630 	{
631 		.ctl_name	= KERN_PRINTK_RATELIMIT_BURST,
632 		.procname	= "printk_ratelimit_burst",
633 		.data		= &printk_ratelimit_burst,
634 		.maxlen		= sizeof(int),
635 		.mode		= 0644,
636 		.proc_handler	= &proc_dointvec,
637 	},
638 	{
639 		.ctl_name	= KERN_NGROUPS_MAX,
640 		.procname	= "ngroups_max",
641 		.data		= &ngroups_max,
642 		.maxlen		= sizeof (int),
643 		.mode		= 0444,
644 		.proc_handler	= &proc_dointvec,
645 	},
646 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
647 	{
648 		.ctl_name       = KERN_UNKNOWN_NMI_PANIC,
649 		.procname       = "unknown_nmi_panic",
650 		.data           = &unknown_nmi_panic,
651 		.maxlen         = sizeof (int),
652 		.mode           = 0644,
653 		.proc_handler   = &proc_dointvec,
654 	},
655 	{
656 		.ctl_name       = KERN_NMI_WATCHDOG,
657 		.procname       = "nmi_watchdog",
658 		.data           = &nmi_watchdog_enabled,
659 		.maxlen         = sizeof (int),
660 		.mode           = 0644,
661 		.proc_handler   = &proc_nmi_enabled,
662 	},
663 #endif
664 #if defined(CONFIG_X86)
665 	{
666 		.ctl_name	= KERN_PANIC_ON_NMI,
667 		.procname	= "panic_on_unrecovered_nmi",
668 		.data		= &panic_on_unrecovered_nmi,
669 		.maxlen		= sizeof(int),
670 		.mode		= 0644,
671 		.proc_handler	= &proc_dointvec,
672 	},
673 	{
674 		.ctl_name	= KERN_BOOTLOADER_TYPE,
675 		.procname	= "bootloader_type",
676 		.data		= &bootloader_type,
677 		.maxlen		= sizeof (int),
678 		.mode		= 0444,
679 		.proc_handler	= &proc_dointvec,
680 	},
681 	{
682 		.ctl_name	= CTL_UNNUMBERED,
683 		.procname	= "kstack_depth_to_print",
684 		.data		= &kstack_depth_to_print,
685 		.maxlen		= sizeof(int),
686 		.mode		= 0644,
687 		.proc_handler	= &proc_dointvec,
688 	},
689 #endif
690 #if defined(CONFIG_MMU)
691 	{
692 		.ctl_name	= KERN_RANDOMIZE,
693 		.procname	= "randomize_va_space",
694 		.data		= &randomize_va_space,
695 		.maxlen		= sizeof(int),
696 		.mode		= 0644,
697 		.proc_handler	= &proc_dointvec,
698 	},
699 #endif
700 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
701 	{
702 		.ctl_name	= KERN_SPIN_RETRY,
703 		.procname	= "spin_retry",
704 		.data		= &spin_retry,
705 		.maxlen		= sizeof (int),
706 		.mode		= 0644,
707 		.proc_handler	= &proc_dointvec,
708 	},
709 #endif
710 #if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
711 	{
712 		.ctl_name	= KERN_ACPI_VIDEO_FLAGS,
713 		.procname	= "acpi_video_flags",
714 		.data		= &acpi_realmode_flags,
715 		.maxlen		= sizeof (unsigned long),
716 		.mode		= 0644,
717 		.proc_handler	= &proc_doulongvec_minmax,
718 	},
719 #endif
720 #ifdef CONFIG_IA64
721 	{
722 		.ctl_name	= KERN_IA64_UNALIGNED,
723 		.procname	= "ignore-unaligned-usertrap",
724 		.data		= &no_unaligned_warning,
725 		.maxlen		= sizeof (int),
726 	 	.mode		= 0644,
727 		.proc_handler	= &proc_dointvec,
728 	},
729 #endif
730 #ifdef CONFIG_COMPAT
731 	{
732 		.ctl_name	= KERN_COMPAT_LOG,
733 		.procname	= "compat-log",
734 		.data		= &compat_log,
735 		.maxlen		= sizeof (int),
736 	 	.mode		= 0644,
737 		.proc_handler	= &proc_dointvec,
738 	},
739 #endif
740 #ifdef CONFIG_RT_MUTEXES
741 	{
742 		.ctl_name	= KERN_MAX_LOCK_DEPTH,
743 		.procname	= "max_lock_depth",
744 		.data		= &max_lock_depth,
745 		.maxlen		= sizeof(int),
746 		.mode		= 0644,
747 		.proc_handler	= &proc_dointvec,
748 	},
749 #endif
750 #ifdef CONFIG_PROC_FS
751 	{
752 		.ctl_name       = CTL_UNNUMBERED,
753 		.procname       = "maps_protect",
754 		.data           = &maps_protect,
755 		.maxlen         = sizeof(int),
756 		.mode           = 0644,
757 		.proc_handler   = &proc_dointvec,
758 	},
759 #endif
760 	{
761 		.ctl_name	= CTL_UNNUMBERED,
762 		.procname	= "poweroff_cmd",
763 		.data		= &poweroff_cmd,
764 		.maxlen		= POWEROFF_CMD_PATH_LEN,
765 		.mode		= 0644,
766 		.proc_handler	= &proc_dostring,
767 		.strategy	= &sysctl_string,
768 	},
769 /*
770  * NOTE: do not add new entries to this table unless you have read
771  * Documentation/sysctl/ctl_unnumbered.txt
772  */
773 	{ .ctl_name = 0 }
774 };
775 
776 /* Constants for minimum and maximum testing in vm_table.
777    We use these as one-element integer vectors. */
/*
 * They are passed by address as .extra1/.extra2 bounds.  Despite the
 * wording above, fs_table also uses them: its "lease-break-time" entry
 * is bounded by &zero and &two.
 */
778 static int zero;	/* no initializer: static storage, value 0 */
779 static int two = 2;
780 static int one_hundred = 100;	/* upper bound of the ratio/percentage entries */
781 
782 
/*
 * Entries of the "vm" directory (child of root_table's CTL_VM node).
 * Entries bounded on both sides use &zero/&one_hundred as
 * .extra1/.extra2; several others supply only a lower bound in .extra1.
 * Several entries use a subsystem-specific handler instead of the
 * generic proc_dointvec* family.
 *
 * NOTE(review): block_dump, vfs_cache_pressure, legacy_va_layout,
 * zone_reclaim_mode and vdso_enabled set .extra1 = &zero but use plain
 * proc_dointvec rather than proc_dointvec_minmax.  Confirm whether the
 * /proc path actually enforces that lower bound.
 */
783 static ctl_table vm_table[] = {
784 	{
785 		.ctl_name	= VM_OVERCOMMIT_MEMORY,
786 		.procname	= "overcommit_memory",
787 		.data		= &sysctl_overcommit_memory,
788 		.maxlen		= sizeof(sysctl_overcommit_memory),
789 		.mode		= 0644,
790 		.proc_handler	= &proc_dointvec,
791 	},
792 	{
793 		.ctl_name	= VM_PANIC_ON_OOM,
794 		.procname	= "panic_on_oom",
795 		.data		= &sysctl_panic_on_oom,
796 		.maxlen		= sizeof(sysctl_panic_on_oom),
797 		.mode		= 0644,
798 		.proc_handler	= &proc_dointvec,
799 	},
800 	{
801 		.ctl_name	= VM_OVERCOMMIT_RATIO,
802 		.procname	= "overcommit_ratio",
803 		.data		= &sysctl_overcommit_ratio,
804 		.maxlen		= sizeof(sysctl_overcommit_ratio),
805 		.mode		= 0644,
806 		.proc_handler	= &proc_dointvec,
807 	},
808 	{
809 		.ctl_name	= VM_PAGE_CLUSTER,
810 		.procname	= "page-cluster",
811 		.data		= &page_cluster,
812 		.maxlen		= sizeof(int),
813 		.mode		= 0644,
814 		.proc_handler	= &proc_dointvec,
815 	},
816 	{
817 		.ctl_name	= VM_DIRTY_BACKGROUND,
818 		.procname	= "dirty_background_ratio",
819 		.data		= &dirty_background_ratio,
820 		.maxlen		= sizeof(dirty_background_ratio),
821 		.mode		= 0644,
822 		.proc_handler	= &proc_dointvec_minmax,
823 		.strategy	= &sysctl_intvec,
824 		.extra1		= &zero,
825 		.extra2		= &one_hundred,
826 	},
827 	{
828 		.ctl_name	= VM_DIRTY_RATIO,
829 		.procname	= "dirty_ratio",
830 		.data		= &vm_dirty_ratio,
831 		.maxlen		= sizeof(vm_dirty_ratio),
832 		.mode		= 0644,
833 		.proc_handler	= &proc_dointvec_minmax,
834 		.strategy	= &sysctl_intvec,
835 		.extra1		= &zero,
836 		.extra2		= &one_hundred,
837 	},
838 	{
839 		.ctl_name	= VM_DIRTY_WB_CS,
840 		.procname	= "dirty_writeback_centisecs",
841 		.data		= &dirty_writeback_interval,
842 		.maxlen		= sizeof(dirty_writeback_interval),
843 		.mode		= 0644,
844 		.proc_handler	= &dirty_writeback_centisecs_handler,
845 	},
846 	{
847 		.ctl_name	= VM_DIRTY_EXPIRE_CS,
848 		.procname	= "dirty_expire_centisecs",
849 		.data		= &dirty_expire_interval,
850 		.maxlen		= sizeof(dirty_expire_interval),
851 		.mode		= 0644,
852 		.proc_handler	= &proc_dointvec_userhz_jiffies,
853 	},
854 	{
855 		.ctl_name	= VM_NR_PDFLUSH_THREADS,
856 		.procname	= "nr_pdflush_threads",
857 		.data		= &nr_pdflush_threads,
858 		.maxlen		= sizeof nr_pdflush_threads,
859 		.mode		= 0444 /* read-only*/,
860 		.proc_handler	= &proc_dointvec,
861 	},
862 	{
863 		.ctl_name	= VM_SWAPPINESS,
864 		.procname	= "swappiness",
865 		.data		= &vm_swappiness,
866 		.maxlen		= sizeof(vm_swappiness),
867 		.mode		= 0644,
868 		.proc_handler	= &proc_dointvec_minmax,
869 		.strategy	= &sysctl_intvec,
870 		.extra1		= &zero,
871 		.extra2		= &one_hundred,
872 	},
873 #ifdef CONFIG_HUGETLB_PAGE
874 	 {
875 		.ctl_name	= VM_HUGETLB_PAGES,
876 		.procname	= "nr_hugepages",
877 		.data		= &max_huge_pages,
878 		.maxlen		= sizeof(unsigned long),
879 		.mode		= 0644,
880 		.proc_handler	= &hugetlb_sysctl_handler,
881 		.extra1		= (void *)&hugetlb_zero,
882 		.extra2		= (void *)&hugetlb_infinity,
883 	 },
884 	 {
885 		.ctl_name	= VM_HUGETLB_GROUP,
886 		.procname	= "hugetlb_shm_group",
887 		.data		= &sysctl_hugetlb_shm_group,
888 		.maxlen		= sizeof(gid_t),
889 		.mode		= 0644,
890 		.proc_handler	= &proc_dointvec,
891 	 },
892 	 {
893 		.ctl_name	= CTL_UNNUMBERED,
894 		.procname	= "hugepages_treat_as_movable",
895 		.data		= &hugepages_treat_as_movable,
896 		.maxlen		= sizeof(int),
897 		.mode		= 0644,
898 		.proc_handler	= &hugetlb_treat_movable_handler,
899 	},
900 #endif
901 	{
902 		.ctl_name	= VM_LOWMEM_RESERVE_RATIO,
903 		.procname	= "lowmem_reserve_ratio",
904 		.data		= &sysctl_lowmem_reserve_ratio,
905 		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
906 		.mode		= 0644,
907 		.proc_handler	= &lowmem_reserve_ratio_sysctl_handler,
908 		.strategy	= &sysctl_intvec,
909 	},
910 	{
911 		.ctl_name	= VM_DROP_PAGECACHE,
912 		.procname	= "drop_caches",
913 		.data		= &sysctl_drop_caches,
914 		.maxlen		= sizeof(int),
915 		.mode		= 0644,
916 		.proc_handler	= drop_caches_sysctl_handler,
917 		.strategy	= &sysctl_intvec,
918 	},
919 	{
920 		.ctl_name	= VM_MIN_FREE_KBYTES,
921 		.procname	= "min_free_kbytes",
922 		.data		= &min_free_kbytes,
923 		.maxlen		= sizeof(min_free_kbytes),
924 		.mode		= 0644,
925 		.proc_handler	= &min_free_kbytes_sysctl_handler,
926 		.strategy	= &sysctl_intvec,
927 		.extra1		= &zero,
928 	},
929 	{
930 		.ctl_name	= VM_PERCPU_PAGELIST_FRACTION,
931 		.procname	= "percpu_pagelist_fraction",
932 		.data		= &percpu_pagelist_fraction,
933 		.maxlen		= sizeof(percpu_pagelist_fraction),
934 		.mode		= 0644,
935 		.proc_handler	= &percpu_pagelist_fraction_sysctl_handler,
936 		.strategy	= &sysctl_intvec,
937 		.extra1		= &min_percpu_pagelist_fract,
938 	},
939 #ifdef CONFIG_MMU
940 	{
941 		.ctl_name	= VM_MAX_MAP_COUNT,
942 		.procname	= "max_map_count",
943 		.data		= &sysctl_max_map_count,
944 		.maxlen		= sizeof(sysctl_max_map_count),
945 		.mode		= 0644,
946 		.proc_handler	= &proc_dointvec
947 	},
948 #endif
949 	{
950 		.ctl_name	= VM_LAPTOP_MODE,
951 		.procname	= "laptop_mode",
952 		.data		= &laptop_mode,
953 		.maxlen		= sizeof(laptop_mode),
954 		.mode		= 0644,
955 		.proc_handler	= &proc_dointvec_jiffies,
956 		.strategy	= &sysctl_jiffies,
957 	},
958 	{
959 		.ctl_name	= VM_BLOCK_DUMP,
960 		.procname	= "block_dump",
961 		.data		= &block_dump,
962 		.maxlen		= sizeof(block_dump),
963 		.mode		= 0644,
964 		.proc_handler	= &proc_dointvec,
965 		.strategy	= &sysctl_intvec,
966 		.extra1		= &zero,
967 	},
968 	{
969 		.ctl_name	= VM_VFS_CACHE_PRESSURE,
970 		.procname	= "vfs_cache_pressure",
971 		.data		= &sysctl_vfs_cache_pressure,
972 		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
973 		.mode		= 0644,
974 		.proc_handler	= &proc_dointvec,
975 		.strategy	= &sysctl_intvec,
976 		.extra1		= &zero,
977 	},
978 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
979 	{
980 		.ctl_name	= VM_LEGACY_VA_LAYOUT,
981 		.procname	= "legacy_va_layout",
982 		.data		= &sysctl_legacy_va_layout,
983 		.maxlen		= sizeof(sysctl_legacy_va_layout),
984 		.mode		= 0644,
985 		.proc_handler	= &proc_dointvec,
986 		.strategy	= &sysctl_intvec,
987 		.extra1		= &zero,
988 	},
989 #endif
990 #ifdef CONFIG_NUMA
991 	{
992 		.ctl_name	= VM_ZONE_RECLAIM_MODE,
993 		.procname	= "zone_reclaim_mode",
994 		.data		= &zone_reclaim_mode,
995 		.maxlen		= sizeof(zone_reclaim_mode),
996 		.mode		= 0644,
997 		.proc_handler	= &proc_dointvec,
998 		.strategy	= &sysctl_intvec,
999 		.extra1		= &zero,
1000 	},
1001 	{
1002 		.ctl_name	= VM_MIN_UNMAPPED,
1003 		.procname	= "min_unmapped_ratio",
1004 		.data		= &sysctl_min_unmapped_ratio,
1005 		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
1006 		.mode		= 0644,
1007 		.proc_handler	= &sysctl_min_unmapped_ratio_sysctl_handler,
1008 		.strategy	= &sysctl_intvec,
1009 		.extra1		= &zero,
1010 		.extra2		= &one_hundred,
1011 	},
1012 	{
1013 		.ctl_name	= VM_MIN_SLAB,
1014 		.procname	= "min_slab_ratio",
1015 		.data		= &sysctl_min_slab_ratio,
1016 		.maxlen		= sizeof(sysctl_min_slab_ratio),
1017 		.mode		= 0644,
1018 		.proc_handler	= &sysctl_min_slab_ratio_sysctl_handler,
1019 		.strategy	= &sysctl_intvec,
1020 		.extra1		= &zero,
1021 		.extra2		= &one_hundred,
1022 	},
1023 #endif
1024 #ifdef CONFIG_SMP
1025 	{
1026 		.ctl_name	= CTL_UNNUMBERED,
1027 		.procname	= "stat_interval",
1028 		.data		= &sysctl_stat_interval,
1029 		.maxlen		= sizeof(sysctl_stat_interval),
1030 		.mode		= 0644,
1031 		.proc_handler	= &proc_dointvec_jiffies,
1032 		.strategy	= &sysctl_jiffies,
1033 	},
1034 #endif
1035 #ifdef CONFIG_SECURITY
1036 	{
1037 		.ctl_name	= CTL_UNNUMBERED,
1038 		.procname	= "mmap_min_addr",
1039 		.data		= &mmap_min_addr,
1040 		.maxlen         = sizeof(unsigned long),
1041 		.mode		= 0644,
1042 		.proc_handler	= &proc_doulongvec_minmax,
1043 	},
1044 #endif
1045 #ifdef CONFIG_NUMA
1046 	{
1047 		.ctl_name	= CTL_UNNUMBERED,
1048 		.procname	= "numa_zonelist_order",
1049 		.data		= &numa_zonelist_order,
1050 		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
1051 		.mode		= 0644,
1052 		.proc_handler	= &numa_zonelist_order_handler,
1053 		.strategy	= &sysctl_string,
1054 	},
1055 #endif
1056 #if defined(CONFIG_X86_32) || \
1057    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1058 	{
1059 		.ctl_name	= VM_VDSO_ENABLED,
1060 		.procname	= "vdso_enabled",
1061 		.data		= &vdso_enabled,
1062 		.maxlen		= sizeof(vdso_enabled),
1063 		.mode		= 0644,
1064 		.proc_handler	= &proc_dointvec,
1065 		.strategy	= &sysctl_intvec,
1066 		.extra1		= &zero,
1067 	},
1068 #endif
1069 /*
1070  * NOTE: do not add new entries to this table unless you have read
1071  * Documentation/sysctl/ctl_unnumbered.txt
1072  */
1073 	{ .ctl_name = 0 }
1074 };
1075 
1076 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
/*
 * Empty table: it holds only the closing { .ctl_name = 0 } entry.  It is
 * the .child of the "binfmt_misc" directory entry in fs_table, which is
 * guarded by the same #if condition.
 */
1077 static ctl_table binfmt_misc_table[] = {
1078 	{ .ctl_name = 0 }
1079 };
1080 #endif
1081 
/*
 * Table of filesystem-related sysctl entries.  Each entry names the kernel
 * variable it exposes (.data/.maxlen), its permission bits (.mode) and the
 * handler used for the /proc text interface (.proc_handler).  The table is
 * terminated by the all-zero entry at the end.
 */
static ctl_table fs_table[] = {
	/* first two ints of inodes_stat, read-only */
	{
		.ctl_name	= FS_NRINODE,
		.procname	= "inode-nr",
		.data		= &inodes_stat,
		.maxlen		= 2*sizeof(int),
		.mode		= 0444,
		.proc_handler	= &proc_dointvec,
	},
	/* seven ints starting at inodes_stat, read-only */
	{
		.ctl_name	= FS_STATINODE,
		.procname	= "inode-state",
		.data		= &inodes_stat,
		.maxlen		= 7*sizeof(int),
		.mode		= 0444,
		.proc_handler	= &proc_dointvec,
	},
	/* three ints of files_stat, read-only, formatted by proc_nr_files */
	{
		.ctl_name	= FS_NRFILE,
		.procname	= "file-nr",
		.data		= &files_stat,
		.maxlen		= 3*sizeof(int),
		.mode		= 0444,
		.proc_handler	= &proc_nr_files,
	},
	/* files_stat.max_files, writable by the owner class (0644) */
	{
		.ctl_name	= FS_MAXFILE,
		.procname	= "file-max",
		.data		= &files_stat.max_files,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	/* six ints of dentry_stat, read-only */
	{
		.ctl_name	= FS_DENTRY,
		.procname	= "dentry-state",
		.data		= &dentry_stat,
		.maxlen		= 6*sizeof(int),
		.mode		= 0444,
		.proc_handler	= &proc_dointvec,
	},
	/* fs_overflowuid, range-checked against minolduid..maxolduid */
	{
		.ctl_name	= FS_OVERFLOWUID,
		.procname	= "overflowuid",
		.data		= &fs_overflowuid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &minolduid,
		.extra2		= &maxolduid,
	},
	/* fs_overflowgid, range-checked against the same minolduid..maxolduid */
	{
		.ctl_name	= FS_OVERFLOWGID,
		.procname	= "overflowgid",
		.data		= &fs_overflowgid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &minolduid,
		.extra2		= &maxolduid,
	},
	/* leases_enable integer */
	{
		.ctl_name	= FS_LEASES,
		.procname	= "leases-enable",
		.data		= &leases_enable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#ifdef CONFIG_DNOTIFY
	/* dir_notify_enable integer; only present with CONFIG_DNOTIFY */
	{
		.ctl_name	= FS_DIR_NOTIFY,
		.procname	= "dir-notify-enable",
		.data		= &dir_notify_enable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#endif
#ifdef CONFIG_MMU
	/* lease_break_time, range-checked against zero..two */
	{
		.ctl_name	= FS_LEASE_TIME,
		.procname	= "lease-break-time",
		.data		= &lease_break_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &zero,
		.extra2		= &two,
	},
	/* aio_nr, read-only, handled as unsigned long */
	{
		.ctl_name	= FS_AIO_NR,
		.procname	= "aio-nr",
		.data		= &aio_nr,
		.maxlen		= sizeof(aio_nr),
		.mode		= 0444,
		.proc_handler	= &proc_doulongvec_minmax,
	},
	/* aio_max_nr, handled as unsigned long; no extra1/extra2 bounds given */
	{
		.ctl_name	= FS_AIO_MAX_NR,
		.procname	= "aio-max-nr",
		.data		= &aio_max_nr,
		.maxlen		= sizeof(aio_max_nr),
		.mode		= 0644,
		.proc_handler	= &proc_doulongvec_minmax,
	},
#ifdef CONFIG_INOTIFY_USER
	/* directory entry whose children come from inotify_table */
	{
		.ctl_name	= FS_INOTIFY,
		.procname	= "inotify",
		.mode		= 0555,
		.child		= inotify_table,
	},
#endif
#endif
	/* suid_dumpable integer */
	{
		.ctl_name	= KERN_SETUID_DUMPABLE,
		.procname	= "suid_dumpable",
		.data		= &suid_dumpable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
	/* directory entry backed by the (empty) binfmt_misc_table */
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "binfmt_misc",
		.mode		= 0555,
		.child		= binfmt_misc_table,
	},
#endif
/*
 * NOTE: do not add new entries to this table unless you have read
 * Documentation/sysctl/ctl_unnumbered.txt
 */
	{ .ctl_name = 0 }
};
1222 
/*
 * Table of debugging sysctl entries.  On X86 and PPC builds it exposes
 * show_unhandled_signals as "exception-trace"; otherwise it holds only the
 * terminator.
 */
static ctl_table debug_table[] = {
#if defined(CONFIG_X86) || defined(CONFIG_PPC)
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "exception-trace",
		.data		= &show_unhandled_signals,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
#endif
	{ .ctl_name = 0 }
};
1236 
/* Empty table: contains only the terminating entry. */
static ctl_table dev_table[] = {
	{ .ctl_name = 0 }
};
1240 
/*
 * Protects the list of registered table headers (ctl_entry links) and the
 * used / unregistering fields of each ctl_table_header.
 */
static DEFINE_SPINLOCK(sysctl_lock);
1242 
1243 /* called under sysctl_lock */
1244 static int use_table(struct ctl_table_header *p)
1245 {
1246 	if (unlikely(p->unregistering))
1247 		return 0;
1248 	p->used++;
1249 	return 1;
1250 }
1251 
1252 /* called under sysctl_lock */
1253 static void unuse_table(struct ctl_table_header *p)
1254 {
1255 	if (!--p->used)
1256 		if (unlikely(p->unregistering))
1257 			complete(p->unregistering);
1258 }
1259 
1260 /* called under sysctl_lock, will reacquire if has to wait */
1261 static void start_unregistering(struct ctl_table_header *p)
1262 {
1263 	/*
1264 	 * if p->used is 0, nobody will ever touch that entry again;
1265 	 * we'll eliminate all paths to it before dropping sysctl_lock
1266 	 */
1267 	if (unlikely(p->used)) {
1268 		struct completion wait;
1269 		init_completion(&wait);
1270 		p->unregistering = &wait;
1271 		spin_unlock(&sysctl_lock);
1272 		wait_for_completion(&wait);
1273 		spin_lock(&sysctl_lock);
1274 	}
1275 	/*
1276 	 * do not remove from the list until nobody holds it; walking the
1277 	 * list in do_sysctl() relies on that.
1278 	 */
1279 	list_del_init(&p->ctl_entry);
1280 }
1281 
1282 void sysctl_head_finish(struct ctl_table_header *head)
1283 {
1284 	if (!head)
1285 		return;
1286 	spin_lock(&sysctl_lock);
1287 	unuse_table(head);
1288 	spin_unlock(&sysctl_lock);
1289 }
1290 
1291 struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
1292 {
1293 	struct ctl_table_header *head;
1294 	struct list_head *tmp;
1295 	spin_lock(&sysctl_lock);
1296 	if (prev) {
1297 		tmp = &prev->ctl_entry;
1298 		unuse_table(prev);
1299 		goto next;
1300 	}
1301 	tmp = &root_table_header.ctl_entry;
1302 	for (;;) {
1303 		head = list_entry(tmp, struct ctl_table_header, ctl_entry);
1304 
1305 		if (!use_table(head))
1306 			goto next;
1307 		spin_unlock(&sysctl_lock);
1308 		return head;
1309 	next:
1310 		tmp = tmp->next;
1311 		if (tmp == &root_table_header.ctl_entry)
1312 			break;
1313 	}
1314 	spin_unlock(&sysctl_lock);
1315 	return NULL;
1316 }
1317 
1318 #ifdef CONFIG_SYSCTL_SYSCALL
1319 int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
1320 	       void __user *newval, size_t newlen)
1321 {
1322 	struct ctl_table_header *head;
1323 	int error = -ENOTDIR;
1324 
1325 	if (nlen <= 0 || nlen >= CTL_MAXNAME)
1326 		return -ENOTDIR;
1327 	if (oldval) {
1328 		int old_len;
1329 		if (!oldlenp || get_user(old_len, oldlenp))
1330 			return -EFAULT;
1331 	}
1332 
1333 	for (head = sysctl_head_next(NULL); head;
1334 			head = sysctl_head_next(head)) {
1335 		error = parse_table(name, nlen, oldval, oldlenp,
1336 					newval, newlen, head->ctl_table);
1337 		if (error != -ENOTDIR) {
1338 			sysctl_head_finish(head);
1339 			break;
1340 		}
1341 	}
1342 	return error;
1343 }
1344 
1345 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
1346 {
1347 	struct __sysctl_args tmp;
1348 	int error;
1349 
1350 	if (copy_from_user(&tmp, args, sizeof(tmp)))
1351 		return -EFAULT;
1352 
1353 	lock_kernel();
1354 	error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
1355 			  tmp.newval, tmp.newlen);
1356 	unlock_kernel();
1357 	return error;
1358 }
1359 #endif /* CONFIG_SYSCTL_SYSCALL */
1360 
1361 /*
1362  * sysctl_perm does NOT grant the superuser all rights automatically, because
1363  * some sysctl variables are readonly even to root.
1364  */
1365 
1366 static int test_perm(int mode, int op)
1367 {
1368 	if (!current->euid)
1369 		mode >>= 6;
1370 	else if (in_egroup_p(0))
1371 		mode >>= 3;
1372 	if ((mode & op & 0007) == op)
1373 		return 0;
1374 	return -EACCES;
1375 }
1376 
1377 int sysctl_perm(ctl_table *table, int op)
1378 {
1379 	int error;
1380 	error = security_sysctl(table, op);
1381 	if (error)
1382 		return error;
1383 	return test_perm(table->mode, op);
1384 }
1385 
1386 #ifdef CONFIG_SYSCTL_SYSCALL
1387 static int parse_table(int __user *name, int nlen,
1388 		       void __user *oldval, size_t __user *oldlenp,
1389 		       void __user *newval, size_t newlen,
1390 		       ctl_table *table)
1391 {
1392 	int n;
1393 repeat:
1394 	if (!nlen)
1395 		return -ENOTDIR;
1396 	if (get_user(n, name))
1397 		return -EFAULT;
1398 	for ( ; table->ctl_name || table->procname; table++) {
1399 		if (!table->ctl_name)
1400 			continue;
1401 		if (n == table->ctl_name) {
1402 			int error;
1403 			if (table->child) {
1404 				if (sysctl_perm(table, 001))
1405 					return -EPERM;
1406 				name++;
1407 				nlen--;
1408 				table = table->child;
1409 				goto repeat;
1410 			}
1411 			error = do_sysctl_strategy(table, name, nlen,
1412 						   oldval, oldlenp,
1413 						   newval, newlen);
1414 			return error;
1415 		}
1416 	}
1417 	return -ENOTDIR;
1418 }
1419 
1420 /* Perform the actual read/write of a sysctl table entry. */
1421 int do_sysctl_strategy (ctl_table *table,
1422 			int __user *name, int nlen,
1423 			void __user *oldval, size_t __user *oldlenp,
1424 			void __user *newval, size_t newlen)
1425 {
1426 	int op = 0, rc;
1427 	size_t len;
1428 
1429 	if (oldval)
1430 		op |= 004;
1431 	if (newval)
1432 		op |= 002;
1433 	if (sysctl_perm(table, op))
1434 		return -EPERM;
1435 
1436 	if (table->strategy) {
1437 		rc = table->strategy(table, name, nlen, oldval, oldlenp,
1438 				     newval, newlen);
1439 		if (rc < 0)
1440 			return rc;
1441 		if (rc > 0)
1442 			return 0;
1443 	}
1444 
1445 	/* If there is no strategy routine, or if the strategy returns
1446 	 * zero, proceed with automatic r/w */
1447 	if (table->data && table->maxlen) {
1448 		if (oldval && oldlenp) {
1449 			if (get_user(len, oldlenp))
1450 				return -EFAULT;
1451 			if (len) {
1452 				if (len > table->maxlen)
1453 					len = table->maxlen;
1454 				if(copy_to_user(oldval, table->data, len))
1455 					return -EFAULT;
1456 				if(put_user(len, oldlenp))
1457 					return -EFAULT;
1458 			}
1459 		}
1460 		if (newval && newlen) {
1461 			len = newlen;
1462 			if (len > table->maxlen)
1463 				len = table->maxlen;
1464 			if(copy_from_user(table->data, newval, len))
1465 				return -EFAULT;
1466 		}
1467 	}
1468 	return 0;
1469 }
1470 #endif /* CONFIG_SYSCTL_SYSCALL */
1471 
1472 static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1473 {
1474 	for (; table->ctl_name || table->procname; table++) {
1475 		table->parent = parent;
1476 		if (table->child)
1477 			sysctl_set_parent(table, table->child);
1478 	}
1479 }
1480 
/*
 * Boot-time initialisation: fill in the parent pointers of the built-in
 * root_table hierarchy.  Always returns 0.  Registered via core_initcall().
 */
static __init int sysctl_init(void)
{
	sysctl_set_parent(NULL, root_table);
	return 0;
}

core_initcall(sysctl_init);
1488 
1489 /**
1490  * register_sysctl_table - register a sysctl hierarchy
1491  * @table: the top-level table structure
1492  *
 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
 * array. An entry with a ctl_name of 0 and a %NULL procname terminates the table.
1495  *
1496  * The members of the &ctl_table structure are used as follows:
1497  *
1498  * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
1499  *            must be unique within that level of sysctl
1500  *
1501  * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1502  *            enter a sysctl file
1503  *
1504  * data - a pointer to data for use by proc_handler
1505  *
1506  * maxlen - the maximum size in bytes of the data
1507  *
1508  * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1509  *
1510  * child - a pointer to the child sysctl table if this entry is a directory, or
1511  *         %NULL.
1512  *
1513  * proc_handler - the text handler routine (described below)
1514  *
1515  * strategy - the strategy routine (described below)
1516  *
1517  * de - for internal use by the sysctl routines
1518  *
1519  * extra1, extra2 - extra pointers usable by the proc handler routines
1520  *
1521  * Leaf nodes in the sysctl tree will be represented by a single file
1522  * under /proc; non-leaf nodes will be represented by directories.
1523  *
1524  * sysctl(2) can automatically manage read and write requests through
1525  * the sysctl table.  The data and maxlen fields of the ctl_table
1526  * struct enable minimal validation of the values being written to be
1527  * performed, and the mode field allows minimal authentication.
1528  *
1529  * More sophisticated management can be enabled by the provision of a
1530  * strategy routine with the table entry.  This will be called before
1531  * any automatic read or write of the data is performed.
1532  *
1533  * The strategy routine may return
1534  *
1535  * < 0 - Error occurred (error is passed to user process)
1536  *
1537  * 0   - OK - proceed with automatic read or write.
1538  *
1539  * > 0 - OK - read or write has been done by the strategy routine, so
1540  *       return immediately.
1541  *
1542  * There must be a proc_handler routine for any terminal nodes
1543  * mirrored under /proc/sys (non-terminals are handled by a built-in
1544  * directory handler).  Several default handlers are available to
1545  * cover common cases -
1546  *
1547  * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1548  * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1549  * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1550  *
1551  * It is the handler's job to read the input buffer from user memory
1552  * and process it. The handler should return 0 on success.
1553  *
1554  * This routine returns %NULL on a failure to register, and a pointer
1555  * to the table header on success.
1556  */
1557 struct ctl_table_header *register_sysctl_table(ctl_table * table)
1558 {
1559 	struct ctl_table_header *tmp;
1560 	tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
1561 	if (!tmp)
1562 		return NULL;
1563 	tmp->ctl_table = table;
1564 	INIT_LIST_HEAD(&tmp->ctl_entry);
1565 	tmp->used = 0;
1566 	tmp->unregistering = NULL;
1567 	sysctl_set_parent(NULL, table);
1568 	spin_lock(&sysctl_lock);
1569 	list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
1570 	spin_unlock(&sysctl_lock);
1571 	return tmp;
1572 }
1573 
/**
 * unregister_sysctl_table - unregister a sysctl table hierarchy
 * @header: the header returned from register_sysctl_table
 *
 * Unregisters the sysctl table and all children. proc entries may not
 * actually be removed until they are no longer used by anyone.
 *
 * May sleep: if the header is still in use, start_unregistering() waits
 * for the last user to drop it.  @header is freed before returning and
 * must not be used afterwards.
 */
void unregister_sysctl_table(struct ctl_table_header * header)
{
	might_sleep();
	spin_lock(&sysctl_lock);
	/* waits for users (dropping the lock meanwhile) and unlinks the header */
	start_unregistering(header);
	spin_unlock(&sysctl_lock);
	kfree(header);
}
1589 
1590 #else /* !CONFIG_SYSCTL */
/* Stub for builds without CONFIG_SYSCTL: nothing is registered, returns NULL. */
struct ctl_table_header *register_sysctl_table(ctl_table * table)
{
	return NULL;
}
1595 
/* Stub for builds without CONFIG_SYSCTL: does nothing. */
void unregister_sysctl_table(struct ctl_table_header * table)
{
}
1599 
1600 #endif /* CONFIG_SYSCTL */
1601 
1602 /*
1603  * /proc/sys support
1604  */
1605 
1606 #ifdef CONFIG_PROC_SYSCTL
1607 
1608 static int _proc_do_string(void* data, int maxlen, int write,
1609 			   struct file *filp, void __user *buffer,
1610 			   size_t *lenp, loff_t *ppos)
1611 {
1612 	size_t len;
1613 	char __user *p;
1614 	char c;
1615 
1616 	if (!data || !maxlen || !*lenp) {
1617 		*lenp = 0;
1618 		return 0;
1619 	}
1620 
1621 	if (write) {
1622 		len = 0;
1623 		p = buffer;
1624 		while (len < *lenp) {
1625 			if (get_user(c, p++))
1626 				return -EFAULT;
1627 			if (c == 0 || c == '\n')
1628 				break;
1629 			len++;
1630 		}
1631 		if (len >= maxlen)
1632 			len = maxlen-1;
1633 		if(copy_from_user(data, buffer, len))
1634 			return -EFAULT;
1635 		((char *) data)[len] = 0;
1636 		*ppos += *lenp;
1637 	} else {
1638 		len = strlen(data);
1639 		if (len > maxlen)
1640 			len = maxlen;
1641 
1642 		if (*ppos > len) {
1643 			*lenp = 0;
1644 			return 0;
1645 		}
1646 
1647 		data += *ppos;
1648 		len  -= *ppos;
1649 
1650 		if (len > *lenp)
1651 			len = *lenp;
1652 		if (len)
1653 			if(copy_to_user(buffer, data, len))
1654 				return -EFAULT;
1655 		if (len < *lenp) {
1656 			if(put_user('\n', ((char __user *) buffer) + len))
1657 				return -EFAULT;
1658 			len++;
1659 		}
1660 		*lenp = len;
1661 		*ppos += len;
1662 	}
1663 	return 0;
1664 }
1665 
/**
 * proc_dostring - read or write a string sysctl
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @filp: the file structure
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * Reads/writes a string from/to the user buffer, using table->data as the
 * kernel buffer and table->maxlen as its size. If the kernel
 * buffer provided is not large enough to hold the string, the
 * string is truncated. The copied string is NUL-terminated.
 * If the string is being read by the user process, it is copied
 * and a newline '\n' is added. It is truncated if the buffer is
 * not large enough.
 *
 * Returns 0 on success.
 */
int proc_dostring(ctl_table *table, int write, struct file *filp,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return _proc_do_string(table->data, table->maxlen, write, filp,
			       buffer, lenp, ppos);
}
1690 
1691 
/*
 * Default conversion between the parsed/printed form of a number (sign flag
 * in *@negp plus magnitude in *@lvalp) and the stored int *@valp.
 *
 * @write != 0: store sign/magnitude into *@valp.
 * @write == 0: split *@valp into sign flag and magnitude.
 * @data is unused.  Always returns 0.
 */
static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		*valp = *negp ? -*lvalp : *lvalp;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows for
			 * INT_MIN and used to yield a bogus 20-digit magnitude
			 * on 64-bit.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1710 
1711 static int __do_proc_dointvec(void *tbl_data, ctl_table *table,
1712 		  int write, struct file *filp, void __user *buffer,
1713 		  size_t *lenp, loff_t *ppos,
1714 		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1715 			      int write, void *data),
1716 		  void *data)
1717 {
1718 #define TMPBUFLEN 21
1719 	int *i, vleft, first=1, neg, val;
1720 	unsigned long lval;
1721 	size_t left, len;
1722 
1723 	char buf[TMPBUFLEN], *p;
1724 	char __user *s = buffer;
1725 
1726 	if (!tbl_data || !table->maxlen || !*lenp ||
1727 	    (*ppos && !write)) {
1728 		*lenp = 0;
1729 		return 0;
1730 	}
1731 
1732 	i = (int *) tbl_data;
1733 	vleft = table->maxlen / sizeof(*i);
1734 	left = *lenp;
1735 
1736 	if (!conv)
1737 		conv = do_proc_dointvec_conv;
1738 
1739 	for (; left && vleft--; i++, first=0) {
1740 		if (write) {
1741 			while (left) {
1742 				char c;
1743 				if (get_user(c, s))
1744 					return -EFAULT;
1745 				if (!isspace(c))
1746 					break;
1747 				left--;
1748 				s++;
1749 			}
1750 			if (!left)
1751 				break;
1752 			neg = 0;
1753 			len = left;
1754 			if (len > sizeof(buf) - 1)
1755 				len = sizeof(buf) - 1;
1756 			if (copy_from_user(buf, s, len))
1757 				return -EFAULT;
1758 			buf[len] = 0;
1759 			p = buf;
1760 			if (*p == '-' && left > 1) {
1761 				neg = 1;
1762 				p++;
1763 			}
1764 			if (*p < '0' || *p > '9')
1765 				break;
1766 
1767 			lval = simple_strtoul(p, &p, 0);
1768 
1769 			len = p-buf;
1770 			if ((len < left) && *p && !isspace(*p))
1771 				break;
1772 			if (neg)
1773 				val = -val;
1774 			s += len;
1775 			left -= len;
1776 
1777 			if (conv(&neg, &lval, i, 1, data))
1778 				break;
1779 		} else {
1780 			p = buf;
1781 			if (!first)
1782 				*p++ = '\t';
1783 
1784 			if (conv(&neg, &lval, i, 0, data))
1785 				break;
1786 
1787 			sprintf(p, "%s%lu", neg ? "-" : "", lval);
1788 			len = strlen(buf);
1789 			if (len > left)
1790 				len = left;
1791 			if(copy_to_user(s, buf, len))
1792 				return -EFAULT;
1793 			left -= len;
1794 			s += len;
1795 		}
1796 	}
1797 
1798 	if (!write && !first && left) {
1799 		if(put_user('\n', s))
1800 			return -EFAULT;
1801 		left--, s++;
1802 	}
1803 	if (write) {
1804 		while (left) {
1805 			char c;
1806 			if (get_user(c, s++))
1807 				return -EFAULT;
1808 			if (!isspace(c))
1809 				break;
1810 			left--;
1811 		}
1812 	}
1813 	if (write && first)
1814 		return -EINVAL;
1815 	*lenp -= left;
1816 	*ppos += *lenp;
1817 	return 0;
1818 #undef TMPBUFLEN
1819 }
1820 
/*
 * Convenience wrapper: run __do_proc_dointvec() on table->data with the
 * given conversion callback @conv and its argument @data.
 */
static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
		  void __user *buffer, size_t *lenp, loff_t *ppos,
		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
			      int write, void *data),
		  void *data)
{
	return __do_proc_dointvec(table->data, table, write, filp,
			buffer, lenp, ppos, conv, data);
}
1830 
/**
 * proc_dointvec - read or write a vector of integers
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @filp: the file structure
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
 * values from/to the user buffer, treated as an ASCII string.
 * No conversion callback is supplied, so the values are stored as parsed.
 *
 * Returns 0 on success.
 */
int proc_dointvec(ctl_table *table, int write, struct file *filp,
		     void __user *buffer, size_t *lenp, loff_t *ppos)
{
    return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
		    	    NULL,NULL);
}
1851 
/* How a written value is combined with the stored one. */
#define OP_SET	0
#define OP_AND	1
#define OP_OR	2

/*
 * Conversion callback that combines a written value with the stored int
 * according to the operation pointed to by @data (OP_SET, OP_AND or OP_OR).
 * Reading splits *@valp into sign flag and magnitude.  Always returns 0.
 */
static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp,
				      int *valp,
				      int write, void *data)
{
	int op = *(int *)data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		switch(op) {
		case OP_SET:	*valp = val; break;
		case OP_AND:	*valp &= val; break;
		case OP_OR:	*valp |= val; break;
		}
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/* unsigned negation: "-val" overflows for INT_MIN */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1880 
1881 /*
1882  *	init may raise the set.
1883  */
1884 
1885 int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
1886 			void __user *buffer, size_t *lenp, loff_t *ppos)
1887 {
1888 	int op;
1889 
1890 	if (write && !capable(CAP_SYS_MODULE)) {
1891 		return -EPERM;
1892 	}
1893 
1894 	op = is_init(current) ? OP_SET : OP_AND;
1895 	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1896 				do_proc_dointvec_bset_conv,&op);
1897 }
1898 
1899 /*
1900  *	Taint values can only be increased
1901  */
1902 static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp,
1903 			       void __user *buffer, size_t *lenp, loff_t *ppos)
1904 {
1905 	int op;
1906 
1907 	if (write && !capable(CAP_SYS_ADMIN))
1908 		return -EPERM;
1909 
1910 	op = OP_OR;
1911 	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1912 				do_proc_dointvec_bset_conv,&op);
1913 }
1914 
/* Optional bounds for do_proc_dointvec_minmax_conv(); NULL means unbounded. */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Conversion callback with range checking.  @data points to a
 * struct do_proc_dointvec_minmax_conv_param.
 *
 * On write the value is stored only if it lies within [*min, *max] (each
 * bound checked only when its pointer is non-NULL); otherwise -EINVAL is
 * returned and *@valp is left untouched.  On read *@valp is split into sign
 * flag and magnitude.  Returns 0 on success.
 */
static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/* unsigned negation: "-val" overflows for INT_MIN */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1943 
/**
 * proc_dointvec_minmax - read a vector of integers with min/max values
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @filp: the file structure
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
 * values from/to the user buffer, treated as an ASCII string.
 *
 * This routine will ensure the values are within the range specified by
 * table->extra1 (min) and table->extra2 (max).  Either bound may be %NULL,
 * in which case that side is not checked.  The same pair of bounds is
 * applied to every element of the vector.
 *
 * Returns 0 on success.
 */
int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	struct do_proc_dointvec_minmax_conv_param param = {
		.min = (int *) table->extra1,
		.max = (int *) table->extra2,
	};
	return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
				do_proc_dointvec_minmax_conv, &param);
}
1971 
1972 static int __do_proc_doulongvec_minmax(void *data, ctl_table *table, int write,
1973 				     struct file *filp,
1974 				     void __user *buffer,
1975 				     size_t *lenp, loff_t *ppos,
1976 				     unsigned long convmul,
1977 				     unsigned long convdiv)
1978 {
1979 #define TMPBUFLEN 21
1980 	unsigned long *i, *min, *max, val;
1981 	int vleft, first=1, neg;
1982 	size_t len, left;
1983 	char buf[TMPBUFLEN], *p;
1984 	char __user *s = buffer;
1985 
1986 	if (!data || !table->maxlen || !*lenp ||
1987 	    (*ppos && !write)) {
1988 		*lenp = 0;
1989 		return 0;
1990 	}
1991 
1992 	i = (unsigned long *) data;
1993 	min = (unsigned long *) table->extra1;
1994 	max = (unsigned long *) table->extra2;
1995 	vleft = table->maxlen / sizeof(unsigned long);
1996 	left = *lenp;
1997 
1998 	for (; left && vleft--; i++, min++, max++, first=0) {
1999 		if (write) {
2000 			while (left) {
2001 				char c;
2002 				if (get_user(c, s))
2003 					return -EFAULT;
2004 				if (!isspace(c))
2005 					break;
2006 				left--;
2007 				s++;
2008 			}
2009 			if (!left)
2010 				break;
2011 			neg = 0;
2012 			len = left;
2013 			if (len > TMPBUFLEN-1)
2014 				len = TMPBUFLEN-1;
2015 			if (copy_from_user(buf, s, len))
2016 				return -EFAULT;
2017 			buf[len] = 0;
2018 			p = buf;
2019 			if (*p == '-' && left > 1) {
2020 				neg = 1;
2021 				p++;
2022 			}
2023 			if (*p < '0' || *p > '9')
2024 				break;
2025 			val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
2026 			len = p-buf;
2027 			if ((len < left) && *p && !isspace(*p))
2028 				break;
2029 			if (neg)
2030 				val = -val;
2031 			s += len;
2032 			left -= len;
2033 
2034 			if(neg)
2035 				continue;
2036 			if ((min && val < *min) || (max && val > *max))
2037 				continue;
2038 			*i = val;
2039 		} else {
2040 			p = buf;
2041 			if (!first)
2042 				*p++ = '\t';
2043 			sprintf(p, "%lu", convdiv * (*i) / convmul);
2044 			len = strlen(buf);
2045 			if (len > left)
2046 				len = left;
2047 			if(copy_to_user(s, buf, len))
2048 				return -EFAULT;
2049 			left -= len;
2050 			s += len;
2051 		}
2052 	}
2053 
2054 	if (!write && !first && left) {
2055 		if(put_user('\n', s))
2056 			return -EFAULT;
2057 		left--, s++;
2058 	}
2059 	if (write) {
2060 		while (left) {
2061 			char c;
2062 			if (get_user(c, s++))
2063 				return -EFAULT;
2064 			if (!isspace(c))
2065 				break;
2066 			left--;
2067 		}
2068 	}
2069 	if (write && first)
2070 		return -EINVAL;
2071 	*lenp -= left;
2072 	*ppos += *lenp;
2073 	return 0;
2074 #undef TMPBUFLEN
2075 }
2076 
/*
 * Convenience wrapper: run __do_proc_doulongvec_minmax() on table->data
 * with the given scale factors @convmul and @convdiv.
 */
static int do_proc_doulongvec_minmax(ctl_table *table, int write,
				     struct file *filp,
				     void __user *buffer,
				     size_t *lenp, loff_t *ppos,
				     unsigned long convmul,
				     unsigned long convdiv)
{
	return __do_proc_doulongvec_minmax(table->data, table, write,
			filp, buffer, lenp, ppos, convmul, convdiv);
}
2087 
/**
 * proc_doulongvec_minmax - read a vector of long integers with min/max values
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @filp: the file structure
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
 * values from/to the user buffer, treated as an ASCII string.
 *
 * Written values are checked against the range specified by
 * table->extra1 (min) and table->extra2 (max); a value outside the range
 * is skipped without reporting an error.  Values are not scaled
 * (multiplier and divisor are both 1).
 *
 * Returns 0 on success.
 */
int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
			   void __user *buffer, size_t *lenp, loff_t *ppos)
{
    return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l);
}
2110 
/**
 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @filp: the file structure
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
 * values from/to the user buffer, treated as an ASCII string. The values
 * are treated as milliseconds, and converted to jiffies when they are stored
 * (multiplied by HZ and divided by 1000).
 *
 * Written values are checked, after conversion, against the range specified
 * by table->extra1 (min) and table->extra2 (max); a value outside the range
 * is skipped without reporting an error.
 *
 * Returns 0 on success.
 */
int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
				      struct file *filp,
				      void __user *buffer,
				      size_t *lenp, loff_t *ppos)
{
    return do_proc_doulongvec_minmax(table, write, filp, buffer,
				     lenp, ppos, HZ, 1000l);
}
2137 
2138 
2139 static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
2140 					 int *valp,
2141 					 int write, void *data)
2142 {
2143 	if (write) {
2144 		if (*lvalp > LONG_MAX / HZ)
2145 			return 1;
2146 		*valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2147 	} else {
2148 		int val = *valp;
2149 		unsigned long lval;
2150 		if (val < 0) {
2151 			*negp = -1;
2152 			lval = (unsigned long)-val;
2153 		} else {
2154 			*negp = 0;
2155 			lval = (unsigned long)val;
2156 		}
2157 		*lvalp = lval / HZ;
2158 	}
2159 	return 0;
2160 }
2161 
2162 static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
2163 						int *valp,
2164 						int write, void *data)
2165 {
2166 	if (write) {
2167 		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2168 			return 1;
2169 		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2170 	} else {
2171 		int val = *valp;
2172 		unsigned long lval;
2173 		if (val < 0) {
2174 			*negp = -1;
2175 			lval = (unsigned long)-val;
2176 		} else {
2177 			*negp = 0;
2178 			lval = (unsigned long)val;
2179 		}
2180 		*lvalp = jiffies_to_clock_t(lval);
2181 	}
2182 	return 0;
2183 }
2184 
/*
 * Conversion callback for values expressed in milliseconds and stored in
 * jiffies: msecs_to_jiffies() on write, jiffies_to_msecs() on read.
 * @data is unused.  Always returns 0.
 */
static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		*valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
	} else {
		int val = *valp;
		unsigned long lval;
		if (val < 0) {
			*negp = -1;
			/* unsigned negation: "-val" overflows for INT_MIN */
			lval = -(unsigned long)val;
		} else {
			*negp = 0;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_msecs(lval);
	}
	return 0;
}
2205 
/**
 * proc_dointvec_jiffies - read a vector of integers as seconds
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @filp: the file structure
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
 * values from/to the user buffer, treated as an ASCII string.
 * The values read are assumed to be in seconds, and are converted into
 * jiffies; values shown to the user are converted back to seconds.
 *
 * Returns 0 on success.
 */
int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
			  void __user *buffer, size_t *lenp, loff_t *ppos)
{
    return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
		    	    do_proc_dointvec_jiffies_conv,NULL);
}
2228 
/**
 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @filp: the file structure
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: pointer to the file position
 *
 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
 * values from/to the user buffer, treated as an ASCII string.
 * The values read are assumed to be in 1/USER_HZ seconds, and
 * are converted into jiffies; values shown to the user are converted back.
 *
 * Returns 0 on success.
 */
int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
				 void __user *buffer, size_t *lenp, loff_t *ppos)
{
    return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
		    	    do_proc_dointvec_userhz_jiffies_conv,NULL);
}
2251 
/**
 * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @filp: the file structure
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: the current position in the file
 *
 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
 * values from/to the user buffer, treated as an ASCII string.
 * The values read are assumed to be in 1/1000 seconds, and
 * are converted into jiffies; values shown to the user are converted back.
 *
 * Returns 0 on success.
 */
int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
			     void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
				do_proc_dointvec_ms_jiffies_conv, NULL);
}
2275 
2276 static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
2277 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2278 {
2279 	struct pid *new_pid;
2280 	pid_t tmp;
2281 	int r;
2282 
2283 	tmp = pid_nr(cad_pid);
2284 
2285 	r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
2286 			       lenp, ppos, NULL, NULL);
2287 	if (r || !write)
2288 		return r;
2289 
2290 	new_pid = find_get_pid(tmp);
2291 	if (!new_pid)
2292 		return -ESRCH;
2293 
2294 	put_pid(xchg(&cad_pid, new_pid));
2295 	return 0;
2296 }
2297 
2298 #else /* CONFIG_PROC_FS */
2299 
2300 int proc_dostring(ctl_table *table, int write, struct file *filp,
2301 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2302 {
2303 	return -ENOSYS;
2304 }
2305 
2306 int proc_dointvec(ctl_table *table, int write, struct file *filp,
2307 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2308 {
2309 	return -ENOSYS;
2310 }
2311 
2312 int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
2313 			void __user *buffer, size_t *lenp, loff_t *ppos)
2314 {
2315 	return -ENOSYS;
2316 }
2317 
2318 int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
2319 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2320 {
2321 	return -ENOSYS;
2322 }
2323 
2324 int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2325 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2326 {
2327 	return -ENOSYS;
2328 }
2329 
2330 int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2331 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2332 {
2333 	return -ENOSYS;
2334 }
2335 
2336 int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2337 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2338 {
2339 	return -ENOSYS;
2340 }
2341 
2342 int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2343 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2344 {
2345 	return -ENOSYS;
2346 }
2347 
2348 int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2349 				      struct file *filp,
2350 				      void __user *buffer,
2351 				      size_t *lenp, loff_t *ppos)
2352 {
2353     return -ENOSYS;
2354 }
2355 
2356 
2357 #endif /* CONFIG_PROC_FS */
2358 
2359 
2360 #ifdef CONFIG_SYSCTL_SYSCALL
2361 /*
2362  * General sysctl support routines
2363  */
2364 
2365 /* The generic string strategy routine: */
2366 int sysctl_string(ctl_table *table, int __user *name, int nlen,
2367 		  void __user *oldval, size_t __user *oldlenp,
2368 		  void __user *newval, size_t newlen)
2369 {
2370 	if (!table->data || !table->maxlen)
2371 		return -ENOTDIR;
2372 
2373 	if (oldval && oldlenp) {
2374 		size_t bufsize;
2375 		if (get_user(bufsize, oldlenp))
2376 			return -EFAULT;
2377 		if (bufsize) {
2378 			size_t len = strlen(table->data), copied;
2379 
2380 			/* This shouldn't trigger for a well-formed sysctl */
2381 			if (len > table->maxlen)
2382 				len = table->maxlen;
2383 
2384 			/* Copy up to a max of bufsize-1 bytes of the string */
2385 			copied = (len >= bufsize) ? bufsize - 1 : len;
2386 
2387 			if (copy_to_user(oldval, table->data, copied) ||
2388 			    put_user(0, (char __user *)(oldval + copied)))
2389 				return -EFAULT;
2390 			if (put_user(len, oldlenp))
2391 				return -EFAULT;
2392 		}
2393 	}
2394 	if (newval && newlen) {
2395 		size_t len = newlen;
2396 		if (len > table->maxlen)
2397 			len = table->maxlen;
2398 		if(copy_from_user(table->data, newval, len))
2399 			return -EFAULT;
2400 		if (len == table->maxlen)
2401 			len--;
2402 		((char *) table->data)[len] = 0;
2403 	}
2404 	return 1;
2405 }
2406 
2407 /*
2408  * This function makes sure that all of the integers in the vector
2409  * are between the minimum and maximum values given in the arrays
2410  * table->extra1 and table->extra2, respectively.
2411  */
2412 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2413 		void __user *oldval, size_t __user *oldlenp,
2414 		void __user *newval, size_t newlen)
2415 {
2416 
2417 	if (newval && newlen) {
2418 		int __user *vec = (int __user *) newval;
2419 		int *min = (int *) table->extra1;
2420 		int *max = (int *) table->extra2;
2421 		size_t length;
2422 		int i;
2423 
2424 		if (newlen % sizeof(int) != 0)
2425 			return -EINVAL;
2426 
2427 		if (!table->extra1 && !table->extra2)
2428 			return 0;
2429 
2430 		if (newlen > table->maxlen)
2431 			newlen = table->maxlen;
2432 		length = newlen / sizeof(int);
2433 
2434 		for (i = 0; i < length; i++) {
2435 			int value;
2436 			if (get_user(value, vec + i))
2437 				return -EFAULT;
2438 			if (min && value < min[i])
2439 				return -EINVAL;
2440 			if (max && value > max[i])
2441 				return -EINVAL;
2442 		}
2443 	}
2444 	return 0;
2445 }
2446 
2447 /* Strategy function to convert jiffies to seconds */
2448 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2449 		void __user *oldval, size_t __user *oldlenp,
2450 		void __user *newval, size_t newlen)
2451 {
2452 	if (oldval && oldlenp) {
2453 		size_t olen;
2454 
2455 		if (get_user(olen, oldlenp))
2456 			return -EFAULT;
2457 		if (olen) {
2458 			int val;
2459 
2460 			if (olen < sizeof(int))
2461 				return -EINVAL;
2462 
2463 			val = *(int *)(table->data) / HZ;
2464 			if (put_user(val, (int __user *)oldval))
2465 				return -EFAULT;
2466 			if (put_user(sizeof(int), oldlenp))
2467 				return -EFAULT;
2468 		}
2469 	}
2470 	if (newval && newlen) {
2471 		int new;
2472 		if (newlen != sizeof(int))
2473 			return -EINVAL;
2474 		if (get_user(new, (int __user *)newval))
2475 			return -EFAULT;
2476 		*(int *)(table->data) = new*HZ;
2477 	}
2478 	return 1;
2479 }
2480 
2481 /* Strategy function to convert jiffies to seconds */
2482 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2483 		void __user *oldval, size_t __user *oldlenp,
2484 		void __user *newval, size_t newlen)
2485 {
2486 	if (oldval && oldlenp) {
2487 		size_t olen;
2488 
2489 		if (get_user(olen, oldlenp))
2490 			return -EFAULT;
2491 		if (olen) {
2492 			int val;
2493 
2494 			if (olen < sizeof(int))
2495 				return -EINVAL;
2496 
2497 			val = jiffies_to_msecs(*(int *)(table->data));
2498 			if (put_user(val, (int __user *)oldval))
2499 				return -EFAULT;
2500 			if (put_user(sizeof(int), oldlenp))
2501 				return -EFAULT;
2502 		}
2503 	}
2504 	if (newval && newlen) {
2505 		int new;
2506 		if (newlen != sizeof(int))
2507 			return -EINVAL;
2508 		if (get_user(new, (int __user *)newval))
2509 			return -EFAULT;
2510 		*(int *)(table->data) = msecs_to_jiffies(new);
2511 	}
2512 	return 1;
2513 }
2514 
2515 
2516 
2517 #else /* CONFIG_SYSCTL_SYSCALL */
2518 
2519 
2520 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
2521 {
2522 	static int msg_count;
2523 	struct __sysctl_args tmp;
2524 	int name[CTL_MAXNAME];
2525 	int i;
2526 
2527 	/* Read in the sysctl name for better debug message logging */
2528 	if (copy_from_user(&tmp, args, sizeof(tmp)))
2529 		return -EFAULT;
2530 	if (tmp.nlen <= 0 || tmp.nlen >= CTL_MAXNAME)
2531 		return -ENOTDIR;
2532 	for (i = 0; i < tmp.nlen; i++)
2533 		if (get_user(name[i], tmp.name + i))
2534 			return -EFAULT;
2535 
2536 	/* Ignore accesses to kernel.version */
2537 	if ((tmp.nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION))
2538 		goto out;
2539 
2540 	if (msg_count < 5) {
2541 		msg_count++;
2542 		printk(KERN_INFO
2543 			"warning: process `%s' used the removed sysctl "
2544 			"system call with ", current->comm);
2545 		for (i = 0; i < tmp.nlen; i++)
2546 			printk("%d.", name[i]);
2547 		printk("\n");
2548 	}
2549 out:
2550 	return -ENOSYS;
2551 }
2552 
2553 int sysctl_string(ctl_table *table, int __user *name, int nlen,
2554 		  void __user *oldval, size_t __user *oldlenp,
2555 		  void __user *newval, size_t newlen)
2556 {
2557 	return -ENOSYS;
2558 }
2559 
2560 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2561 		void __user *oldval, size_t __user *oldlenp,
2562 		void __user *newval, size_t newlen)
2563 {
2564 	return -ENOSYS;
2565 }
2566 
2567 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2568 		void __user *oldval, size_t __user *oldlenp,
2569 		void __user *newval, size_t newlen)
2570 {
2571 	return -ENOSYS;
2572 }
2573 
2574 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2575 		void __user *oldval, size_t __user *oldlenp,
2576 		void __user *newval, size_t newlen)
2577 {
2578 	return -ENOSYS;
2579 }
2580 
2581 #endif /* CONFIG_SYSCTL_SYSCALL */
2582 
/*
 * The handlers below are each defined twice, once for real and once as
 * a stub under the opposite #ifdef branch.  Rather than repeating the
 * export after both definitions, all exports are collected here as a
 * deliberate exception to the usual placement.
 */

/* proc handlers */
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);

/* sysctl strategy routines */
EXPORT_SYMBOL(sysctl_string);
EXPORT_SYMBOL(sysctl_intvec);
EXPORT_SYMBOL(sysctl_jiffies);
EXPORT_SYMBOL(sysctl_ms_jiffies);

/* table registration */
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(unregister_sysctl_table);
2601