xref: /openbmc/linux/kernel/sysctl.c (revision 96de0e252cedffad61b3cb5e05662c591898e69a)
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20 
21 #include <linux/module.h>
22 #include <linux/mm.h>
23 #include <linux/swap.h>
24 #include <linux/slab.h>
25 #include <linux/sysctl.h>
26 #include <linux/proc_fs.h>
27 #include <linux/security.h>
28 #include <linux/ctype.h>
29 #include <linux/utsname.h>
30 #include <linux/smp_lock.h>
31 #include <linux/fs.h>
32 #include <linux/init.h>
33 #include <linux/kernel.h>
34 #include <linux/kobject.h>
35 #include <linux/net.h>
36 #include <linux/sysrq.h>
37 #include <linux/highuid.h>
38 #include <linux/writeback.h>
39 #include <linux/hugetlb.h>
40 #include <linux/security.h>
41 #include <linux/initrd.h>
42 #include <linux/times.h>
43 #include <linux/limits.h>
44 #include <linux/dcache.h>
45 #include <linux/syscalls.h>
46 #include <linux/nfs_fs.h>
47 #include <linux/acpi.h>
48 #include <linux/reboot.h>
49 
50 #include <asm/uaccess.h>
51 #include <asm/processor.h>
52 
53 #ifdef CONFIG_X86
54 #include <asm/nmi.h>
55 #include <asm/stacktrace.h>
56 #endif
57 
58 static int deprecated_sysctl_warning(struct __sysctl_args *args);
59 
60 #if defined(CONFIG_SYSCTL)
61 
62 /* External variables not in a header file. */
63 extern int C_A_D;
64 extern int print_fatal_signals;
65 extern int sysctl_overcommit_memory;
66 extern int sysctl_overcommit_ratio;
67 extern int sysctl_panic_on_oom;
68 extern int sysctl_oom_kill_allocating_task;
69 extern int max_threads;
70 extern int core_uses_pid;
71 extern int suid_dumpable;
72 extern char core_pattern[];
73 extern int pid_max;
74 extern int min_free_kbytes;
75 extern int printk_ratelimit_jiffies;
76 extern int printk_ratelimit_burst;
77 extern int pid_max_min, pid_max_max;
78 extern int sysctl_drop_caches;
79 extern int percpu_pagelist_fraction;
80 extern int compat_log;
81 extern int maps_protect;
82 extern int sysctl_stat_interval;
83 extern int audit_argv_kb;
84 
/*
 * Constants used for minimum and maximum.
 *
 * The tables in this file point at these through .extra1 (lower bound)
 * and .extra2 (upper bound).
 */
#ifdef CONFIG_DETECT_SOFTLOCKUP
static int one = 1;
static int sixty = 60;
#endif

#ifdef CONFIG_MMU
static int two = 2;
#endif

static int zero;
static int one_hundred = 100;

/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
static int maxolduid = 65535;
static int minolduid;
/* lower bound (.extra1) of the percpu_pagelist_fraction entry in vm_table */
static int min_percpu_pagelist_fract = 8;
102 
103 static int ngroups_max = NGROUPS_MAX;
104 
105 #ifdef CONFIG_KMOD
106 extern char modprobe_path[];
107 #endif
108 #ifdef CONFIG_CHR_DEV_SG
109 extern int sg_big_buff;
110 #endif
111 
112 #ifdef __sparc__
113 extern char reboot_command [];
114 extern int stop_a_enabled;
115 extern int scons_pwroff;
116 #endif
117 
118 #ifdef __hppa__
119 extern int pwrsw_enabled;
120 extern int unaligned_enabled;
121 #endif
122 
123 #ifdef CONFIG_S390
124 #ifdef CONFIG_MATHEMU
125 extern int sysctl_ieee_emulation_warnings;
126 #endif
127 extern int sysctl_userprocess_debug;
128 extern int spin_retry;
129 #endif
130 
131 extern int sysctl_hz_timer;
132 
133 #ifdef CONFIG_BSD_PROCESS_ACCT
134 extern int acct_parm[];
135 #endif
136 
137 #ifdef CONFIG_IA64
138 extern int no_unaligned_warning;
139 #endif
140 
141 #ifdef CONFIG_RT_MUTEXES
142 extern int max_lock_depth;
143 #endif
144 
145 #ifdef CONFIG_SYSCTL_SYSCALL
146 static int parse_table(int __user *, int, void __user *, size_t __user *,
147 		void __user *, size_t, struct ctl_table *);
148 #endif
149 
150 
151 #ifdef CONFIG_PROC_SYSCTL
152 static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp,
153 		  void __user *buffer, size_t *lenp, loff_t *ppos);
154 static int proc_dointvec_taint(struct ctl_table *table, int write, struct file *filp,
155 			       void __user *buffer, size_t *lenp, loff_t *ppos);
156 #endif
157 
158 static struct ctl_table root_table[];
159 static struct ctl_table_header root_table_header =
160 	{ root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) };
161 
162 static struct ctl_table kern_table[];
163 static struct ctl_table vm_table[];
164 static struct ctl_table fs_table[];
165 static struct ctl_table debug_table[];
166 static struct ctl_table dev_table[];
167 extern struct ctl_table random_table[];
168 #ifdef CONFIG_INOTIFY_USER
169 extern struct ctl_table inotify_table[];
170 #endif
171 
172 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
173 int sysctl_legacy_va_layout;
174 #endif
175 
176 extern int prove_locking;
177 extern int lock_stat;
178 
179 /* The default sysctl tables: */
180 
181 static struct ctl_table root_table[] = {
182 	{
183 		.ctl_name	= CTL_KERN,
184 		.procname	= "kernel",
185 		.mode		= 0555,
186 		.child		= kern_table,
187 	},
188 	{
189 		.ctl_name	= CTL_VM,
190 		.procname	= "vm",
191 		.mode		= 0555,
192 		.child		= vm_table,
193 	},
194 #ifdef CONFIG_NET
195 	{
196 		.ctl_name	= CTL_NET,
197 		.procname	= "net",
198 		.mode		= 0555,
199 		.child		= net_table,
200 	},
201 #endif
202 	{
203 		.ctl_name	= CTL_FS,
204 		.procname	= "fs",
205 		.mode		= 0555,
206 		.child		= fs_table,
207 	},
208 	{
209 		.ctl_name	= CTL_DEBUG,
210 		.procname	= "debug",
211 		.mode		= 0555,
212 		.child		= debug_table,
213 	},
214 	{
215 		.ctl_name	= CTL_DEV,
216 		.procname	= "dev",
217 		.mode		= 0555,
218 		.child		= dev_table,
219 	},
220 /*
221  * NOTE: do not add new entries to this table unless you have read
222  * Documentation/sysctl/ctl_unnumbered.txt
223  */
224 	{ .ctl_name = 0 }
225 };
226 
227 #ifdef CONFIG_SCHED_DEBUG
/*
 * Bounds for the scheduler latency/granularity entries in kern_table.
 *
 * They are passed to proc_dointvec_minmax() through .extra1/.extra2, and
 * that handler reads its bounds as int.  They therefore have to be int:
 * declared as unsigned long, a 64-bit big-endian machine would read the
 * upper (zero) half of each value.  All values fit comfortably in an int.
 */
static int min_sched_granularity_ns = 100000;		/* 100 usecs */
static int max_sched_granularity_ns = 1000000000;	/* 1 second */
static int min_wakeup_granularity_ns;			/* 0 usecs */
static int max_wakeup_granularity_ns = 1000000000;	/* 1 second */
232 #endif
233 
234 static struct ctl_table kern_table[] = {
235 #ifdef CONFIG_SCHED_DEBUG
236 	{
237 		.ctl_name	= CTL_UNNUMBERED,
238 		.procname	= "sched_nr_latency",
239 		.data		= &sysctl_sched_nr_latency,
240 		.maxlen		= sizeof(unsigned int),
241 		.mode		= 0644,
242 		.proc_handler	= &proc_dointvec,
243 	},
244 	{
245 		.ctl_name	= CTL_UNNUMBERED,
246 		.procname	= "sched_latency_ns",
247 		.data		= &sysctl_sched_latency,
248 		.maxlen		= sizeof(unsigned int),
249 		.mode		= 0644,
250 		.proc_handler	= &proc_dointvec_minmax,
251 		.strategy	= &sysctl_intvec,
252 		.extra1		= &min_sched_granularity_ns,
253 		.extra2		= &max_sched_granularity_ns,
254 	},
255 	{
256 		.ctl_name	= CTL_UNNUMBERED,
257 		.procname	= "sched_wakeup_granularity_ns",
258 		.data		= &sysctl_sched_wakeup_granularity,
259 		.maxlen		= sizeof(unsigned int),
260 		.mode		= 0644,
261 		.proc_handler	= &proc_dointvec_minmax,
262 		.strategy	= &sysctl_intvec,
263 		.extra1		= &min_wakeup_granularity_ns,
264 		.extra2		= &max_wakeup_granularity_ns,
265 	},
266 	{
267 		.ctl_name	= CTL_UNNUMBERED,
268 		.procname	= "sched_batch_wakeup_granularity_ns",
269 		.data		= &sysctl_sched_batch_wakeup_granularity,
270 		.maxlen		= sizeof(unsigned int),
271 		.mode		= 0644,
272 		.proc_handler	= &proc_dointvec_minmax,
273 		.strategy	= &sysctl_intvec,
274 		.extra1		= &min_wakeup_granularity_ns,
275 		.extra2		= &max_wakeup_granularity_ns,
276 	},
277 	{
278 		.ctl_name	= CTL_UNNUMBERED,
279 		.procname	= "sched_child_runs_first",
280 		.data		= &sysctl_sched_child_runs_first,
281 		.maxlen		= sizeof(unsigned int),
282 		.mode		= 0644,
283 		.proc_handler	= &proc_dointvec,
284 	},
285 	{
286 		.ctl_name	= CTL_UNNUMBERED,
287 		.procname	= "sched_features",
288 		.data		= &sysctl_sched_features,
289 		.maxlen		= sizeof(unsigned int),
290 		.mode		= 0644,
291 		.proc_handler	= &proc_dointvec,
292 	},
293 	{
294 		.ctl_name	= CTL_UNNUMBERED,
295 		.procname	= "sched_migration_cost",
296 		.data		= &sysctl_sched_migration_cost,
297 		.maxlen		= sizeof(unsigned int),
298 		.mode		= 0644,
299 		.proc_handler	= &proc_dointvec,
300 	},
301 #endif
302 	{
303 		.ctl_name	= CTL_UNNUMBERED,
304 		.procname	= "sched_compat_yield",
305 		.data		= &sysctl_sched_compat_yield,
306 		.maxlen		= sizeof(unsigned int),
307 		.mode		= 0644,
308 		.proc_handler	= &proc_dointvec,
309 	},
310 #ifdef CONFIG_PROVE_LOCKING
311 	{
312 		.ctl_name	= CTL_UNNUMBERED,
313 		.procname	= "prove_locking",
314 		.data		= &prove_locking,
315 		.maxlen		= sizeof(int),
316 		.mode		= 0644,
317 		.proc_handler	= &proc_dointvec,
318 	},
319 #endif
320 #ifdef CONFIG_LOCK_STAT
321 	{
322 		.ctl_name	= CTL_UNNUMBERED,
323 		.procname	= "lock_stat",
324 		.data		= &lock_stat,
325 		.maxlen		= sizeof(int),
326 		.mode		= 0644,
327 		.proc_handler	= &proc_dointvec,
328 	},
329 #endif
330 	{
331 		.ctl_name	= KERN_PANIC,
332 		.procname	= "panic",
333 		.data		= &panic_timeout,
334 		.maxlen		= sizeof(int),
335 		.mode		= 0644,
336 		.proc_handler	= &proc_dointvec,
337 	},
338 	{
339 		.ctl_name	= KERN_CORE_USES_PID,
340 		.procname	= "core_uses_pid",
341 		.data		= &core_uses_pid,
342 		.maxlen		= sizeof(int),
343 		.mode		= 0644,
344 		.proc_handler	= &proc_dointvec,
345 	},
346 #ifdef CONFIG_AUDITSYSCALL
347 	{
348 		.ctl_name	= CTL_UNNUMBERED,
349 		.procname	= "audit_argv_kb",
350 		.data		= &audit_argv_kb,
351 		.maxlen		= sizeof(int),
352 		.mode		= 0644,
353 		.proc_handler	= &proc_dointvec,
354 	},
355 #endif
356 	{
357 		.ctl_name	= KERN_CORE_PATTERN,
358 		.procname	= "core_pattern",
359 		.data		= core_pattern,
360 		.maxlen		= CORENAME_MAX_SIZE,
361 		.mode		= 0644,
362 		.proc_handler	= &proc_dostring,
363 		.strategy	= &sysctl_string,
364 	},
365 #ifdef CONFIG_PROC_SYSCTL
366 	{
367 		.procname	= "tainted",
368 		.data		= &tainted,
369 		.maxlen		= sizeof(int),
370 		.mode		= 0644,
371 		.proc_handler	= &proc_dointvec_taint,
372 	},
373 #endif
374 #ifdef CONFIG_SECURITY_CAPABILITIES
375 	{
376 		.procname	= "cap-bound",
377 		.data		= &cap_bset,
378 		.maxlen		= sizeof(kernel_cap_t),
379 		.mode		= 0600,
380 		.proc_handler	= &proc_dointvec_bset,
381 	},
382 #endif /* def CONFIG_SECURITY_CAPABILITIES */
383 #ifdef CONFIG_BLK_DEV_INITRD
384 	{
385 		.ctl_name	= KERN_REALROOTDEV,
386 		.procname	= "real-root-dev",
387 		.data		= &real_root_dev,
388 		.maxlen		= sizeof(int),
389 		.mode		= 0644,
390 		.proc_handler	= &proc_dointvec,
391 	},
392 #endif
393 	{
394 		.ctl_name	= CTL_UNNUMBERED,
395 		.procname	= "print-fatal-signals",
396 		.data		= &print_fatal_signals,
397 		.maxlen		= sizeof(int),
398 		.mode		= 0644,
399 		.proc_handler	= &proc_dointvec,
400 	},
401 #ifdef __sparc__
402 	{
403 		.ctl_name	= KERN_SPARC_REBOOT,
404 		.procname	= "reboot-cmd",
405 		.data		= reboot_command,
406 		.maxlen		= 256,
407 		.mode		= 0644,
408 		.proc_handler	= &proc_dostring,
409 		.strategy	= &sysctl_string,
410 	},
411 	{
412 		.ctl_name	= KERN_SPARC_STOP_A,
413 		.procname	= "stop-a",
414 		.data		= &stop_a_enabled,
415 		.maxlen		= sizeof (int),
416 		.mode		= 0644,
417 		.proc_handler	= &proc_dointvec,
418 	},
419 	{
420 		.ctl_name	= KERN_SPARC_SCONS_PWROFF,
421 		.procname	= "scons-poweroff",
422 		.data		= &scons_pwroff,
423 		.maxlen		= sizeof (int),
424 		.mode		= 0644,
425 		.proc_handler	= &proc_dointvec,
426 	},
427 #endif
428 #ifdef __hppa__
429 	{
430 		.ctl_name	= KERN_HPPA_PWRSW,
431 		.procname	= "soft-power",
432 		.data		= &pwrsw_enabled,
433 		.maxlen		= sizeof (int),
434 	 	.mode		= 0644,
435 		.proc_handler	= &proc_dointvec,
436 	},
437 	{
438 		.ctl_name	= KERN_HPPA_UNALIGNED,
439 		.procname	= "unaligned-trap",
440 		.data		= &unaligned_enabled,
441 		.maxlen		= sizeof (int),
442 		.mode		= 0644,
443 		.proc_handler	= &proc_dointvec,
444 	},
445 #endif
446 	{
447 		.ctl_name	= KERN_CTLALTDEL,
448 		.procname	= "ctrl-alt-del",
449 		.data		= &C_A_D,
450 		.maxlen		= sizeof(int),
451 		.mode		= 0644,
452 		.proc_handler	= &proc_dointvec,
453 	},
454 	{
455 		.ctl_name	= KERN_PRINTK,
456 		.procname	= "printk",
457 		.data		= &console_loglevel,
458 		.maxlen		= 4*sizeof(int),
459 		.mode		= 0644,
460 		.proc_handler	= &proc_dointvec,
461 	},
462 #ifdef CONFIG_KMOD
463 	{
464 		.ctl_name	= KERN_MODPROBE,
465 		.procname	= "modprobe",
466 		.data		= &modprobe_path,
467 		.maxlen		= KMOD_PATH_LEN,
468 		.mode		= 0644,
469 		.proc_handler	= &proc_dostring,
470 		.strategy	= &sysctl_string,
471 	},
472 #endif
473 #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
474 	{
475 		.ctl_name	= KERN_HOTPLUG,
476 		.procname	= "hotplug",
477 		.data		= &uevent_helper,
478 		.maxlen		= UEVENT_HELPER_PATH_LEN,
479 		.mode		= 0644,
480 		.proc_handler	= &proc_dostring,
481 		.strategy	= &sysctl_string,
482 	},
483 #endif
484 #ifdef CONFIG_CHR_DEV_SG
485 	{
486 		.ctl_name	= KERN_SG_BIG_BUFF,
487 		.procname	= "sg-big-buff",
488 		.data		= &sg_big_buff,
489 		.maxlen		= sizeof (int),
490 		.mode		= 0444,
491 		.proc_handler	= &proc_dointvec,
492 	},
493 #endif
494 #ifdef CONFIG_BSD_PROCESS_ACCT
495 	{
496 		.ctl_name	= KERN_ACCT,
497 		.procname	= "acct",
498 		.data		= &acct_parm,
499 		.maxlen		= 3*sizeof(int),
500 		.mode		= 0644,
501 		.proc_handler	= &proc_dointvec,
502 	},
503 #endif
504 #ifdef CONFIG_MAGIC_SYSRQ
505 	{
506 		.ctl_name	= KERN_SYSRQ,
507 		.procname	= "sysrq",
508 		.data		= &__sysrq_enabled,
509 		.maxlen		= sizeof (int),
510 		.mode		= 0644,
511 		.proc_handler	= &proc_dointvec,
512 	},
513 #endif
514 #ifdef CONFIG_PROC_SYSCTL
515 	{
516 		.procname	= "cad_pid",
517 		.data		= NULL,
518 		.maxlen		= sizeof (int),
519 		.mode		= 0600,
520 		.proc_handler	= &proc_do_cad_pid,
521 	},
522 #endif
523 	{
524 		.ctl_name	= KERN_MAX_THREADS,
525 		.procname	= "threads-max",
526 		.data		= &max_threads,
527 		.maxlen		= sizeof(int),
528 		.mode		= 0644,
529 		.proc_handler	= &proc_dointvec,
530 	},
531 	{
532 		.ctl_name	= KERN_RANDOM,
533 		.procname	= "random",
534 		.mode		= 0555,
535 		.child		= random_table,
536 	},
537 	{
538 		.ctl_name	= KERN_OVERFLOWUID,
539 		.procname	= "overflowuid",
540 		.data		= &overflowuid,
541 		.maxlen		= sizeof(int),
542 		.mode		= 0644,
543 		.proc_handler	= &proc_dointvec_minmax,
544 		.strategy	= &sysctl_intvec,
545 		.extra1		= &minolduid,
546 		.extra2		= &maxolduid,
547 	},
548 	{
549 		.ctl_name	= KERN_OVERFLOWGID,
550 		.procname	= "overflowgid",
551 		.data		= &overflowgid,
552 		.maxlen		= sizeof(int),
553 		.mode		= 0644,
554 		.proc_handler	= &proc_dointvec_minmax,
555 		.strategy	= &sysctl_intvec,
556 		.extra1		= &minolduid,
557 		.extra2		= &maxolduid,
558 	},
559 #ifdef CONFIG_S390
560 #ifdef CONFIG_MATHEMU
561 	{
562 		.ctl_name	= KERN_IEEE_EMULATION_WARNINGS,
563 		.procname	= "ieee_emulation_warnings",
564 		.data		= &sysctl_ieee_emulation_warnings,
565 		.maxlen		= sizeof(int),
566 		.mode		= 0644,
567 		.proc_handler	= &proc_dointvec,
568 	},
569 #endif
570 #ifdef CONFIG_NO_IDLE_HZ
571 	{
572 		.ctl_name       = KERN_HZ_TIMER,
573 		.procname       = "hz_timer",
574 		.data           = &sysctl_hz_timer,
575 		.maxlen         = sizeof(int),
576 		.mode           = 0644,
577 		.proc_handler   = &proc_dointvec,
578 	},
579 #endif
580 	{
581 		.ctl_name	= KERN_S390_USER_DEBUG_LOGGING,
582 		.procname	= "userprocess_debug",
583 		.data		= &sysctl_userprocess_debug,
584 		.maxlen		= sizeof(int),
585 		.mode		= 0644,
586 		.proc_handler	= &proc_dointvec,
587 	},
588 #endif
589 	{
590 		.ctl_name	= KERN_PIDMAX,
591 		.procname	= "pid_max",
592 		.data		= &pid_max,
593 		.maxlen		= sizeof (int),
594 		.mode		= 0644,
595 		.proc_handler	= &proc_dointvec_minmax,
596 		.strategy	= sysctl_intvec,
597 		.extra1		= &pid_max_min,
598 		.extra2		= &pid_max_max,
599 	},
600 	{
601 		.ctl_name	= KERN_PANIC_ON_OOPS,
602 		.procname	= "panic_on_oops",
603 		.data		= &panic_on_oops,
604 		.maxlen		= sizeof(int),
605 		.mode		= 0644,
606 		.proc_handler	= &proc_dointvec,
607 	},
608 	{
609 		.ctl_name	= KERN_PRINTK_RATELIMIT,
610 		.procname	= "printk_ratelimit",
611 		.data		= &printk_ratelimit_jiffies,
612 		.maxlen		= sizeof(int),
613 		.mode		= 0644,
614 		.proc_handler	= &proc_dointvec_jiffies,
615 		.strategy	= &sysctl_jiffies,
616 	},
617 	{
618 		.ctl_name	= KERN_PRINTK_RATELIMIT_BURST,
619 		.procname	= "printk_ratelimit_burst",
620 		.data		= &printk_ratelimit_burst,
621 		.maxlen		= sizeof(int),
622 		.mode		= 0644,
623 		.proc_handler	= &proc_dointvec,
624 	},
625 	{
626 		.ctl_name	= KERN_NGROUPS_MAX,
627 		.procname	= "ngroups_max",
628 		.data		= &ngroups_max,
629 		.maxlen		= sizeof (int),
630 		.mode		= 0444,
631 		.proc_handler	= &proc_dointvec,
632 	},
633 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
634 	{
635 		.ctl_name       = KERN_UNKNOWN_NMI_PANIC,
636 		.procname       = "unknown_nmi_panic",
637 		.data           = &unknown_nmi_panic,
638 		.maxlen         = sizeof (int),
639 		.mode           = 0644,
640 		.proc_handler   = &proc_dointvec,
641 	},
642 	{
643 		.procname       = "nmi_watchdog",
644 		.data           = &nmi_watchdog_enabled,
645 		.maxlen         = sizeof (int),
646 		.mode           = 0644,
647 		.proc_handler   = &proc_nmi_enabled,
648 	},
649 #endif
650 #if defined(CONFIG_X86)
651 	{
652 		.ctl_name	= KERN_PANIC_ON_NMI,
653 		.procname	= "panic_on_unrecovered_nmi",
654 		.data		= &panic_on_unrecovered_nmi,
655 		.maxlen		= sizeof(int),
656 		.mode		= 0644,
657 		.proc_handler	= &proc_dointvec,
658 	},
659 	{
660 		.ctl_name	= KERN_BOOTLOADER_TYPE,
661 		.procname	= "bootloader_type",
662 		.data		= &bootloader_type,
663 		.maxlen		= sizeof (int),
664 		.mode		= 0444,
665 		.proc_handler	= &proc_dointvec,
666 	},
667 	{
668 		.ctl_name	= CTL_UNNUMBERED,
669 		.procname	= "kstack_depth_to_print",
670 		.data		= &kstack_depth_to_print,
671 		.maxlen		= sizeof(int),
672 		.mode		= 0644,
673 		.proc_handler	= &proc_dointvec,
674 	},
675 #endif
676 #if defined(CONFIG_MMU)
677 	{
678 		.ctl_name	= KERN_RANDOMIZE,
679 		.procname	= "randomize_va_space",
680 		.data		= &randomize_va_space,
681 		.maxlen		= sizeof(int),
682 		.mode		= 0644,
683 		.proc_handler	= &proc_dointvec,
684 	},
685 #endif
686 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
687 	{
688 		.ctl_name	= KERN_SPIN_RETRY,
689 		.procname	= "spin_retry",
690 		.data		= &spin_retry,
691 		.maxlen		= sizeof (int),
692 		.mode		= 0644,
693 		.proc_handler	= &proc_dointvec,
694 	},
695 #endif
696 #if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
697 	{
698 		.procname	= "acpi_video_flags",
699 		.data		= &acpi_realmode_flags,
700 		.maxlen		= sizeof (unsigned long),
701 		.mode		= 0644,
702 		.proc_handler	= &proc_doulongvec_minmax,
703 	},
704 #endif
705 #ifdef CONFIG_IA64
706 	{
707 		.ctl_name	= KERN_IA64_UNALIGNED,
708 		.procname	= "ignore-unaligned-usertrap",
709 		.data		= &no_unaligned_warning,
710 		.maxlen		= sizeof (int),
711 	 	.mode		= 0644,
712 		.proc_handler	= &proc_dointvec,
713 	},
714 #endif
715 #ifdef CONFIG_DETECT_SOFTLOCKUP
716 	{
717 		.ctl_name	= CTL_UNNUMBERED,
718 		.procname	= "softlockup_thresh",
719 		.data		= &softlockup_thresh,
720 		.maxlen		= sizeof(int),
721 		.mode		= 0644,
722 		.proc_handler	= &proc_dointvec_minmax,
723 		.strategy	= &sysctl_intvec,
724 		.extra1		= &one,
725 		.extra2		= &sixty,
726 	},
727 #endif
728 #ifdef CONFIG_COMPAT
729 	{
730 		.ctl_name	= KERN_COMPAT_LOG,
731 		.procname	= "compat-log",
732 		.data		= &compat_log,
733 		.maxlen		= sizeof (int),
734 	 	.mode		= 0644,
735 		.proc_handler	= &proc_dointvec,
736 	},
737 #endif
738 #ifdef CONFIG_RT_MUTEXES
739 	{
740 		.ctl_name	= KERN_MAX_LOCK_DEPTH,
741 		.procname	= "max_lock_depth",
742 		.data		= &max_lock_depth,
743 		.maxlen		= sizeof(int),
744 		.mode		= 0644,
745 		.proc_handler	= &proc_dointvec,
746 	},
747 #endif
748 #ifdef CONFIG_PROC_FS
749 	{
750 		.ctl_name       = CTL_UNNUMBERED,
751 		.procname       = "maps_protect",
752 		.data           = &maps_protect,
753 		.maxlen         = sizeof(int),
754 		.mode           = 0644,
755 		.proc_handler   = &proc_dointvec,
756 	},
757 #endif
758 	{
759 		.ctl_name	= CTL_UNNUMBERED,
760 		.procname	= "poweroff_cmd",
761 		.data		= &poweroff_cmd,
762 		.maxlen		= POWEROFF_CMD_PATH_LEN,
763 		.mode		= 0644,
764 		.proc_handler	= &proc_dostring,
765 		.strategy	= &sysctl_string,
766 	},
767 /*
768  * NOTE: do not add new entries to this table unless you have read
769  * Documentation/sysctl/ctl_unnumbered.txt
770  */
771 	{ .ctl_name = 0 }
772 };
773 
774 static struct ctl_table vm_table[] = {
775 	{
776 		.ctl_name	= VM_OVERCOMMIT_MEMORY,
777 		.procname	= "overcommit_memory",
778 		.data		= &sysctl_overcommit_memory,
779 		.maxlen		= sizeof(sysctl_overcommit_memory),
780 		.mode		= 0644,
781 		.proc_handler	= &proc_dointvec,
782 	},
783 	{
784 		.ctl_name	= VM_PANIC_ON_OOM,
785 		.procname	= "panic_on_oom",
786 		.data		= &sysctl_panic_on_oom,
787 		.maxlen		= sizeof(sysctl_panic_on_oom),
788 		.mode		= 0644,
789 		.proc_handler	= &proc_dointvec,
790 	},
791 	{
792 		.ctl_name	= CTL_UNNUMBERED,
793 		.procname	= "oom_kill_allocating_task",
794 		.data		= &sysctl_oom_kill_allocating_task,
795 		.maxlen		= sizeof(sysctl_oom_kill_allocating_task),
796 		.mode		= 0644,
797 		.proc_handler	= &proc_dointvec,
798 	},
799 	{
800 		.ctl_name	= VM_OVERCOMMIT_RATIO,
801 		.procname	= "overcommit_ratio",
802 		.data		= &sysctl_overcommit_ratio,
803 		.maxlen		= sizeof(sysctl_overcommit_ratio),
804 		.mode		= 0644,
805 		.proc_handler	= &proc_dointvec,
806 	},
807 	{
808 		.ctl_name	= VM_PAGE_CLUSTER,
809 		.procname	= "page-cluster",
810 		.data		= &page_cluster,
811 		.maxlen		= sizeof(int),
812 		.mode		= 0644,
813 		.proc_handler	= &proc_dointvec,
814 	},
815 	{
816 		.ctl_name	= VM_DIRTY_BACKGROUND,
817 		.procname	= "dirty_background_ratio",
818 		.data		= &dirty_background_ratio,
819 		.maxlen		= sizeof(dirty_background_ratio),
820 		.mode		= 0644,
821 		.proc_handler	= &proc_dointvec_minmax,
822 		.strategy	= &sysctl_intvec,
823 		.extra1		= &zero,
824 		.extra2		= &one_hundred,
825 	},
826 	{
827 		.ctl_name	= VM_DIRTY_RATIO,
828 		.procname	= "dirty_ratio",
829 		.data		= &vm_dirty_ratio,
830 		.maxlen		= sizeof(vm_dirty_ratio),
831 		.mode		= 0644,
832 		.proc_handler	= &dirty_ratio_handler,
833 		.strategy	= &sysctl_intvec,
834 		.extra1		= &zero,
835 		.extra2		= &one_hundred,
836 	},
837 	{
838 		.procname	= "dirty_writeback_centisecs",
839 		.data		= &dirty_writeback_interval,
840 		.maxlen		= sizeof(dirty_writeback_interval),
841 		.mode		= 0644,
842 		.proc_handler	= &dirty_writeback_centisecs_handler,
843 	},
844 	{
845 		.procname	= "dirty_expire_centisecs",
846 		.data		= &dirty_expire_interval,
847 		.maxlen		= sizeof(dirty_expire_interval),
848 		.mode		= 0644,
849 		.proc_handler	= &proc_dointvec_userhz_jiffies,
850 	},
851 	{
852 		.ctl_name	= VM_NR_PDFLUSH_THREADS,
853 		.procname	= "nr_pdflush_threads",
854 		.data		= &nr_pdflush_threads,
855 		.maxlen		= sizeof nr_pdflush_threads,
856 		.mode		= 0444 /* read-only*/,
857 		.proc_handler	= &proc_dointvec,
858 	},
859 	{
860 		.ctl_name	= VM_SWAPPINESS,
861 		.procname	= "swappiness",
862 		.data		= &vm_swappiness,
863 		.maxlen		= sizeof(vm_swappiness),
864 		.mode		= 0644,
865 		.proc_handler	= &proc_dointvec_minmax,
866 		.strategy	= &sysctl_intvec,
867 		.extra1		= &zero,
868 		.extra2		= &one_hundred,
869 	},
870 #ifdef CONFIG_HUGETLB_PAGE
871 	 {
872 		.procname	= "nr_hugepages",
873 		.data		= &max_huge_pages,
874 		.maxlen		= sizeof(unsigned long),
875 		.mode		= 0644,
876 		.proc_handler	= &hugetlb_sysctl_handler,
877 		.extra1		= (void *)&hugetlb_zero,
878 		.extra2		= (void *)&hugetlb_infinity,
879 	 },
880 	 {
881 		.ctl_name	= VM_HUGETLB_GROUP,
882 		.procname	= "hugetlb_shm_group",
883 		.data		= &sysctl_hugetlb_shm_group,
884 		.maxlen		= sizeof(gid_t),
885 		.mode		= 0644,
886 		.proc_handler	= &proc_dointvec,
887 	 },
888 	 {
889 		.ctl_name	= CTL_UNNUMBERED,
890 		.procname	= "hugepages_treat_as_movable",
891 		.data		= &hugepages_treat_as_movable,
892 		.maxlen		= sizeof(int),
893 		.mode		= 0644,
894 		.proc_handler	= &hugetlb_treat_movable_handler,
895 	},
896 	{
897 		.ctl_name	= CTL_UNNUMBERED,
898 		.procname	= "hugetlb_dynamic_pool",
899 		.data		= &hugetlb_dynamic_pool,
900 		.maxlen		= sizeof(hugetlb_dynamic_pool),
901 		.mode		= 0644,
902 		.proc_handler	= &proc_dointvec,
903 	},
904 #endif
905 	{
906 		.ctl_name	= VM_LOWMEM_RESERVE_RATIO,
907 		.procname	= "lowmem_reserve_ratio",
908 		.data		= &sysctl_lowmem_reserve_ratio,
909 		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
910 		.mode		= 0644,
911 		.proc_handler	= &lowmem_reserve_ratio_sysctl_handler,
912 		.strategy	= &sysctl_intvec,
913 	},
914 	{
915 		.ctl_name	= VM_DROP_PAGECACHE,
916 		.procname	= "drop_caches",
917 		.data		= &sysctl_drop_caches,
918 		.maxlen		= sizeof(int),
919 		.mode		= 0644,
920 		.proc_handler	= drop_caches_sysctl_handler,
921 		.strategy	= &sysctl_intvec,
922 	},
923 	{
924 		.ctl_name	= VM_MIN_FREE_KBYTES,
925 		.procname	= "min_free_kbytes",
926 		.data		= &min_free_kbytes,
927 		.maxlen		= sizeof(min_free_kbytes),
928 		.mode		= 0644,
929 		.proc_handler	= &min_free_kbytes_sysctl_handler,
930 		.strategy	= &sysctl_intvec,
931 		.extra1		= &zero,
932 	},
933 	{
934 		.ctl_name	= VM_PERCPU_PAGELIST_FRACTION,
935 		.procname	= "percpu_pagelist_fraction",
936 		.data		= &percpu_pagelist_fraction,
937 		.maxlen		= sizeof(percpu_pagelist_fraction),
938 		.mode		= 0644,
939 		.proc_handler	= &percpu_pagelist_fraction_sysctl_handler,
940 		.strategy	= &sysctl_intvec,
941 		.extra1		= &min_percpu_pagelist_fract,
942 	},
943 #ifdef CONFIG_MMU
944 	{
945 		.ctl_name	= VM_MAX_MAP_COUNT,
946 		.procname	= "max_map_count",
947 		.data		= &sysctl_max_map_count,
948 		.maxlen		= sizeof(sysctl_max_map_count),
949 		.mode		= 0644,
950 		.proc_handler	= &proc_dointvec
951 	},
952 #endif
953 	{
954 		.ctl_name	= VM_LAPTOP_MODE,
955 		.procname	= "laptop_mode",
956 		.data		= &laptop_mode,
957 		.maxlen		= sizeof(laptop_mode),
958 		.mode		= 0644,
959 		.proc_handler	= &proc_dointvec_jiffies,
960 		.strategy	= &sysctl_jiffies,
961 	},
962 	{
963 		.ctl_name	= VM_BLOCK_DUMP,
964 		.procname	= "block_dump",
965 		.data		= &block_dump,
966 		.maxlen		= sizeof(block_dump),
967 		.mode		= 0644,
968 		.proc_handler	= &proc_dointvec,
969 		.strategy	= &sysctl_intvec,
970 		.extra1		= &zero,
971 	},
972 	{
973 		.ctl_name	= VM_VFS_CACHE_PRESSURE,
974 		.procname	= "vfs_cache_pressure",
975 		.data		= &sysctl_vfs_cache_pressure,
976 		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
977 		.mode		= 0644,
978 		.proc_handler	= &proc_dointvec,
979 		.strategy	= &sysctl_intvec,
980 		.extra1		= &zero,
981 	},
982 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
983 	{
984 		.ctl_name	= VM_LEGACY_VA_LAYOUT,
985 		.procname	= "legacy_va_layout",
986 		.data		= &sysctl_legacy_va_layout,
987 		.maxlen		= sizeof(sysctl_legacy_va_layout),
988 		.mode		= 0644,
989 		.proc_handler	= &proc_dointvec,
990 		.strategy	= &sysctl_intvec,
991 		.extra1		= &zero,
992 	},
993 #endif
994 #ifdef CONFIG_NUMA
995 	{
996 		.ctl_name	= VM_ZONE_RECLAIM_MODE,
997 		.procname	= "zone_reclaim_mode",
998 		.data		= &zone_reclaim_mode,
999 		.maxlen		= sizeof(zone_reclaim_mode),
1000 		.mode		= 0644,
1001 		.proc_handler	= &proc_dointvec,
1002 		.strategy	= &sysctl_intvec,
1003 		.extra1		= &zero,
1004 	},
1005 	{
1006 		.ctl_name	= VM_MIN_UNMAPPED,
1007 		.procname	= "min_unmapped_ratio",
1008 		.data		= &sysctl_min_unmapped_ratio,
1009 		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
1010 		.mode		= 0644,
1011 		.proc_handler	= &sysctl_min_unmapped_ratio_sysctl_handler,
1012 		.strategy	= &sysctl_intvec,
1013 		.extra1		= &zero,
1014 		.extra2		= &one_hundred,
1015 	},
1016 	{
1017 		.ctl_name	= VM_MIN_SLAB,
1018 		.procname	= "min_slab_ratio",
1019 		.data		= &sysctl_min_slab_ratio,
1020 		.maxlen		= sizeof(sysctl_min_slab_ratio),
1021 		.mode		= 0644,
1022 		.proc_handler	= &sysctl_min_slab_ratio_sysctl_handler,
1023 		.strategy	= &sysctl_intvec,
1024 		.extra1		= &zero,
1025 		.extra2		= &one_hundred,
1026 	},
1027 #endif
1028 #ifdef CONFIG_SMP
1029 	{
1030 		.ctl_name	= CTL_UNNUMBERED,
1031 		.procname	= "stat_interval",
1032 		.data		= &sysctl_stat_interval,
1033 		.maxlen		= sizeof(sysctl_stat_interval),
1034 		.mode		= 0644,
1035 		.proc_handler	= &proc_dointvec_jiffies,
1036 		.strategy	= &sysctl_jiffies,
1037 	},
1038 #endif
1039 #ifdef CONFIG_SECURITY
1040 	{
1041 		.ctl_name	= CTL_UNNUMBERED,
1042 		.procname	= "mmap_min_addr",
1043 		.data		= &mmap_min_addr,
1044 		.maxlen         = sizeof(unsigned long),
1045 		.mode		= 0644,
1046 		.proc_handler	= &proc_doulongvec_minmax,
1047 	},
1048 #endif
1049 #ifdef CONFIG_NUMA
1050 	{
1051 		.ctl_name	= CTL_UNNUMBERED,
1052 		.procname	= "numa_zonelist_order",
1053 		.data		= &numa_zonelist_order,
1054 		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
1055 		.mode		= 0644,
1056 		.proc_handler	= &numa_zonelist_order_handler,
1057 		.strategy	= &sysctl_string,
1058 	},
1059 #endif
1060 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1061    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1062 	{
1063 		.ctl_name	= VM_VDSO_ENABLED,
1064 		.procname	= "vdso_enabled",
1065 		.data		= &vdso_enabled,
1066 		.maxlen		= sizeof(vdso_enabled),
1067 		.mode		= 0644,
1068 		.proc_handler	= &proc_dointvec,
1069 		.strategy	= &sysctl_intvec,
1070 		.extra1		= &zero,
1071 	},
1072 #endif
1073 /*
1074  * NOTE: do not add new entries to this table unless you have read
1075  * Documentation/sysctl/ctl_unnumbered.txt
1076  */
1077 	{ .ctl_name = 0 }
1078 };
1079 
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
/*
 * Deliberately empty table (terminator only).  fs_table uses it as the
 * .child of its "binfmt_misc" entry, so that directory exists but has
 * no sysctl files of its own.
 */
static struct ctl_table binfmt_misc_table[] = {
	{ .ctl_name = 0 }
};
#endif
1085 
1086 static struct ctl_table fs_table[] = {
1087 	{
1088 		.ctl_name	= FS_NRINODE,
1089 		.procname	= "inode-nr",
1090 		.data		= &inodes_stat,
1091 		.maxlen		= 2*sizeof(int),
1092 		.mode		= 0444,
1093 		.proc_handler	= &proc_dointvec,
1094 	},
1095 	{
1096 		.ctl_name	= FS_STATINODE,
1097 		.procname	= "inode-state",
1098 		.data		= &inodes_stat,
1099 		.maxlen		= 7*sizeof(int),
1100 		.mode		= 0444,
1101 		.proc_handler	= &proc_dointvec,
1102 	},
1103 	{
1104 		.procname	= "file-nr",
1105 		.data		= &files_stat,
1106 		.maxlen		= 3*sizeof(int),
1107 		.mode		= 0444,
1108 		.proc_handler	= &proc_nr_files,
1109 	},
1110 	{
1111 		.ctl_name	= FS_MAXFILE,
1112 		.procname	= "file-max",
1113 		.data		= &files_stat.max_files,
1114 		.maxlen		= sizeof(int),
1115 		.mode		= 0644,
1116 		.proc_handler	= &proc_dointvec,
1117 	},
1118 	{
1119 		.ctl_name	= FS_DENTRY,
1120 		.procname	= "dentry-state",
1121 		.data		= &dentry_stat,
1122 		.maxlen		= 6*sizeof(int),
1123 		.mode		= 0444,
1124 		.proc_handler	= &proc_dointvec,
1125 	},
1126 	{
1127 		.ctl_name	= FS_OVERFLOWUID,
1128 		.procname	= "overflowuid",
1129 		.data		= &fs_overflowuid,
1130 		.maxlen		= sizeof(int),
1131 		.mode		= 0644,
1132 		.proc_handler	= &proc_dointvec_minmax,
1133 		.strategy	= &sysctl_intvec,
1134 		.extra1		= &minolduid,
1135 		.extra2		= &maxolduid,
1136 	},
1137 	{
1138 		.ctl_name	= FS_OVERFLOWGID,
1139 		.procname	= "overflowgid",
1140 		.data		= &fs_overflowgid,
1141 		.maxlen		= sizeof(int),
1142 		.mode		= 0644,
1143 		.proc_handler	= &proc_dointvec_minmax,
1144 		.strategy	= &sysctl_intvec,
1145 		.extra1		= &minolduid,
1146 		.extra2		= &maxolduid,
1147 	},
1148 	{
1149 		.ctl_name	= FS_LEASES,
1150 		.procname	= "leases-enable",
1151 		.data		= &leases_enable,
1152 		.maxlen		= sizeof(int),
1153 		.mode		= 0644,
1154 		.proc_handler	= &proc_dointvec,
1155 	},
1156 #ifdef CONFIG_DNOTIFY
1157 	{
1158 		.ctl_name	= FS_DIR_NOTIFY,
1159 		.procname	= "dir-notify-enable",
1160 		.data		= &dir_notify_enable,
1161 		.maxlen		= sizeof(int),
1162 		.mode		= 0644,
1163 		.proc_handler	= &proc_dointvec,
1164 	},
1165 #endif
1166 #ifdef CONFIG_MMU
1167 	{
1168 		.ctl_name	= FS_LEASE_TIME,
1169 		.procname	= "lease-break-time",
1170 		.data		= &lease_break_time,
1171 		.maxlen		= sizeof(int),
1172 		.mode		= 0644,
1173 		.proc_handler	= &proc_dointvec_minmax,
1174 		.strategy	= &sysctl_intvec,
1175 		.extra1		= &zero,
1176 		.extra2		= &two,
1177 	},
1178 	{
1179 		.procname	= "aio-nr",
1180 		.data		= &aio_nr,
1181 		.maxlen		= sizeof(aio_nr),
1182 		.mode		= 0444,
1183 		.proc_handler	= &proc_doulongvec_minmax,
1184 	},
1185 	{
1186 		.procname	= "aio-max-nr",
1187 		.data		= &aio_max_nr,
1188 		.maxlen		= sizeof(aio_max_nr),
1189 		.mode		= 0644,
1190 		.proc_handler	= &proc_doulongvec_minmax,
1191 	},
1192 #ifdef CONFIG_INOTIFY_USER
1193 	{
1194 		.ctl_name	= FS_INOTIFY,
1195 		.procname	= "inotify",
1196 		.mode		= 0555,
1197 		.child		= inotify_table,
1198 	},
1199 #endif
1200 #endif
1201 	{
1202 		.ctl_name	= KERN_SETUID_DUMPABLE,
1203 		.procname	= "suid_dumpable",
1204 		.data		= &suid_dumpable,
1205 		.maxlen		= sizeof(int),
1206 		.mode		= 0644,
1207 		.proc_handler	= &proc_dointvec,
1208 	},
1209 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1210 	{
1211 		.ctl_name	= CTL_UNNUMBERED,
1212 		.procname	= "binfmt_misc",
1213 		.mode		= 0555,
1214 		.child		= binfmt_misc_table,
1215 	},
1216 #endif
1217 /*
1218  * NOTE: do not add new entries to this table unless you have read
1219  * Documentation/sysctl/ctl_unnumbered.txt
1220  */
1221 	{ .ctl_name = 0 }
1222 };
1223 
1224 static struct ctl_table debug_table[] = {
1225 #if defined(CONFIG_X86) || defined(CONFIG_PPC)
1226 	{
1227 		.ctl_name	= CTL_UNNUMBERED,
1228 		.procname	= "exception-trace",
1229 		.data		= &show_unhandled_signals,
1230 		.maxlen		= sizeof(int),
1231 		.mode		= 0644,
1232 		.proc_handler	= proc_dointvec
1233 	},
1234 #endif
1235 	{ .ctl_name = 0 }
1236 };
1237 
1238 static struct ctl_table dev_table[] = {
1239 	{ .ctl_name = 0 }
1240 };
1241 
1242 static DEFINE_SPINLOCK(sysctl_lock);
1243 
1244 /* called under sysctl_lock */
1245 static int use_table(struct ctl_table_header *p)
1246 {
1247 	if (unlikely(p->unregistering))
1248 		return 0;
1249 	p->used++;
1250 	return 1;
1251 }
1252 
1253 /* called under sysctl_lock */
1254 static void unuse_table(struct ctl_table_header *p)
1255 {
1256 	if (!--p->used)
1257 		if (unlikely(p->unregistering))
1258 			complete(p->unregistering);
1259 }
1260 
1261 /* called under sysctl_lock, will reacquire if has to wait */
1262 static void start_unregistering(struct ctl_table_header *p)
1263 {
1264 	/*
1265 	 * if p->used is 0, nobody will ever touch that entry again;
1266 	 * we'll eliminate all paths to it before dropping sysctl_lock
1267 	 */
1268 	if (unlikely(p->used)) {
1269 		struct completion wait;
1270 		init_completion(&wait);
1271 		p->unregistering = &wait;
1272 		spin_unlock(&sysctl_lock);
1273 		wait_for_completion(&wait);
1274 		spin_lock(&sysctl_lock);
1275 	}
1276 	/*
1277 	 * do not remove from the list until nobody holds it; walking the
1278 	 * list in do_sysctl() relies on that.
1279 	 */
1280 	list_del_init(&p->ctl_entry);
1281 }
1282 
1283 void sysctl_head_finish(struct ctl_table_header *head)
1284 {
1285 	if (!head)
1286 		return;
1287 	spin_lock(&sysctl_lock);
1288 	unuse_table(head);
1289 	spin_unlock(&sysctl_lock);
1290 }
1291 
1292 struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
1293 {
1294 	struct ctl_table_header *head;
1295 	struct list_head *tmp;
1296 	spin_lock(&sysctl_lock);
1297 	if (prev) {
1298 		tmp = &prev->ctl_entry;
1299 		unuse_table(prev);
1300 		goto next;
1301 	}
1302 	tmp = &root_table_header.ctl_entry;
1303 	for (;;) {
1304 		head = list_entry(tmp, struct ctl_table_header, ctl_entry);
1305 
1306 		if (!use_table(head))
1307 			goto next;
1308 		spin_unlock(&sysctl_lock);
1309 		return head;
1310 	next:
1311 		tmp = tmp->next;
1312 		if (tmp == &root_table_header.ctl_entry)
1313 			break;
1314 	}
1315 	spin_unlock(&sysctl_lock);
1316 	return NULL;
1317 }
1318 
1319 #ifdef CONFIG_SYSCTL_SYSCALL
1320 int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
1321 	       void __user *newval, size_t newlen)
1322 {
1323 	struct ctl_table_header *head;
1324 	int error = -ENOTDIR;
1325 
1326 	if (nlen <= 0 || nlen >= CTL_MAXNAME)
1327 		return -ENOTDIR;
1328 	if (oldval) {
1329 		int old_len;
1330 		if (!oldlenp || get_user(old_len, oldlenp))
1331 			return -EFAULT;
1332 	}
1333 
1334 	for (head = sysctl_head_next(NULL); head;
1335 			head = sysctl_head_next(head)) {
1336 		error = parse_table(name, nlen, oldval, oldlenp,
1337 					newval, newlen, head->ctl_table);
1338 		if (error != -ENOTDIR) {
1339 			sysctl_head_finish(head);
1340 			break;
1341 		}
1342 	}
1343 	return error;
1344 }
1345 
1346 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
1347 {
1348 	struct __sysctl_args tmp;
1349 	int error;
1350 
1351 	if (copy_from_user(&tmp, args, sizeof(tmp)))
1352 		return -EFAULT;
1353 
1354 	error = deprecated_sysctl_warning(&tmp);
1355 	if (error)
1356 		goto out;
1357 
1358 	lock_kernel();
1359 	error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
1360 			  tmp.newval, tmp.newlen);
1361 	unlock_kernel();
1362 out:
1363 	return error;
1364 }
1365 #endif /* CONFIG_SYSCTL_SYSCALL */
1366 
1367 /*
1368  * sysctl_perm does NOT grant the superuser all rights automatically, because
1369  * some sysctl variables are readonly even to root.
1370  */
1371 
1372 static int test_perm(int mode, int op)
1373 {
1374 	if (!current->euid)
1375 		mode >>= 6;
1376 	else if (in_egroup_p(0))
1377 		mode >>= 3;
1378 	if ((mode & op & 0007) == op)
1379 		return 0;
1380 	return -EACCES;
1381 }
1382 
1383 int sysctl_perm(struct ctl_table *table, int op)
1384 {
1385 	int error;
1386 	error = security_sysctl(table, op);
1387 	if (error)
1388 		return error;
1389 	return test_perm(table->mode, op);
1390 }
1391 
1392 #ifdef CONFIG_SYSCTL_SYSCALL
1393 static int parse_table(int __user *name, int nlen,
1394 		       void __user *oldval, size_t __user *oldlenp,
1395 		       void __user *newval, size_t newlen,
1396 		       struct ctl_table *table)
1397 {
1398 	int n;
1399 repeat:
1400 	if (!nlen)
1401 		return -ENOTDIR;
1402 	if (get_user(n, name))
1403 		return -EFAULT;
1404 	for ( ; table->ctl_name || table->procname; table++) {
1405 		if (!table->ctl_name)
1406 			continue;
1407 		if (n == table->ctl_name) {
1408 			int error;
1409 			if (table->child) {
1410 				if (sysctl_perm(table, 001))
1411 					return -EPERM;
1412 				name++;
1413 				nlen--;
1414 				table = table->child;
1415 				goto repeat;
1416 			}
1417 			error = do_sysctl_strategy(table, name, nlen,
1418 						   oldval, oldlenp,
1419 						   newval, newlen);
1420 			return error;
1421 		}
1422 	}
1423 	return -ENOTDIR;
1424 }
1425 
1426 /* Perform the actual read/write of a sysctl table entry. */
1427 int do_sysctl_strategy (struct ctl_table *table,
1428 			int __user *name, int nlen,
1429 			void __user *oldval, size_t __user *oldlenp,
1430 			void __user *newval, size_t newlen)
1431 {
1432 	int op = 0, rc;
1433 
1434 	if (oldval)
1435 		op |= 004;
1436 	if (newval)
1437 		op |= 002;
1438 	if (sysctl_perm(table, op))
1439 		return -EPERM;
1440 
1441 	if (table->strategy) {
1442 		rc = table->strategy(table, name, nlen, oldval, oldlenp,
1443 				     newval, newlen);
1444 		if (rc < 0)
1445 			return rc;
1446 		if (rc > 0)
1447 			return 0;
1448 	}
1449 
1450 	/* If there is no strategy routine, or if the strategy returns
1451 	 * zero, proceed with automatic r/w */
1452 	if (table->data && table->maxlen) {
1453 		rc = sysctl_data(table, name, nlen, oldval, oldlenp,
1454 				 newval, newlen);
1455 		if (rc < 0)
1456 			return rc;
1457 	}
1458 	return 0;
1459 }
1460 #endif /* CONFIG_SYSCTL_SYSCALL */
1461 
1462 static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1463 {
1464 	for (; table->ctl_name || table->procname; table++) {
1465 		table->parent = parent;
1466 		if (table->child)
1467 			sysctl_set_parent(table, table->child);
1468 	}
1469 }
1470 
1471 static __init int sysctl_init(void)
1472 {
1473 	int err;
1474 	sysctl_set_parent(NULL, root_table);
1475 	err = sysctl_check_table(root_table);
1476 	return 0;
1477 }
1478 
1479 core_initcall(sysctl_init);
1480 
1481 /**
1482  * register_sysctl_table - register a sysctl hierarchy
1483  * @table: the top-level table structure
1484  *
 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
 * array. An entry with a ctl_name of 0 and a %NULL procname terminates the
 * table.
1487  *
1488  * The members of the &struct ctl_table structure are used as follows:
1489  *
1490  * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
1491  *            must be unique within that level of sysctl
1492  *
1493  * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1494  *            enter a sysctl file
1495  *
1496  * data - a pointer to data for use by proc_handler
1497  *
1498  * maxlen - the maximum size in bytes of the data
1499  *
1500  * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1501  *
1502  * child - a pointer to the child sysctl table if this entry is a directory, or
1503  *         %NULL.
1504  *
1505  * proc_handler - the text handler routine (described below)
1506  *
1507  * strategy - the strategy routine (described below)
1508  *
1509  * de - for internal use by the sysctl routines
1510  *
1511  * extra1, extra2 - extra pointers usable by the proc handler routines
1512  *
1513  * Leaf nodes in the sysctl tree will be represented by a single file
1514  * under /proc; non-leaf nodes will be represented by directories.
1515  *
1516  * sysctl(2) can automatically manage read and write requests through
1517  * the sysctl table.  The data and maxlen fields of the ctl_table
1518  * struct enable minimal validation of the values being written to be
1519  * performed, and the mode field allows minimal authentication.
1520  *
1521  * More sophisticated management can be enabled by the provision of a
1522  * strategy routine with the table entry.  This will be called before
1523  * any automatic read or write of the data is performed.
1524  *
1525  * The strategy routine may return
1526  *
1527  * < 0 - Error occurred (error is passed to user process)
1528  *
1529  * 0   - OK - proceed with automatic read or write.
1530  *
1531  * > 0 - OK - read or write has been done by the strategy routine, so
1532  *       return immediately.
1533  *
1534  * There must be a proc_handler routine for any terminal nodes
1535  * mirrored under /proc/sys (non-terminals are handled by a built-in
1536  * directory handler).  Several default handlers are available to
1537  * cover common cases -
1538  *
1539  * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1540  * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1541  * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1542  *
1543  * It is the handler's job to read the input buffer from user memory
1544  * and process it. The handler should return 0 on success.
1545  *
1546  * This routine returns %NULL on a failure to register, and a pointer
1547  * to the table header on success.
1548  */
1549 struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
1550 {
1551 	struct ctl_table_header *tmp;
1552 	tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
1553 	if (!tmp)
1554 		return NULL;
1555 	tmp->ctl_table = table;
1556 	INIT_LIST_HEAD(&tmp->ctl_entry);
1557 	tmp->used = 0;
1558 	tmp->unregistering = NULL;
1559 	sysctl_set_parent(NULL, table);
1560 	if (sysctl_check_table(tmp->ctl_table)) {
1561 		kfree(tmp);
1562 		return NULL;
1563 	}
1564 	spin_lock(&sysctl_lock);
1565 	list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
1566 	spin_unlock(&sysctl_lock);
1567 	return tmp;
1568 }
1569 
1570 /**
1571  * unregister_sysctl_table - unregister a sysctl table hierarchy
1572  * @header: the header returned from register_sysctl_table
1573  *
1574  * Unregisters the sysctl table and all children. proc entries may not
1575  * actually be removed until they are no longer used by anyone.
1576  */
1577 void unregister_sysctl_table(struct ctl_table_header * header)
1578 {
1579 	might_sleep();
1580 	spin_lock(&sysctl_lock);
1581 	start_unregistering(header);
1582 	spin_unlock(&sysctl_lock);
1583 	kfree(header);
1584 }
1585 
1586 #else /* !CONFIG_SYSCTL */
1587 struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
1588 {
1589 	return NULL;
1590 }
1591 
/* Without CONFIG_SYSCTL there is nothing to unregister; does nothing. */
void
unregister_sysctl_table(struct ctl_table_header * table)
{
}
1595 
1596 #endif /* CONFIG_SYSCTL */
1597 
1598 /*
1599  * /proc/sys support
1600  */
1601 
1602 #ifdef CONFIG_PROC_SYSCTL
1603 
1604 static int _proc_do_string(void* data, int maxlen, int write,
1605 			   struct file *filp, void __user *buffer,
1606 			   size_t *lenp, loff_t *ppos)
1607 {
1608 	size_t len;
1609 	char __user *p;
1610 	char c;
1611 
1612 	if (!data || !maxlen || !*lenp) {
1613 		*lenp = 0;
1614 		return 0;
1615 	}
1616 
1617 	if (write) {
1618 		len = 0;
1619 		p = buffer;
1620 		while (len < *lenp) {
1621 			if (get_user(c, p++))
1622 				return -EFAULT;
1623 			if (c == 0 || c == '\n')
1624 				break;
1625 			len++;
1626 		}
1627 		if (len >= maxlen)
1628 			len = maxlen-1;
1629 		if(copy_from_user(data, buffer, len))
1630 			return -EFAULT;
1631 		((char *) data)[len] = 0;
1632 		*ppos += *lenp;
1633 	} else {
1634 		len = strlen(data);
1635 		if (len > maxlen)
1636 			len = maxlen;
1637 
1638 		if (*ppos > len) {
1639 			*lenp = 0;
1640 			return 0;
1641 		}
1642 
1643 		data += *ppos;
1644 		len  -= *ppos;
1645 
1646 		if (len > *lenp)
1647 			len = *lenp;
1648 		if (len)
1649 			if(copy_to_user(buffer, data, len))
1650 				return -EFAULT;
1651 		if (len < *lenp) {
1652 			if(put_user('\n', ((char __user *) buffer) + len))
1653 				return -EFAULT;
1654 			len++;
1655 		}
1656 		*lenp = len;
1657 		*ppos += len;
1658 	}
1659 	return 0;
1660 }
1661 
1662 /**
1663  * proc_dostring - read a string sysctl
1664  * @table: the sysctl table
1665  * @write: %TRUE if this is a write to the sysctl file
1666  * @filp: the file structure
1667  * @buffer: the user buffer
1668  * @lenp: the size of the user buffer
1669  * @ppos: file position
1670  *
1671  * Reads/writes a string from/to the user buffer. If the kernel
1672  * buffer provided is not large enough to hold the string, the
1673  * string is truncated. The copied string is %NULL-terminated.
1674  * If the string is being read by the user process, it is copied
1675  * and a newline '\n' is added. It is truncated if the buffer is
1676  * not large enough.
1677  *
1678  * Returns 0 on success.
1679  */
1680 int proc_dostring(struct ctl_table *table, int write, struct file *filp,
1681 		  void __user *buffer, size_t *lenp, loff_t *ppos)
1682 {
1683 	return _proc_do_string(table->data, table->maxlen, write, filp,
1684 			       buffer, lenp, ppos);
1685 }
1686 
1687 
/*
 * Default conversion between a parsed (sign, magnitude) pair and the
 * stored int.
 *
 * @negp:  sign flag; read on write, set on read (non-zero = negative)
 * @lvalp: magnitude; read on write, set on read
 * @valp:  the int being read or written
 * @write: non-zero to store *@negp / *@lvalp into *@valp, zero to split
 *         *@valp into *@negp / *@lvalp
 * @data:  unused
 *
 * Always returns 0.
 */
static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		*valp = *negp ? -*lvalp : *lvalp;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows
			 * for INT_MIN and would give a wrong magnitude
			 * once widened to unsigned long.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1706 
1707 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
1708 		  int write, struct file *filp, void __user *buffer,
1709 		  size_t *lenp, loff_t *ppos,
1710 		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1711 			      int write, void *data),
1712 		  void *data)
1713 {
1714 #define TMPBUFLEN 21
1715 	int *i, vleft, first=1, neg, val;
1716 	unsigned long lval;
1717 	size_t left, len;
1718 
1719 	char buf[TMPBUFLEN], *p;
1720 	char __user *s = buffer;
1721 
1722 	if (!tbl_data || !table->maxlen || !*lenp ||
1723 	    (*ppos && !write)) {
1724 		*lenp = 0;
1725 		return 0;
1726 	}
1727 
1728 	i = (int *) tbl_data;
1729 	vleft = table->maxlen / sizeof(*i);
1730 	left = *lenp;
1731 
1732 	if (!conv)
1733 		conv = do_proc_dointvec_conv;
1734 
1735 	for (; left && vleft--; i++, first=0) {
1736 		if (write) {
1737 			while (left) {
1738 				char c;
1739 				if (get_user(c, s))
1740 					return -EFAULT;
1741 				if (!isspace(c))
1742 					break;
1743 				left--;
1744 				s++;
1745 			}
1746 			if (!left)
1747 				break;
1748 			neg = 0;
1749 			len = left;
1750 			if (len > sizeof(buf) - 1)
1751 				len = sizeof(buf) - 1;
1752 			if (copy_from_user(buf, s, len))
1753 				return -EFAULT;
1754 			buf[len] = 0;
1755 			p = buf;
1756 			if (*p == '-' && left > 1) {
1757 				neg = 1;
1758 				p++;
1759 			}
1760 			if (*p < '0' || *p > '9')
1761 				break;
1762 
1763 			lval = simple_strtoul(p, &p, 0);
1764 
1765 			len = p-buf;
1766 			if ((len < left) && *p && !isspace(*p))
1767 				break;
1768 			if (neg)
1769 				val = -val;
1770 			s += len;
1771 			left -= len;
1772 
1773 			if (conv(&neg, &lval, i, 1, data))
1774 				break;
1775 		} else {
1776 			p = buf;
1777 			if (!first)
1778 				*p++ = '\t';
1779 
1780 			if (conv(&neg, &lval, i, 0, data))
1781 				break;
1782 
1783 			sprintf(p, "%s%lu", neg ? "-" : "", lval);
1784 			len = strlen(buf);
1785 			if (len > left)
1786 				len = left;
1787 			if(copy_to_user(s, buf, len))
1788 				return -EFAULT;
1789 			left -= len;
1790 			s += len;
1791 		}
1792 	}
1793 
1794 	if (!write && !first && left) {
1795 		if(put_user('\n', s))
1796 			return -EFAULT;
1797 		left--, s++;
1798 	}
1799 	if (write) {
1800 		while (left) {
1801 			char c;
1802 			if (get_user(c, s++))
1803 				return -EFAULT;
1804 			if (!isspace(c))
1805 				break;
1806 			left--;
1807 		}
1808 	}
1809 	if (write && first)
1810 		return -EINVAL;
1811 	*lenp -= left;
1812 	*ppos += *lenp;
1813 	return 0;
1814 #undef TMPBUFLEN
1815 }
1816 
1817 static int do_proc_dointvec(struct ctl_table *table, int write, struct file *filp,
1818 		  void __user *buffer, size_t *lenp, loff_t *ppos,
1819 		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1820 			      int write, void *data),
1821 		  void *data)
1822 {
1823 	return __do_proc_dointvec(table->data, table, write, filp,
1824 			buffer, lenp, ppos, conv, data);
1825 }
1826 
1827 /**
1828  * proc_dointvec - read a vector of integers
1829  * @table: the sysctl table
1830  * @write: %TRUE if this is a write to the sysctl file
1831  * @filp: the file structure
1832  * @buffer: the user buffer
1833  * @lenp: the size of the user buffer
1834  * @ppos: file position
1835  *
1836  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1837  * values from/to the user buffer, treated as an ASCII string.
1838  *
1839  * Returns 0 on success.
1840  */
1841 int proc_dointvec(struct ctl_table *table, int write, struct file *filp,
1842 		     void __user *buffer, size_t *lenp, loff_t *ppos)
1843 {
1844     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1845 		    	    NULL,NULL);
1846 }
1847 
/* How a written value is combined with the stored one. */
#define OP_SET	0
#define OP_AND	1
#define OP_OR	2

/*
 * Conversion routine that merges a written value into the stored int
 * according to an operation code.
 *
 * @negp:  sign flag; read on write, set on read (non-zero = negative)
 * @lvalp: magnitude; read on write, set on read
 * @valp:  the int being read or written
 * @write: non-zero to merge the parsed value into *@valp, zero to split
 *         *@valp into *@negp / *@lvalp
 * @data:  points to an int holding OP_SET, OP_AND or OP_OR
 *
 * On a write the value replaces (OP_SET), is and-ed into (OP_AND) or is
 * or-ed into (OP_OR) *@valp; any other code leaves *@valp untouched.
 * Always returns 0.
 */
static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp,
				      int *valp,
				      int write, void *data)
{
	int op = *(int *)data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		switch(op) {
		case OP_SET:	*valp = val; break;
		case OP_AND:	*valp &= val; break;
		case OP_OR:	*valp |= val; break;
		}
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows
			 * for INT_MIN and would give a wrong magnitude
			 * once widened to unsigned long.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1876 
1877 #ifdef CONFIG_SECURITY_CAPABILITIES
1878 /*
1879  *	init may raise the set.
1880  */
1881 
1882 int proc_dointvec_bset(struct ctl_table *table, int write, struct file *filp,
1883 			void __user *buffer, size_t *lenp, loff_t *ppos)
1884 {
1885 	int op;
1886 
1887 	if (write && !capable(CAP_SYS_MODULE)) {
1888 		return -EPERM;
1889 	}
1890 
1891 	op = is_global_init(current) ? OP_SET : OP_AND;
1892 	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1893 				do_proc_dointvec_bset_conv,&op);
1894 }
1895 #endif /* def CONFIG_SECURITY_CAPABILITIES */
1896 
1897 /*
1898  *	Taint values can only be increased
1899  */
1900 static int proc_dointvec_taint(struct ctl_table *table, int write, struct file *filp,
1901 			       void __user *buffer, size_t *lenp, loff_t *ppos)
1902 {
1903 	int op;
1904 
1905 	if (write && !capable(CAP_SYS_ADMIN))
1906 		return -EPERM;
1907 
1908 	op = OP_OR;
1909 	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1910 				do_proc_dointvec_bset_conv,&op);
1911 }
1912 
/* Optional bounds handed to do_proc_dointvec_minmax_conv(); NULL = no bound. */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Conversion routine that range-checks written values.
 *
 * @negp:  sign flag; read on write, set on read (non-zero = negative)
 * @lvalp: magnitude; read on write, set on read
 * @valp:  the int being read or written
 * @write: non-zero to store the parsed value, zero to split *@valp into
 *         *@negp / *@lvalp
 * @data:  points to a struct do_proc_dointvec_minmax_conv_param
 *
 * Returns 0 on success, or -EINVAL (leaving *@valp untouched) when a
 * written value is below *min or above *max.
 */
static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows
			 * for INT_MIN and would give a wrong magnitude
			 * once widened to unsigned long.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1941 
1942 /**
1943  * proc_dointvec_minmax - read a vector of integers with min/max values
1944  * @table: the sysctl table
1945  * @write: %TRUE if this is a write to the sysctl file
1946  * @filp: the file structure
1947  * @buffer: the user buffer
1948  * @lenp: the size of the user buffer
1949  * @ppos: file position
1950  *
1951  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1952  * values from/to the user buffer, treated as an ASCII string.
1953  *
1954  * This routine will ensure the values are within the range specified by
1955  * table->extra1 (min) and table->extra2 (max).
1956  *
1957  * Returns 0 on success.
1958  */
1959 int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp,
1960 		  void __user *buffer, size_t *lenp, loff_t *ppos)
1961 {
1962 	struct do_proc_dointvec_minmax_conv_param param = {
1963 		.min = (int *) table->extra1,
1964 		.max = (int *) table->extra2,
1965 	};
1966 	return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
1967 				do_proc_dointvec_minmax_conv, &param);
1968 }
1969 
1970 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
1971 				     struct file *filp,
1972 				     void __user *buffer,
1973 				     size_t *lenp, loff_t *ppos,
1974 				     unsigned long convmul,
1975 				     unsigned long convdiv)
1976 {
1977 #define TMPBUFLEN 21
1978 	unsigned long *i, *min, *max, val;
1979 	int vleft, first=1, neg;
1980 	size_t len, left;
1981 	char buf[TMPBUFLEN], *p;
1982 	char __user *s = buffer;
1983 
1984 	if (!data || !table->maxlen || !*lenp ||
1985 	    (*ppos && !write)) {
1986 		*lenp = 0;
1987 		return 0;
1988 	}
1989 
1990 	i = (unsigned long *) data;
1991 	min = (unsigned long *) table->extra1;
1992 	max = (unsigned long *) table->extra2;
1993 	vleft = table->maxlen / sizeof(unsigned long);
1994 	left = *lenp;
1995 
1996 	for (; left && vleft--; i++, min++, max++, first=0) {
1997 		if (write) {
1998 			while (left) {
1999 				char c;
2000 				if (get_user(c, s))
2001 					return -EFAULT;
2002 				if (!isspace(c))
2003 					break;
2004 				left--;
2005 				s++;
2006 			}
2007 			if (!left)
2008 				break;
2009 			neg = 0;
2010 			len = left;
2011 			if (len > TMPBUFLEN-1)
2012 				len = TMPBUFLEN-1;
2013 			if (copy_from_user(buf, s, len))
2014 				return -EFAULT;
2015 			buf[len] = 0;
2016 			p = buf;
2017 			if (*p == '-' && left > 1) {
2018 				neg = 1;
2019 				p++;
2020 			}
2021 			if (*p < '0' || *p > '9')
2022 				break;
2023 			val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
2024 			len = p-buf;
2025 			if ((len < left) && *p && !isspace(*p))
2026 				break;
2027 			if (neg)
2028 				val = -val;
2029 			s += len;
2030 			left -= len;
2031 
2032 			if(neg)
2033 				continue;
2034 			if ((min && val < *min) || (max && val > *max))
2035 				continue;
2036 			*i = val;
2037 		} else {
2038 			p = buf;
2039 			if (!first)
2040 				*p++ = '\t';
2041 			sprintf(p, "%lu", convdiv * (*i) / convmul);
2042 			len = strlen(buf);
2043 			if (len > left)
2044 				len = left;
2045 			if(copy_to_user(s, buf, len))
2046 				return -EFAULT;
2047 			left -= len;
2048 			s += len;
2049 		}
2050 	}
2051 
2052 	if (!write && !first && left) {
2053 		if(put_user('\n', s))
2054 			return -EFAULT;
2055 		left--, s++;
2056 	}
2057 	if (write) {
2058 		while (left) {
2059 			char c;
2060 			if (get_user(c, s++))
2061 				return -EFAULT;
2062 			if (!isspace(c))
2063 				break;
2064 			left--;
2065 		}
2066 	}
2067 	if (write && first)
2068 		return -EINVAL;
2069 	*lenp -= left;
2070 	*ppos += *lenp;
2071 	return 0;
2072 #undef TMPBUFLEN
2073 }
2074 
2075 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2076 				     struct file *filp,
2077 				     void __user *buffer,
2078 				     size_t *lenp, loff_t *ppos,
2079 				     unsigned long convmul,
2080 				     unsigned long convdiv)
2081 {
2082 	return __do_proc_doulongvec_minmax(table->data, table, write,
2083 			filp, buffer, lenp, ppos, convmul, convdiv);
2084 }
2085 
2086 /**
2087  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2088  * @table: the sysctl table
2089  * @write: %TRUE if this is a write to the sysctl file
2090  * @filp: the file structure
2091  * @buffer: the user buffer
2092  * @lenp: the size of the user buffer
2093  * @ppos: file position
2094  *
2095  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2096  * values from/to the user buffer, treated as an ASCII string.
2097  *
2098  * This routine will ensure the values are within the range specified by
2099  * table->extra1 (min) and table->extra2 (max).
2100  *
2101  * Returns 0 on success.
2102  */
2103 int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp,
2104 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2105 {
2106     return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l);
2107 }
2108 
2109 /**
2110  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2111  * @table: the sysctl table
2112  * @write: %TRUE if this is a write to the sysctl file
2113  * @filp: the file structure
2114  * @buffer: the user buffer
2115  * @lenp: the size of the user buffer
2116  * @ppos: file position
2117  *
2118  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2119  * values from/to the user buffer, treated as an ASCII string. The values
2120  * are treated as milliseconds, and converted to jiffies when they are stored.
2121  *
2122  * This routine will ensure the values are within the range specified by
2123  * table->extra1 (min) and table->extra2 (max).
2124  *
2125  * Returns 0 on success.
2126  */
2127 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2128 				      struct file *filp,
2129 				      void __user *buffer,
2130 				      size_t *lenp, loff_t *ppos)
2131 {
2132     return do_proc_doulongvec_minmax(table, write, filp, buffer,
2133 				     lenp, ppos, HZ, 1000l);
2134 }
2135 
2136 
2137 static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
2138 					 int *valp,
2139 					 int write, void *data)
2140 {
2141 	if (write) {
2142 		if (*lvalp > LONG_MAX / HZ)
2143 			return 1;
2144 		*valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2145 	} else {
2146 		int val = *valp;
2147 		unsigned long lval;
2148 		if (val < 0) {
2149 			*negp = -1;
2150 			lval = (unsigned long)-val;
2151 		} else {
2152 			*negp = 0;
2153 			lval = (unsigned long)val;
2154 		}
2155 		*lvalp = lval / HZ;
2156 	}
2157 	return 0;
2158 }
2159 
2160 static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
2161 						int *valp,
2162 						int write, void *data)
2163 {
2164 	if (write) {
2165 		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2166 			return 1;
2167 		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2168 	} else {
2169 		int val = *valp;
2170 		unsigned long lval;
2171 		if (val < 0) {
2172 			*negp = -1;
2173 			lval = (unsigned long)-val;
2174 		} else {
2175 			*negp = 0;
2176 			lval = (unsigned long)val;
2177 		}
2178 		*lvalp = jiffies_to_clock_t(lval);
2179 	}
2180 	return 0;
2181 }
2182 
/*
 * Conversion routine between user-visible milliseconds and stored
 * jiffies, using msecs_to_jiffies() and jiffies_to_msecs().
 *
 * @negp:  sign flag; read on write, set on read (non-zero = negative)
 * @lvalp: magnitude in milliseconds; read on write, set on read
 * @valp:  the stored int, in jiffies
 * @write: non-zero to store, zero to read back
 * @data:  unused
 *
 * Always returns 0.
 */
static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		*valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
	} else {
		int val = *valp;
		unsigned long lval;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows
			 * for INT_MIN and would give a wrong magnitude
			 * once widened to unsigned long.
			 */
			lval = -(unsigned long)val;
		} else {
			*negp = 0;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_msecs(lval);
	}
	return 0;
}
2203 
2204 /**
2205  * proc_dointvec_jiffies - read a vector of integers as seconds
2206  * @table: the sysctl table
2207  * @write: %TRUE if this is a write to the sysctl file
2208  * @filp: the file structure
2209  * @buffer: the user buffer
2210  * @lenp: the size of the user buffer
2211  * @ppos: file position
2212  *
2213  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2214  * values from/to the user buffer, treated as an ASCII string.
2215  * The values read are assumed to be in seconds, and are converted into
2216  * jiffies.
2217  *
2218  * Returns 0 on success.
2219  */
2220 int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp,
2221 			  void __user *buffer, size_t *lenp, loff_t *ppos)
2222 {
2223     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2224 		    	    do_proc_dointvec_jiffies_conv,NULL);
2225 }
2226 
2227 /**
2228  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2229  * @table: the sysctl table
2230  * @write: %TRUE if this is a write to the sysctl file
2231  * @filp: the file structure
2232  * @buffer: the user buffer
2233  * @lenp: the size of the user buffer
2234  * @ppos: pointer to the file position
2235  *
2236  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2237  * values from/to the user buffer, treated as an ASCII string.
2238  * The values read are assumed to be in 1/USER_HZ seconds, and
2239  * are converted into jiffies.
2240  *
2241  * Returns 0 on success.
2242  */
2243 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp,
2244 				 void __user *buffer, size_t *lenp, loff_t *ppos)
2245 {
2246     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2247 		    	    do_proc_dointvec_userhz_jiffies_conv,NULL);
2248 }
2249 
2250 /**
2251  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2252  * @table: the sysctl table
2253  * @write: %TRUE if this is a write to the sysctl file
2254  * @filp: the file structure
2255  * @buffer: the user buffer
2256  * @lenp: the size of the user buffer
2257  * @ppos: file position
2258  * @ppos: the current position in the file
2259  *
2260  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2261  * values from/to the user buffer, treated as an ASCII string.
2262  * The values read are assumed to be in 1/1000 seconds, and
2263  * are converted into jiffies.
2264  *
2265  * Returns 0 on success.
2266  */
2267 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp,
2268 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2269 {
2270 	return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
2271 				do_proc_dointvec_ms_jiffies_conv, NULL);
2272 }
2273 
2274 static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp,
2275 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2276 {
2277 	struct pid *new_pid;
2278 	pid_t tmp;
2279 	int r;
2280 
2281 	tmp = pid_nr_ns(cad_pid, current->nsproxy->pid_ns);
2282 
2283 	r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
2284 			       lenp, ppos, NULL, NULL);
2285 	if (r || !write)
2286 		return r;
2287 
2288 	new_pid = find_get_pid(tmp);
2289 	if (!new_pid)
2290 		return -ESRCH;
2291 
2292 	put_pid(xchg(&cad_pid, new_pid));
2293 	return 0;
2294 }
2295 
2296 #else /* CONFIG_PROC_FS */
2297 
2298 int proc_dostring(struct ctl_table *table, int write, struct file *filp,
2299 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2300 {
2301 	return -ENOSYS;
2302 }
2303 
2304 int proc_dointvec(struct ctl_table *table, int write, struct file *filp,
2305 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2306 {
2307 	return -ENOSYS;
2308 }
2309 
2310 int proc_dointvec_bset(struct ctl_table *table, int write, struct file *filp,
2311 			void __user *buffer, size_t *lenp, loff_t *ppos)
2312 {
2313 	return -ENOSYS;
2314 }
2315 
2316 int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp,
2317 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2318 {
2319 	return -ENOSYS;
2320 }
2321 
2322 int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp,
2323 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2324 {
2325 	return -ENOSYS;
2326 }
2327 
2328 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp,
2329 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2330 {
2331 	return -ENOSYS;
2332 }
2333 
2334 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp,
2335 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2336 {
2337 	return -ENOSYS;
2338 }
2339 
2340 int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp,
2341 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2342 {
2343 	return -ENOSYS;
2344 }
2345 
2346 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2347 				      struct file *filp,
2348 				      void __user *buffer,
2349 				      size_t *lenp, loff_t *ppos)
2350 {
2351     return -ENOSYS;
2352 }
2353 
2354 
2355 #endif /* CONFIG_PROC_FS */
2356 
2357 
2358 #ifdef CONFIG_SYSCTL_SYSCALL
2359 /*
2360  * General sysctl support routines
2361  */
2362 
2363 /* The generic sysctl data routine (used if no strategy routine supplied) */
2364 int sysctl_data(struct ctl_table *table, int __user *name, int nlen,
2365 		void __user *oldval, size_t __user *oldlenp,
2366 		void __user *newval, size_t newlen)
2367 {
2368 	size_t len;
2369 
2370 	/* Get out of I don't have a variable */
2371 	if (!table->data || !table->maxlen)
2372 		return -ENOTDIR;
2373 
2374 	if (oldval && oldlenp) {
2375 		if (get_user(len, oldlenp))
2376 			return -EFAULT;
2377 		if (len) {
2378 			if (len > table->maxlen)
2379 				len = table->maxlen;
2380 			if (copy_to_user(oldval, table->data, len))
2381 				return -EFAULT;
2382 			if (put_user(len, oldlenp))
2383 				return -EFAULT;
2384 		}
2385 	}
2386 
2387 	if (newval && newlen) {
2388 		if (newlen > table->maxlen)
2389 			newlen = table->maxlen;
2390 
2391 		if (copy_from_user(table->data, newval, newlen))
2392 			return -EFAULT;
2393 	}
2394 	return 1;
2395 }
2396 
2397 /* The generic string strategy routine: */
2398 int sysctl_string(struct ctl_table *table, int __user *name, int nlen,
2399 		  void __user *oldval, size_t __user *oldlenp,
2400 		  void __user *newval, size_t newlen)
2401 {
2402 	if (!table->data || !table->maxlen)
2403 		return -ENOTDIR;
2404 
2405 	if (oldval && oldlenp) {
2406 		size_t bufsize;
2407 		if (get_user(bufsize, oldlenp))
2408 			return -EFAULT;
2409 		if (bufsize) {
2410 			size_t len = strlen(table->data), copied;
2411 
2412 			/* This shouldn't trigger for a well-formed sysctl */
2413 			if (len > table->maxlen)
2414 				len = table->maxlen;
2415 
2416 			/* Copy up to a max of bufsize-1 bytes of the string */
2417 			copied = (len >= bufsize) ? bufsize - 1 : len;
2418 
2419 			if (copy_to_user(oldval, table->data, copied) ||
2420 			    put_user(0, (char __user *)(oldval + copied)))
2421 				return -EFAULT;
2422 			if (put_user(len, oldlenp))
2423 				return -EFAULT;
2424 		}
2425 	}
2426 	if (newval && newlen) {
2427 		size_t len = newlen;
2428 		if (len > table->maxlen)
2429 			len = table->maxlen;
2430 		if(copy_from_user(table->data, newval, len))
2431 			return -EFAULT;
2432 		if (len == table->maxlen)
2433 			len--;
2434 		((char *) table->data)[len] = 0;
2435 	}
2436 	return 1;
2437 }
2438 
2439 /*
2440  * This function makes sure that all of the integers in the vector
2441  * are between the minimum and maximum values given in the arrays
2442  * table->extra1 and table->extra2, respectively.
2443  */
2444 int sysctl_intvec(struct ctl_table *table, int __user *name, int nlen,
2445 		void __user *oldval, size_t __user *oldlenp,
2446 		void __user *newval, size_t newlen)
2447 {
2448 
2449 	if (newval && newlen) {
2450 		int __user *vec = (int __user *) newval;
2451 		int *min = (int *) table->extra1;
2452 		int *max = (int *) table->extra2;
2453 		size_t length;
2454 		int i;
2455 
2456 		if (newlen % sizeof(int) != 0)
2457 			return -EINVAL;
2458 
2459 		if (!table->extra1 && !table->extra2)
2460 			return 0;
2461 
2462 		if (newlen > table->maxlen)
2463 			newlen = table->maxlen;
2464 		length = newlen / sizeof(int);
2465 
2466 		for (i = 0; i < length; i++) {
2467 			int value;
2468 			if (get_user(value, vec + i))
2469 				return -EFAULT;
2470 			if (min && value < min[i])
2471 				return -EINVAL;
2472 			if (max && value > max[i])
2473 				return -EINVAL;
2474 		}
2475 	}
2476 	return 0;
2477 }
2478 
2479 /* Strategy function to convert jiffies to seconds */
2480 int sysctl_jiffies(struct ctl_table *table, int __user *name, int nlen,
2481 		void __user *oldval, size_t __user *oldlenp,
2482 		void __user *newval, size_t newlen)
2483 {
2484 	if (oldval && oldlenp) {
2485 		size_t olen;
2486 
2487 		if (get_user(olen, oldlenp))
2488 			return -EFAULT;
2489 		if (olen) {
2490 			int val;
2491 
2492 			if (olen < sizeof(int))
2493 				return -EINVAL;
2494 
2495 			val = *(int *)(table->data) / HZ;
2496 			if (put_user(val, (int __user *)oldval))
2497 				return -EFAULT;
2498 			if (put_user(sizeof(int), oldlenp))
2499 				return -EFAULT;
2500 		}
2501 	}
2502 	if (newval && newlen) {
2503 		int new;
2504 		if (newlen != sizeof(int))
2505 			return -EINVAL;
2506 		if (get_user(new, (int __user *)newval))
2507 			return -EFAULT;
2508 		*(int *)(table->data) = new*HZ;
2509 	}
2510 	return 1;
2511 }
2512 
2513 /* Strategy function to convert jiffies to seconds */
2514 int sysctl_ms_jiffies(struct ctl_table *table, int __user *name, int nlen,
2515 		void __user *oldval, size_t __user *oldlenp,
2516 		void __user *newval, size_t newlen)
2517 {
2518 	if (oldval && oldlenp) {
2519 		size_t olen;
2520 
2521 		if (get_user(olen, oldlenp))
2522 			return -EFAULT;
2523 		if (olen) {
2524 			int val;
2525 
2526 			if (olen < sizeof(int))
2527 				return -EINVAL;
2528 
2529 			val = jiffies_to_msecs(*(int *)(table->data));
2530 			if (put_user(val, (int __user *)oldval))
2531 				return -EFAULT;
2532 			if (put_user(sizeof(int), oldlenp))
2533 				return -EFAULT;
2534 		}
2535 	}
2536 	if (newval && newlen) {
2537 		int new;
2538 		if (newlen != sizeof(int))
2539 			return -EINVAL;
2540 		if (get_user(new, (int __user *)newval))
2541 			return -EFAULT;
2542 		*(int *)(table->data) = msecs_to_jiffies(new);
2543 	}
2544 	return 1;
2545 }
2546 
2547 
2548 
2549 #else /* CONFIG_SYSCTL_SYSCALL */
2550 
2551 
2552 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
2553 {
2554 	struct __sysctl_args tmp;
2555 	int error;
2556 
2557 	if (copy_from_user(&tmp, args, sizeof(tmp)))
2558 		return -EFAULT;
2559 
2560 	error = deprecated_sysctl_warning(&tmp);
2561 
2562 	/* If no error reading the parameters then just -ENOSYS ... */
2563 	if (!error)
2564 		error = -ENOSYS;
2565 
2566 	return error;
2567 }
2568 
2569 int sysctl_data(struct ctl_table *table, int __user *name, int nlen,
2570 		  void __user *oldval, size_t __user *oldlenp,
2571 		  void __user *newval, size_t newlen)
2572 {
2573 	return -ENOSYS;
2574 }
2575 
2576 int sysctl_string(struct ctl_table *table, int __user *name, int nlen,
2577 		  void __user *oldval, size_t __user *oldlenp,
2578 		  void __user *newval, size_t newlen)
2579 {
2580 	return -ENOSYS;
2581 }
2582 
2583 int sysctl_intvec(struct ctl_table *table, int __user *name, int nlen,
2584 		void __user *oldval, size_t __user *oldlenp,
2585 		void __user *newval, size_t newlen)
2586 {
2587 	return -ENOSYS;
2588 }
2589 
2590 int sysctl_jiffies(struct ctl_table *table, int __user *name, int nlen,
2591 		void __user *oldval, size_t __user *oldlenp,
2592 		void __user *newval, size_t newlen)
2593 {
2594 	return -ENOSYS;
2595 }
2596 
2597 int sysctl_ms_jiffies(struct ctl_table *table, int __user *name, int nlen,
2598 		void __user *oldval, size_t __user *oldlenp,
2599 		void __user *newval, size_t newlen)
2600 {
2601 	return -ENOSYS;
2602 }
2603 
2604 #endif /* CONFIG_SYSCTL_SYSCALL */
2605 
2606 static int deprecated_sysctl_warning(struct __sysctl_args *args)
2607 {
2608 	static int msg_count;
2609 	int name[CTL_MAXNAME];
2610 	int i;
2611 
2612 	/* Read in the sysctl name for better debug message logging */
2613 	for (i = 0; i < args->nlen; i++)
2614 		if (get_user(name[i], args->name + i))
2615 			return -EFAULT;
2616 
2617 	/* Ignore accesses to kernel.version */
2618 	if ((args->nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION))
2619 		return 0;
2620 
2621 	if (msg_count < 5) {
2622 		msg_count++;
2623 		printk(KERN_INFO
2624 			"warning: process `%s' used the deprecated sysctl "
2625 			"system call with ", current->comm);
2626 		for (i = 0; i < args->nlen; i++)
2627 			printk("%d.", name[i]);
2628 		printk("\n");
2629 	}
2630 	return 0;
2631 }
2632 
/*
 * The exports are collected here in one place (alphabetical order)
 * instead of following each symbol definition, because most of these
 * symbols are defined twice in this file: once as the real
 * implementation and once as a stub.
 */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(sysctl_data);
EXPORT_SYMBOL(sysctl_intvec);
EXPORT_SYMBOL(sysctl_jiffies);
EXPORT_SYMBOL(sysctl_ms_jiffies);
EXPORT_SYMBOL(sysctl_string);
EXPORT_SYMBOL(unregister_sysctl_table);
2652