xref: /openbmc/linux/kernel/sysctl.c (revision bfdb4d9f0f611687d71cf6a460efc9e755f4a462)
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20 
21 #include <linux/module.h>
22 #include <linux/mm.h>
23 #include <linux/swap.h>
24 #include <linux/slab.h>
25 #include <linux/sysctl.h>
26 #include <linux/proc_fs.h>
27 #include <linux/security.h>
28 #include <linux/ctype.h>
29 #include <linux/utsname.h>
30 #include <linux/kmemcheck.h>
31 #include <linux/smp_lock.h>
32 #include <linux/fs.h>
33 #include <linux/init.h>
34 #include <linux/kernel.h>
35 #include <linux/kobject.h>
36 #include <linux/net.h>
37 #include <linux/sysrq.h>
38 #include <linux/highuid.h>
39 #include <linux/writeback.h>
40 #include <linux/hugetlb.h>
41 #include <linux/initrd.h>
42 #include <linux/key.h>
43 #include <linux/times.h>
44 #include <linux/limits.h>
45 #include <linux/dcache.h>
46 #include <linux/syscalls.h>
47 #include <linux/vmstat.h>
48 #include <linux/nfs_fs.h>
49 #include <linux/acpi.h>
50 #include <linux/reboot.h>
51 #include <linux/ftrace.h>
52 #include <linux/slow-work.h>
53 #include <linux/perf_counter.h>
54 
55 #include <asm/uaccess.h>
56 #include <asm/processor.h>
57 
58 #ifdef CONFIG_X86
59 #include <asm/nmi.h>
60 #include <asm/stacktrace.h>
61 #include <asm/io.h>
62 #endif
63 
64 static int deprecated_sysctl_warning(struct __sysctl_args *args);
65 
66 #if defined(CONFIG_SYSCTL)
67 
68 /* External variables not in a header file. */
69 extern int C_A_D;
70 extern int print_fatal_signals;
71 extern int sysctl_overcommit_memory;
72 extern int sysctl_overcommit_ratio;
73 extern int sysctl_panic_on_oom;
74 extern int sysctl_oom_kill_allocating_task;
75 extern int sysctl_oom_dump_tasks;
76 extern int max_threads;
77 extern int core_uses_pid;
78 extern int suid_dumpable;
79 extern char core_pattern[];
80 extern int pid_max;
81 extern int min_free_kbytes;
82 extern int pid_max_min, pid_max_max;
83 extern int sysctl_drop_caches;
84 extern int percpu_pagelist_fraction;
85 extern int compat_log;
86 extern int latencytop_enabled;
87 extern int sysctl_nr_open_min, sysctl_nr_open_max;
88 #ifndef CONFIG_MMU
89 extern int sysctl_nr_trim_pages;
90 #endif
91 #ifdef CONFIG_RCU_TORTURE_TEST
92 extern int rcutorture_runnable;
93 #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
94 
95 /* Constants used for minimum and  maximum */
96 #ifdef CONFIG_DETECT_SOFTLOCKUP
97 static int sixty = 60;
98 static int neg_one = -1;
99 #endif
100 
101 static int zero;
102 static int __maybe_unused one = 1;
103 static int __maybe_unused two = 2;
104 static unsigned long one_ul = 1;
105 static int one_hundred = 100;
106 
107 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
108 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
109 
110 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
111 static int maxolduid = 65535;
112 static int minolduid;
113 static int min_percpu_pagelist_fract = 8;
114 
115 static int ngroups_max = NGROUPS_MAX;
116 
/*
 * Configuration- and architecture-specific declarations.  Each group is
 * only visible when the option guarding it is enabled.
 */
#if defined(CONFIG_MODULES)
extern char modprobe_path[];
extern int modules_disabled;
#endif /* CONFIG_MODULES */

#if defined(CONFIG_CHR_DEV_SG)
extern int sg_big_buff;
#endif /* CONFIG_CHR_DEV_SG */

#if defined(CONFIG_SPARC)
#include <asm/system.h>
#endif /* CONFIG_SPARC */

#if defined(CONFIG_SPARC64)
extern int sysctl_tsb_ratio;
#endif /* CONFIG_SPARC64 */

#if defined(__hppa__)
extern int pwrsw_enabled;
extern int unaligned_enabled;
#endif /* __hppa__ */

#if defined(CONFIG_S390)
#if defined(CONFIG_MATHEMU)
extern int sysctl_ieee_emulation_warnings;
#endif /* CONFIG_MATHEMU */
extern int sysctl_userprocess_debug;
extern int spin_retry;
#endif /* CONFIG_S390 */

#if defined(CONFIG_BSD_PROCESS_ACCT)
extern int acct_parm[];
#endif /* CONFIG_BSD_PROCESS_ACCT */

#if defined(CONFIG_IA64)
extern int no_unaligned_warning;
extern int unaligned_dump_stack;
#endif /* CONFIG_IA64 */

#if defined(CONFIG_RT_MUTEXES)
extern int max_lock_depth;
#endif /* CONFIG_RT_MUTEXES */
158 
#if defined(CONFIG_PROC_SYSCTL)
/*
 * Handlers referenced by kern_table ("cad_pid" and "tainted") before their
 * definitions; both take the usual proc_handler argument list.
 */
static int proc_do_cad_pid(struct ctl_table *table, int write,
			   struct file *filp, void __user *buffer,
			   size_t *lenp, loff_t *ppos);
static int proc_taint(struct ctl_table *table, int write,
		      struct file *filp, void __user *buffer,
		      size_t *lenp, loff_t *ppos);
#endif /* CONFIG_PROC_SYSCTL */
165 
166 static struct ctl_table root_table[];
167 static struct ctl_table_root sysctl_table_root;
168 static struct ctl_table_header root_table_header = {
169 	.count = 1,
170 	.ctl_table = root_table,
171 	.ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),
172 	.root = &sysctl_table_root,
173 	.set = &sysctl_table_root.default_set,
174 };
175 static struct ctl_table_root sysctl_table_root = {
176 	.root_list = LIST_HEAD_INIT(sysctl_table_root.root_list),
177 	.default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry),
178 };
179 
180 static struct ctl_table kern_table[];
181 static struct ctl_table vm_table[];
182 static struct ctl_table fs_table[];
183 static struct ctl_table debug_table[];
184 static struct ctl_table dev_table[];
185 extern struct ctl_table random_table[];
186 #ifdef CONFIG_INOTIFY_USER
187 extern struct ctl_table inotify_table[];
188 #endif
189 #ifdef CONFIG_EPOLL
190 extern struct ctl_table epoll_table[];
191 #endif
192 
193 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
194 int sysctl_legacy_va_layout;
195 #endif
196 
197 extern int prove_locking;
198 extern int lock_stat;
199 
200 /* The default sysctl tables: */
201 
202 static struct ctl_table root_table[] = {
203 	{
204 		.ctl_name	= CTL_KERN,
205 		.procname	= "kernel",
206 		.mode		= 0555,
207 		.child		= kern_table,
208 	},
209 	{
210 		.ctl_name	= CTL_VM,
211 		.procname	= "vm",
212 		.mode		= 0555,
213 		.child		= vm_table,
214 	},
215 	{
216 		.ctl_name	= CTL_FS,
217 		.procname	= "fs",
218 		.mode		= 0555,
219 		.child		= fs_table,
220 	},
221 	{
222 		.ctl_name	= CTL_DEBUG,
223 		.procname	= "debug",
224 		.mode		= 0555,
225 		.child		= debug_table,
226 	},
227 	{
228 		.ctl_name	= CTL_DEV,
229 		.procname	= "dev",
230 		.mode		= 0555,
231 		.child		= dev_table,
232 	},
233 /*
234  * NOTE: do not add new entries to this table unless you have read
235  * Documentation/sysctl/ctl_unnumbered.txt
236  */
237 	{ .ctl_name = 0 }
238 };
239 
#if defined(CONFIG_SCHED_DEBUG)
/*
 * Bounds, in nanoseconds, used as .extra1/.extra2 by the scheduler
 * granularity and latency entries at the top of kern_table.
 */
static int min_sched_granularity_ns = 100000;		/* 100 usecs */
static int max_sched_granularity_ns = NSEC_PER_SEC;	/* 1 second */

static int min_wakeup_granularity_ns;			/* 0 usecs */
static int max_wakeup_granularity_ns = NSEC_PER_SEC;	/* 1 second */
#endif /* CONFIG_SCHED_DEBUG */
246 
247 static struct ctl_table kern_table[] = {
248 #ifdef CONFIG_SCHED_DEBUG
249 	{
250 		.ctl_name	= CTL_UNNUMBERED,
251 		.procname	= "sched_min_granularity_ns",
252 		.data		= &sysctl_sched_min_granularity,
253 		.maxlen		= sizeof(unsigned int),
254 		.mode		= 0644,
255 		.proc_handler	= &sched_nr_latency_handler,
256 		.strategy	= &sysctl_intvec,
257 		.extra1		= &min_sched_granularity_ns,
258 		.extra2		= &max_sched_granularity_ns,
259 	},
260 	{
261 		.ctl_name	= CTL_UNNUMBERED,
262 		.procname	= "sched_latency_ns",
263 		.data		= &sysctl_sched_latency,
264 		.maxlen		= sizeof(unsigned int),
265 		.mode		= 0644,
266 		.proc_handler	= &sched_nr_latency_handler,
267 		.strategy	= &sysctl_intvec,
268 		.extra1		= &min_sched_granularity_ns,
269 		.extra2		= &max_sched_granularity_ns,
270 	},
271 	{
272 		.ctl_name	= CTL_UNNUMBERED,
273 		.procname	= "sched_wakeup_granularity_ns",
274 		.data		= &sysctl_sched_wakeup_granularity,
275 		.maxlen		= sizeof(unsigned int),
276 		.mode		= 0644,
277 		.proc_handler	= &proc_dointvec_minmax,
278 		.strategy	= &sysctl_intvec,
279 		.extra1		= &min_wakeup_granularity_ns,
280 		.extra2		= &max_wakeup_granularity_ns,
281 	},
282 	{
283 		.ctl_name	= CTL_UNNUMBERED,
284 		.procname	= "sched_shares_ratelimit",
285 		.data		= &sysctl_sched_shares_ratelimit,
286 		.maxlen		= sizeof(unsigned int),
287 		.mode		= 0644,
288 		.proc_handler	= &proc_dointvec,
289 	},
290 	{
291 		.ctl_name	= CTL_UNNUMBERED,
292 		.procname	= "sched_shares_thresh",
293 		.data		= &sysctl_sched_shares_thresh,
294 		.maxlen		= sizeof(unsigned int),
295 		.mode		= 0644,
296 		.proc_handler	= &proc_dointvec_minmax,
297 		.strategy	= &sysctl_intvec,
298 		.extra1		= &zero,
299 	},
300 	{
301 		.ctl_name	= CTL_UNNUMBERED,
302 		.procname	= "sched_child_runs_first",
303 		.data		= &sysctl_sched_child_runs_first,
304 		.maxlen		= sizeof(unsigned int),
305 		.mode		= 0644,
306 		.proc_handler	= &proc_dointvec,
307 	},
308 	{
309 		.ctl_name	= CTL_UNNUMBERED,
310 		.procname	= "sched_features",
311 		.data		= &sysctl_sched_features,
312 		.maxlen		= sizeof(unsigned int),
313 		.mode		= 0644,
314 		.proc_handler	= &proc_dointvec,
315 	},
316 	{
317 		.ctl_name	= CTL_UNNUMBERED,
318 		.procname	= "sched_migration_cost",
319 		.data		= &sysctl_sched_migration_cost,
320 		.maxlen		= sizeof(unsigned int),
321 		.mode		= 0644,
322 		.proc_handler	= &proc_dointvec,
323 	},
324 	{
325 		.ctl_name	= CTL_UNNUMBERED,
326 		.procname	= "sched_nr_migrate",
327 		.data		= &sysctl_sched_nr_migrate,
328 		.maxlen		= sizeof(unsigned int),
329 		.mode		= 0644,
330 		.proc_handler	= &proc_dointvec,
331 	},
332 	{
333 		.ctl_name	= CTL_UNNUMBERED,
334 		.procname	= "timer_migration",
335 		.data		= &sysctl_timer_migration,
336 		.maxlen		= sizeof(unsigned int),
337 		.mode		= 0644,
338 		.proc_handler	= &proc_dointvec_minmax,
339 		.strategy	= &sysctl_intvec,
340 		.extra1		= &zero,
341 		.extra2		= &one,
342 	},
343 #endif
344 	{
345 		.ctl_name	= CTL_UNNUMBERED,
346 		.procname	= "sched_rt_period_us",
347 		.data		= &sysctl_sched_rt_period,
348 		.maxlen		= sizeof(unsigned int),
349 		.mode		= 0644,
350 		.proc_handler	= &sched_rt_handler,
351 	},
352 	{
353 		.ctl_name	= CTL_UNNUMBERED,
354 		.procname	= "sched_rt_runtime_us",
355 		.data		= &sysctl_sched_rt_runtime,
356 		.maxlen		= sizeof(int),
357 		.mode		= 0644,
358 		.proc_handler	= &sched_rt_handler,
359 	},
360 	{
361 		.ctl_name	= CTL_UNNUMBERED,
362 		.procname	= "sched_compat_yield",
363 		.data		= &sysctl_sched_compat_yield,
364 		.maxlen		= sizeof(unsigned int),
365 		.mode		= 0644,
366 		.proc_handler	= &proc_dointvec,
367 	},
368 #ifdef CONFIG_PROVE_LOCKING
369 	{
370 		.ctl_name	= CTL_UNNUMBERED,
371 		.procname	= "prove_locking",
372 		.data		= &prove_locking,
373 		.maxlen		= sizeof(int),
374 		.mode		= 0644,
375 		.proc_handler	= &proc_dointvec,
376 	},
377 #endif
378 #ifdef CONFIG_LOCK_STAT
379 	{
380 		.ctl_name	= CTL_UNNUMBERED,
381 		.procname	= "lock_stat",
382 		.data		= &lock_stat,
383 		.maxlen		= sizeof(int),
384 		.mode		= 0644,
385 		.proc_handler	= &proc_dointvec,
386 	},
387 #endif
388 	{
389 		.ctl_name	= KERN_PANIC,
390 		.procname	= "panic",
391 		.data		= &panic_timeout,
392 		.maxlen		= sizeof(int),
393 		.mode		= 0644,
394 		.proc_handler	= &proc_dointvec,
395 	},
396 	{
397 		.ctl_name	= KERN_CORE_USES_PID,
398 		.procname	= "core_uses_pid",
399 		.data		= &core_uses_pid,
400 		.maxlen		= sizeof(int),
401 		.mode		= 0644,
402 		.proc_handler	= &proc_dointvec,
403 	},
404 	{
405 		.ctl_name	= KERN_CORE_PATTERN,
406 		.procname	= "core_pattern",
407 		.data		= core_pattern,
408 		.maxlen		= CORENAME_MAX_SIZE,
409 		.mode		= 0644,
410 		.proc_handler	= &proc_dostring,
411 		.strategy	= &sysctl_string,
412 	},
413 #ifdef CONFIG_PROC_SYSCTL
414 	{
415 		.procname	= "tainted",
416 		.maxlen 	= sizeof(long),
417 		.mode		= 0644,
418 		.proc_handler	= &proc_taint,
419 	},
420 #endif
421 #ifdef CONFIG_LATENCYTOP
422 	{
423 		.procname	= "latencytop",
424 		.data		= &latencytop_enabled,
425 		.maxlen		= sizeof(int),
426 		.mode		= 0644,
427 		.proc_handler	= &proc_dointvec,
428 	},
429 #endif
430 #ifdef CONFIG_BLK_DEV_INITRD
431 	{
432 		.ctl_name	= KERN_REALROOTDEV,
433 		.procname	= "real-root-dev",
434 		.data		= &real_root_dev,
435 		.maxlen		= sizeof(int),
436 		.mode		= 0644,
437 		.proc_handler	= &proc_dointvec,
438 	},
439 #endif
440 	{
441 		.ctl_name	= CTL_UNNUMBERED,
442 		.procname	= "print-fatal-signals",
443 		.data		= &print_fatal_signals,
444 		.maxlen		= sizeof(int),
445 		.mode		= 0644,
446 		.proc_handler	= &proc_dointvec,
447 	},
448 #ifdef CONFIG_SPARC
449 	{
450 		.ctl_name	= KERN_SPARC_REBOOT,
451 		.procname	= "reboot-cmd",
452 		.data		= reboot_command,
453 		.maxlen		= 256,
454 		.mode		= 0644,
455 		.proc_handler	= &proc_dostring,
456 		.strategy	= &sysctl_string,
457 	},
458 	{
459 		.ctl_name	= KERN_SPARC_STOP_A,
460 		.procname	= "stop-a",
461 		.data		= &stop_a_enabled,
462 		.maxlen		= sizeof (int),
463 		.mode		= 0644,
464 		.proc_handler	= &proc_dointvec,
465 	},
466 	{
467 		.ctl_name	= KERN_SPARC_SCONS_PWROFF,
468 		.procname	= "scons-poweroff",
469 		.data		= &scons_pwroff,
470 		.maxlen		= sizeof (int),
471 		.mode		= 0644,
472 		.proc_handler	= &proc_dointvec,
473 	},
474 #endif
475 #ifdef CONFIG_SPARC64
476 	{
477 		.ctl_name	= CTL_UNNUMBERED,
478 		.procname	= "tsb-ratio",
479 		.data		= &sysctl_tsb_ratio,
480 		.maxlen		= sizeof (int),
481 		.mode		= 0644,
482 		.proc_handler	= &proc_dointvec,
483 	},
484 #endif
485 #ifdef __hppa__
486 	{
487 		.ctl_name	= KERN_HPPA_PWRSW,
488 		.procname	= "soft-power",
489 		.data		= &pwrsw_enabled,
490 		.maxlen		= sizeof (int),
491 	 	.mode		= 0644,
492 		.proc_handler	= &proc_dointvec,
493 	},
494 	{
495 		.ctl_name	= KERN_HPPA_UNALIGNED,
496 		.procname	= "unaligned-trap",
497 		.data		= &unaligned_enabled,
498 		.maxlen		= sizeof (int),
499 		.mode		= 0644,
500 		.proc_handler	= &proc_dointvec,
501 	},
502 #endif
503 	{
504 		.ctl_name	= KERN_CTLALTDEL,
505 		.procname	= "ctrl-alt-del",
506 		.data		= &C_A_D,
507 		.maxlen		= sizeof(int),
508 		.mode		= 0644,
509 		.proc_handler	= &proc_dointvec,
510 	},
511 #ifdef CONFIG_FUNCTION_TRACER
512 	{
513 		.ctl_name	= CTL_UNNUMBERED,
514 		.procname	= "ftrace_enabled",
515 		.data		= &ftrace_enabled,
516 		.maxlen		= sizeof(int),
517 		.mode		= 0644,
518 		.proc_handler	= &ftrace_enable_sysctl,
519 	},
520 #endif
521 #ifdef CONFIG_STACK_TRACER
522 	{
523 		.ctl_name	= CTL_UNNUMBERED,
524 		.procname	= "stack_tracer_enabled",
525 		.data		= &stack_tracer_enabled,
526 		.maxlen		= sizeof(int),
527 		.mode		= 0644,
528 		.proc_handler	= &stack_trace_sysctl,
529 	},
530 #endif
531 #ifdef CONFIG_TRACING
532 	{
533 		.ctl_name	= CTL_UNNUMBERED,
534 		.procname	= "ftrace_dump_on_oops",
535 		.data		= &ftrace_dump_on_oops,
536 		.maxlen		= sizeof(int),
537 		.mode		= 0644,
538 		.proc_handler	= &proc_dointvec,
539 	},
540 #endif
541 #ifdef CONFIG_MODULES
542 	{
543 		.ctl_name	= KERN_MODPROBE,
544 		.procname	= "modprobe",
545 		.data		= &modprobe_path,
546 		.maxlen		= KMOD_PATH_LEN,
547 		.mode		= 0644,
548 		.proc_handler	= &proc_dostring,
549 		.strategy	= &sysctl_string,
550 	},
551 	{
552 		.ctl_name	= CTL_UNNUMBERED,
553 		.procname	= "modules_disabled",
554 		.data		= &modules_disabled,
555 		.maxlen		= sizeof(int),
556 		.mode		= 0644,
557 		/* only handle a transition from default "0" to "1" */
558 		.proc_handler	= &proc_dointvec_minmax,
559 		.extra1		= &one,
560 		.extra2		= &one,
561 	},
562 #endif
563 #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
564 	{
565 		.ctl_name	= KERN_HOTPLUG,
566 		.procname	= "hotplug",
567 		.data		= &uevent_helper,
568 		.maxlen		= UEVENT_HELPER_PATH_LEN,
569 		.mode		= 0644,
570 		.proc_handler	= &proc_dostring,
571 		.strategy	= &sysctl_string,
572 	},
573 #endif
574 #ifdef CONFIG_CHR_DEV_SG
575 	{
576 		.ctl_name	= KERN_SG_BIG_BUFF,
577 		.procname	= "sg-big-buff",
578 		.data		= &sg_big_buff,
579 		.maxlen		= sizeof (int),
580 		.mode		= 0444,
581 		.proc_handler	= &proc_dointvec,
582 	},
583 #endif
584 #ifdef CONFIG_BSD_PROCESS_ACCT
585 	{
586 		.ctl_name	= KERN_ACCT,
587 		.procname	= "acct",
588 		.data		= &acct_parm,
589 		.maxlen		= 3*sizeof(int),
590 		.mode		= 0644,
591 		.proc_handler	= &proc_dointvec,
592 	},
593 #endif
594 #ifdef CONFIG_MAGIC_SYSRQ
595 	{
596 		.ctl_name	= KERN_SYSRQ,
597 		.procname	= "sysrq",
598 		.data		= &__sysrq_enabled,
599 		.maxlen		= sizeof (int),
600 		.mode		= 0644,
601 		.proc_handler	= &proc_dointvec,
602 	},
603 #endif
604 #ifdef CONFIG_PROC_SYSCTL
605 	{
606 		.procname	= "cad_pid",
607 		.data		= NULL,
608 		.maxlen		= sizeof (int),
609 		.mode		= 0600,
610 		.proc_handler	= &proc_do_cad_pid,
611 	},
612 #endif
613 	{
614 		.ctl_name	= KERN_MAX_THREADS,
615 		.procname	= "threads-max",
616 		.data		= &max_threads,
617 		.maxlen		= sizeof(int),
618 		.mode		= 0644,
619 		.proc_handler	= &proc_dointvec,
620 	},
621 	{
622 		.ctl_name	= KERN_RANDOM,
623 		.procname	= "random",
624 		.mode		= 0555,
625 		.child		= random_table,
626 	},
627 	{
628 		.ctl_name	= KERN_OVERFLOWUID,
629 		.procname	= "overflowuid",
630 		.data		= &overflowuid,
631 		.maxlen		= sizeof(int),
632 		.mode		= 0644,
633 		.proc_handler	= &proc_dointvec_minmax,
634 		.strategy	= &sysctl_intvec,
635 		.extra1		= &minolduid,
636 		.extra2		= &maxolduid,
637 	},
638 	{
639 		.ctl_name	= KERN_OVERFLOWGID,
640 		.procname	= "overflowgid",
641 		.data		= &overflowgid,
642 		.maxlen		= sizeof(int),
643 		.mode		= 0644,
644 		.proc_handler	= &proc_dointvec_minmax,
645 		.strategy	= &sysctl_intvec,
646 		.extra1		= &minolduid,
647 		.extra2		= &maxolduid,
648 	},
649 #ifdef CONFIG_S390
650 #ifdef CONFIG_MATHEMU
651 	{
652 		.ctl_name	= KERN_IEEE_EMULATION_WARNINGS,
653 		.procname	= "ieee_emulation_warnings",
654 		.data		= &sysctl_ieee_emulation_warnings,
655 		.maxlen		= sizeof(int),
656 		.mode		= 0644,
657 		.proc_handler	= &proc_dointvec,
658 	},
659 #endif
660 	{
661 		.ctl_name	= KERN_S390_USER_DEBUG_LOGGING,
662 		.procname	= "userprocess_debug",
663 		.data		= &sysctl_userprocess_debug,
664 		.maxlen		= sizeof(int),
665 		.mode		= 0644,
666 		.proc_handler	= &proc_dointvec,
667 	},
668 #endif
669 	{
670 		.ctl_name	= KERN_PIDMAX,
671 		.procname	= "pid_max",
672 		.data		= &pid_max,
673 		.maxlen		= sizeof (int),
674 		.mode		= 0644,
675 		.proc_handler	= &proc_dointvec_minmax,
676 		.strategy	= sysctl_intvec,
677 		.extra1		= &pid_max_min,
678 		.extra2		= &pid_max_max,
679 	},
680 	{
681 		.ctl_name	= KERN_PANIC_ON_OOPS,
682 		.procname	= "panic_on_oops",
683 		.data		= &panic_on_oops,
684 		.maxlen		= sizeof(int),
685 		.mode		= 0644,
686 		.proc_handler	= &proc_dointvec,
687 	},
688 #if defined CONFIG_PRINTK
689 	{
690 		.ctl_name	= KERN_PRINTK,
691 		.procname	= "printk",
692 		.data		= &console_loglevel,
693 		.maxlen		= 4*sizeof(int),
694 		.mode		= 0644,
695 		.proc_handler	= &proc_dointvec,
696 	},
697 	{
698 		.ctl_name	= KERN_PRINTK_RATELIMIT,
699 		.procname	= "printk_ratelimit",
700 		.data		= &printk_ratelimit_state.interval,
701 		.maxlen		= sizeof(int),
702 		.mode		= 0644,
703 		.proc_handler	= &proc_dointvec_jiffies,
704 		.strategy	= &sysctl_jiffies,
705 	},
706 	{
707 		.ctl_name	= KERN_PRINTK_RATELIMIT_BURST,
708 		.procname	= "printk_ratelimit_burst",
709 		.data		= &printk_ratelimit_state.burst,
710 		.maxlen		= sizeof(int),
711 		.mode		= 0644,
712 		.proc_handler	= &proc_dointvec,
713 	},
714 #endif
715 	{
716 		.ctl_name	= KERN_NGROUPS_MAX,
717 		.procname	= "ngroups_max",
718 		.data		= &ngroups_max,
719 		.maxlen		= sizeof (int),
720 		.mode		= 0444,
721 		.proc_handler	= &proc_dointvec,
722 	},
723 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
724 	{
725 		.ctl_name       = KERN_UNKNOWN_NMI_PANIC,
726 		.procname       = "unknown_nmi_panic",
727 		.data           = &unknown_nmi_panic,
728 		.maxlen         = sizeof (int),
729 		.mode           = 0644,
730 		.proc_handler   = &proc_dointvec,
731 	},
732 	{
733 		.procname       = "nmi_watchdog",
734 		.data           = &nmi_watchdog_enabled,
735 		.maxlen         = sizeof (int),
736 		.mode           = 0644,
737 		.proc_handler   = &proc_nmi_enabled,
738 	},
739 #endif
740 #if defined(CONFIG_X86)
741 	{
742 		.ctl_name	= KERN_PANIC_ON_NMI,
743 		.procname	= "panic_on_unrecovered_nmi",
744 		.data		= &panic_on_unrecovered_nmi,
745 		.maxlen		= sizeof(int),
746 		.mode		= 0644,
747 		.proc_handler	= &proc_dointvec,
748 	},
749 	{
750 		.ctl_name	= KERN_BOOTLOADER_TYPE,
751 		.procname	= "bootloader_type",
752 		.data		= &bootloader_type,
753 		.maxlen		= sizeof (int),
754 		.mode		= 0444,
755 		.proc_handler	= &proc_dointvec,
756 	},
757 	{
758 		.ctl_name	= CTL_UNNUMBERED,
759 		.procname	= "bootloader_version",
760 		.data		= &bootloader_version,
761 		.maxlen		= sizeof (int),
762 		.mode		= 0444,
763 		.proc_handler	= &proc_dointvec,
764 	},
765 	{
766 		.ctl_name	= CTL_UNNUMBERED,
767 		.procname	= "kstack_depth_to_print",
768 		.data		= &kstack_depth_to_print,
769 		.maxlen		= sizeof(int),
770 		.mode		= 0644,
771 		.proc_handler	= &proc_dointvec,
772 	},
773 	{
774 		.ctl_name	= CTL_UNNUMBERED,
775 		.procname	= "io_delay_type",
776 		.data		= &io_delay_type,
777 		.maxlen		= sizeof(int),
778 		.mode		= 0644,
779 		.proc_handler	= &proc_dointvec,
780 	},
781 #endif
782 #if defined(CONFIG_MMU)
783 	{
784 		.ctl_name	= KERN_RANDOMIZE,
785 		.procname	= "randomize_va_space",
786 		.data		= &randomize_va_space,
787 		.maxlen		= sizeof(int),
788 		.mode		= 0644,
789 		.proc_handler	= &proc_dointvec,
790 	},
791 #endif
792 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
793 	{
794 		.ctl_name	= KERN_SPIN_RETRY,
795 		.procname	= "spin_retry",
796 		.data		= &spin_retry,
797 		.maxlen		= sizeof (int),
798 		.mode		= 0644,
799 		.proc_handler	= &proc_dointvec,
800 	},
801 #endif
802 #if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
803 	{
804 		.procname	= "acpi_video_flags",
805 		.data		= &acpi_realmode_flags,
806 		.maxlen		= sizeof (unsigned long),
807 		.mode		= 0644,
808 		.proc_handler	= &proc_doulongvec_minmax,
809 	},
810 #endif
811 #ifdef CONFIG_IA64
812 	{
813 		.ctl_name	= KERN_IA64_UNALIGNED,
814 		.procname	= "ignore-unaligned-usertrap",
815 		.data		= &no_unaligned_warning,
816 		.maxlen		= sizeof (int),
817 	 	.mode		= 0644,
818 		.proc_handler	= &proc_dointvec,
819 	},
820 	{
821 		.ctl_name	= CTL_UNNUMBERED,
822 		.procname	= "unaligned-dump-stack",
823 		.data		= &unaligned_dump_stack,
824 		.maxlen		= sizeof (int),
825 		.mode		= 0644,
826 		.proc_handler	= &proc_dointvec,
827 	},
828 #endif
829 #ifdef CONFIG_DETECT_SOFTLOCKUP
830 	{
831 		.ctl_name	= CTL_UNNUMBERED,
832 		.procname	= "softlockup_panic",
833 		.data		= &softlockup_panic,
834 		.maxlen		= sizeof(int),
835 		.mode		= 0644,
836 		.proc_handler	= &proc_dointvec_minmax,
837 		.strategy	= &sysctl_intvec,
838 		.extra1		= &zero,
839 		.extra2		= &one,
840 	},
841 	{
842 		.ctl_name	= CTL_UNNUMBERED,
843 		.procname	= "softlockup_thresh",
844 		.data		= &softlockup_thresh,
845 		.maxlen		= sizeof(int),
846 		.mode		= 0644,
847 		.proc_handler	= &proc_dosoftlockup_thresh,
848 		.strategy	= &sysctl_intvec,
849 		.extra1		= &neg_one,
850 		.extra2		= &sixty,
851 	},
852 #endif
853 #ifdef CONFIG_DETECT_HUNG_TASK
854 	{
855 		.ctl_name	= CTL_UNNUMBERED,
856 		.procname	= "hung_task_panic",
857 		.data		= &sysctl_hung_task_panic,
858 		.maxlen		= sizeof(int),
859 		.mode		= 0644,
860 		.proc_handler	= &proc_dointvec_minmax,
861 		.strategy	= &sysctl_intvec,
862 		.extra1		= &zero,
863 		.extra2		= &one,
864 	},
865 	{
866 		.ctl_name	= CTL_UNNUMBERED,
867 		.procname	= "hung_task_check_count",
868 		.data		= &sysctl_hung_task_check_count,
869 		.maxlen		= sizeof(unsigned long),
870 		.mode		= 0644,
871 		.proc_handler	= &proc_doulongvec_minmax,
872 		.strategy	= &sysctl_intvec,
873 	},
874 	{
875 		.ctl_name	= CTL_UNNUMBERED,
876 		.procname	= "hung_task_timeout_secs",
877 		.data		= &sysctl_hung_task_timeout_secs,
878 		.maxlen		= sizeof(unsigned long),
879 		.mode		= 0644,
880 		.proc_handler	= &proc_dohung_task_timeout_secs,
881 		.strategy	= &sysctl_intvec,
882 	},
883 	{
884 		.ctl_name	= CTL_UNNUMBERED,
885 		.procname	= "hung_task_warnings",
886 		.data		= &sysctl_hung_task_warnings,
887 		.maxlen		= sizeof(unsigned long),
888 		.mode		= 0644,
889 		.proc_handler	= &proc_doulongvec_minmax,
890 		.strategy	= &sysctl_intvec,
891 	},
892 #endif
893 #ifdef CONFIG_COMPAT
894 	{
895 		.ctl_name	= KERN_COMPAT_LOG,
896 		.procname	= "compat-log",
897 		.data		= &compat_log,
898 		.maxlen		= sizeof (int),
899 	 	.mode		= 0644,
900 		.proc_handler	= &proc_dointvec,
901 	},
902 #endif
903 #ifdef CONFIG_RT_MUTEXES
904 	{
905 		.ctl_name	= KERN_MAX_LOCK_DEPTH,
906 		.procname	= "max_lock_depth",
907 		.data		= &max_lock_depth,
908 		.maxlen		= sizeof(int),
909 		.mode		= 0644,
910 		.proc_handler	= &proc_dointvec,
911 	},
912 #endif
913 	{
914 		.ctl_name	= CTL_UNNUMBERED,
915 		.procname	= "poweroff_cmd",
916 		.data		= &poweroff_cmd,
917 		.maxlen		= POWEROFF_CMD_PATH_LEN,
918 		.mode		= 0644,
919 		.proc_handler	= &proc_dostring,
920 		.strategy	= &sysctl_string,
921 	},
922 #ifdef CONFIG_KEYS
923 	{
924 		.ctl_name	= CTL_UNNUMBERED,
925 		.procname	= "keys",
926 		.mode		= 0555,
927 		.child		= key_sysctls,
928 	},
929 #endif
930 #ifdef CONFIG_RCU_TORTURE_TEST
931 	{
932 		.ctl_name       = CTL_UNNUMBERED,
933 		.procname       = "rcutorture_runnable",
934 		.data           = &rcutorture_runnable,
935 		.maxlen         = sizeof(int),
936 		.mode           = 0644,
937 		.proc_handler   = &proc_dointvec,
938 	},
939 #endif
940 #ifdef CONFIG_SLOW_WORK
941 	{
942 		.ctl_name	= CTL_UNNUMBERED,
943 		.procname	= "slow-work",
944 		.mode		= 0555,
945 		.child		= slow_work_sysctls,
946 	},
947 #endif
948 #ifdef CONFIG_PERF_COUNTERS
949 	{
950 		.ctl_name	= CTL_UNNUMBERED,
951 		.procname	= "perf_counter_paranoid",
952 		.data		= &sysctl_perf_counter_paranoid,
953 		.maxlen		= sizeof(sysctl_perf_counter_paranoid),
954 		.mode		= 0644,
955 		.proc_handler	= &proc_dointvec,
956 	},
957 	{
958 		.ctl_name	= CTL_UNNUMBERED,
959 		.procname	= "perf_counter_mlock_kb",
960 		.data		= &sysctl_perf_counter_mlock,
961 		.maxlen		= sizeof(sysctl_perf_counter_mlock),
962 		.mode		= 0644,
963 		.proc_handler	= &proc_dointvec,
964 	},
965 	{
966 		.ctl_name	= CTL_UNNUMBERED,
967 		.procname	= "perf_counter_max_sample_rate",
968 		.data		= &sysctl_perf_counter_sample_rate,
969 		.maxlen		= sizeof(sysctl_perf_counter_sample_rate),
970 		.mode		= 0644,
971 		.proc_handler	= &proc_dointvec,
972 	},
973 #endif
974 #ifdef CONFIG_KMEMCHECK
975 	{
976 		.ctl_name	= CTL_UNNUMBERED,
977 		.procname	= "kmemcheck",
978 		.data		= &kmemcheck_enabled,
979 		.maxlen		= sizeof(int),
980 		.mode		= 0644,
981 		.proc_handler	= &proc_dointvec,
982 	},
983 #endif
984 
985 /*
986  * NOTE: do not add new entries to this table unless you have read
987  * Documentation/sysctl/ctl_unnumbered.txt
988  */
989 	{ .ctl_name = 0 }
990 };
991 
992 static struct ctl_table vm_table[] = {
993 	{
994 		.ctl_name	= VM_OVERCOMMIT_MEMORY,
995 		.procname	= "overcommit_memory",
996 		.data		= &sysctl_overcommit_memory,
997 		.maxlen		= sizeof(sysctl_overcommit_memory),
998 		.mode		= 0644,
999 		.proc_handler	= &proc_dointvec,
1000 	},
1001 	{
1002 		.ctl_name	= VM_PANIC_ON_OOM,
1003 		.procname	= "panic_on_oom",
1004 		.data		= &sysctl_panic_on_oom,
1005 		.maxlen		= sizeof(sysctl_panic_on_oom),
1006 		.mode		= 0644,
1007 		.proc_handler	= &proc_dointvec,
1008 	},
1009 	{
1010 		.ctl_name	= CTL_UNNUMBERED,
1011 		.procname	= "oom_kill_allocating_task",
1012 		.data		= &sysctl_oom_kill_allocating_task,
1013 		.maxlen		= sizeof(sysctl_oom_kill_allocating_task),
1014 		.mode		= 0644,
1015 		.proc_handler	= &proc_dointvec,
1016 	},
1017 	{
1018 		.ctl_name	= CTL_UNNUMBERED,
1019 		.procname	= "oom_dump_tasks",
1020 		.data		= &sysctl_oom_dump_tasks,
1021 		.maxlen		= sizeof(sysctl_oom_dump_tasks),
1022 		.mode		= 0644,
1023 		.proc_handler	= &proc_dointvec,
1024 	},
1025 	{
1026 		.ctl_name	= VM_OVERCOMMIT_RATIO,
1027 		.procname	= "overcommit_ratio",
1028 		.data		= &sysctl_overcommit_ratio,
1029 		.maxlen		= sizeof(sysctl_overcommit_ratio),
1030 		.mode		= 0644,
1031 		.proc_handler	= &proc_dointvec,
1032 	},
1033 	{
1034 		.ctl_name	= VM_PAGE_CLUSTER,
1035 		.procname	= "page-cluster",
1036 		.data		= &page_cluster,
1037 		.maxlen		= sizeof(int),
1038 		.mode		= 0644,
1039 		.proc_handler	= &proc_dointvec,
1040 	},
1041 	{
1042 		.ctl_name	= VM_DIRTY_BACKGROUND,
1043 		.procname	= "dirty_background_ratio",
1044 		.data		= &dirty_background_ratio,
1045 		.maxlen		= sizeof(dirty_background_ratio),
1046 		.mode		= 0644,
1047 		.proc_handler	= &dirty_background_ratio_handler,
1048 		.strategy	= &sysctl_intvec,
1049 		.extra1		= &zero,
1050 		.extra2		= &one_hundred,
1051 	},
1052 	{
1053 		.ctl_name	= CTL_UNNUMBERED,
1054 		.procname	= "dirty_background_bytes",
1055 		.data		= &dirty_background_bytes,
1056 		.maxlen		= sizeof(dirty_background_bytes),
1057 		.mode		= 0644,
1058 		.proc_handler	= &dirty_background_bytes_handler,
1059 		.strategy	= &sysctl_intvec,
1060 		.extra1		= &one_ul,
1061 	},
1062 	{
1063 		.ctl_name	= VM_DIRTY_RATIO,
1064 		.procname	= "dirty_ratio",
1065 		.data		= &vm_dirty_ratio,
1066 		.maxlen		= sizeof(vm_dirty_ratio),
1067 		.mode		= 0644,
1068 		.proc_handler	= &dirty_ratio_handler,
1069 		.strategy	= &sysctl_intvec,
1070 		.extra1		= &zero,
1071 		.extra2		= &one_hundred,
1072 	},
1073 	{
1074 		.ctl_name	= CTL_UNNUMBERED,
1075 		.procname	= "dirty_bytes",
1076 		.data		= &vm_dirty_bytes,
1077 		.maxlen		= sizeof(vm_dirty_bytes),
1078 		.mode		= 0644,
1079 		.proc_handler	= &dirty_bytes_handler,
1080 		.strategy	= &sysctl_intvec,
1081 		.extra1		= &dirty_bytes_min,
1082 	},
1083 	{
1084 		.procname	= "dirty_writeback_centisecs",
1085 		.data		= &dirty_writeback_interval,
1086 		.maxlen		= sizeof(dirty_writeback_interval),
1087 		.mode		= 0644,
1088 		.proc_handler	= &dirty_writeback_centisecs_handler,
1089 	},
1090 	{
1091 		.procname	= "dirty_expire_centisecs",
1092 		.data		= &dirty_expire_interval,
1093 		.maxlen		= sizeof(dirty_expire_interval),
1094 		.mode		= 0644,
1095 		.proc_handler	= &proc_dointvec,
1096 	},
1097 	{
1098 		.ctl_name	= VM_NR_PDFLUSH_THREADS,
1099 		.procname	= "nr_pdflush_threads",
1100 		.data		= &nr_pdflush_threads,
1101 		.maxlen		= sizeof nr_pdflush_threads,
1102 		.mode		= 0444 /* read-only*/,
1103 		.proc_handler	= &proc_dointvec,
1104 	},
1105 	{
1106 		.ctl_name	= VM_SWAPPINESS,
1107 		.procname	= "swappiness",
1108 		.data		= &vm_swappiness,
1109 		.maxlen		= sizeof(vm_swappiness),
1110 		.mode		= 0644,
1111 		.proc_handler	= &proc_dointvec_minmax,
1112 		.strategy	= &sysctl_intvec,
1113 		.extra1		= &zero,
1114 		.extra2		= &one_hundred,
1115 	},
1116 #ifdef CONFIG_HUGETLB_PAGE
1117 	 {
1118 		.procname	= "nr_hugepages",
1119 		.data		= NULL,
1120 		.maxlen		= sizeof(unsigned long),
1121 		.mode		= 0644,
1122 		.proc_handler	= &hugetlb_sysctl_handler,
1123 		.extra1		= (void *)&hugetlb_zero,
1124 		.extra2		= (void *)&hugetlb_infinity,
1125 	 },
1126 	 {
1127 		.ctl_name	= VM_HUGETLB_GROUP,
1128 		.procname	= "hugetlb_shm_group",
1129 		.data		= &sysctl_hugetlb_shm_group,
1130 		.maxlen		= sizeof(gid_t),
1131 		.mode		= 0644,
1132 		.proc_handler	= &proc_dointvec,
1133 	 },
1134 	 {
1135 		.ctl_name	= CTL_UNNUMBERED,
1136 		.procname	= "hugepages_treat_as_movable",
1137 		.data		= &hugepages_treat_as_movable,
1138 		.maxlen		= sizeof(int),
1139 		.mode		= 0644,
1140 		.proc_handler	= &hugetlb_treat_movable_handler,
1141 	},
1142 	{
1143 		.ctl_name	= CTL_UNNUMBERED,
1144 		.procname	= "nr_overcommit_hugepages",
1145 		.data		= NULL,
1146 		.maxlen		= sizeof(unsigned long),
1147 		.mode		= 0644,
1148 		.proc_handler	= &hugetlb_overcommit_handler,
1149 		.extra1		= (void *)&hugetlb_zero,
1150 		.extra2		= (void *)&hugetlb_infinity,
1151 	},
1152 #endif
1153 	{
1154 		.ctl_name	= VM_LOWMEM_RESERVE_RATIO,
1155 		.procname	= "lowmem_reserve_ratio",
1156 		.data		= &sysctl_lowmem_reserve_ratio,
1157 		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
1158 		.mode		= 0644,
1159 		.proc_handler	= &lowmem_reserve_ratio_sysctl_handler,
1160 		.strategy	= &sysctl_intvec,
1161 	},
1162 	{
1163 		.ctl_name	= VM_DROP_PAGECACHE,
1164 		.procname	= "drop_caches",
1165 		.data		= &sysctl_drop_caches,
1166 		.maxlen		= sizeof(int),
1167 		.mode		= 0644,
1168 		.proc_handler	= drop_caches_sysctl_handler,
1169 		.strategy	= &sysctl_intvec,
1170 	},
1171 	{
1172 		.ctl_name	= VM_MIN_FREE_KBYTES,
1173 		.procname	= "min_free_kbytes",
1174 		.data		= &min_free_kbytes,
1175 		.maxlen		= sizeof(min_free_kbytes),
1176 		.mode		= 0644,
1177 		.proc_handler	= &min_free_kbytes_sysctl_handler,
1178 		.strategy	= &sysctl_intvec,
1179 		.extra1		= &zero,
1180 	},
1181 	{
1182 		.ctl_name	= VM_PERCPU_PAGELIST_FRACTION,
1183 		.procname	= "percpu_pagelist_fraction",
1184 		.data		= &percpu_pagelist_fraction,
1185 		.maxlen		= sizeof(percpu_pagelist_fraction),
1186 		.mode		= 0644,
1187 		.proc_handler	= &percpu_pagelist_fraction_sysctl_handler,
1188 		.strategy	= &sysctl_intvec,
1189 		.extra1		= &min_percpu_pagelist_fract,
1190 	},
1191 #ifdef CONFIG_MMU
1192 	{
1193 		.ctl_name	= VM_MAX_MAP_COUNT,
1194 		.procname	= "max_map_count",
1195 		.data		= &sysctl_max_map_count,
1196 		.maxlen		= sizeof(sysctl_max_map_count),
1197 		.mode		= 0644,
1198 		.proc_handler	= &proc_dointvec
1199 	},
1200 #else
1201 	{
1202 		.ctl_name	= CTL_UNNUMBERED,
1203 		.procname	= "nr_trim_pages",
1204 		.data		= &sysctl_nr_trim_pages,
1205 		.maxlen		= sizeof(sysctl_nr_trim_pages),
1206 		.mode		= 0644,
1207 		.proc_handler	= &proc_dointvec_minmax,
1208 		.strategy	= &sysctl_intvec,
1209 		.extra1		= &zero,
1210 	},
1211 #endif
1212 	{
1213 		.ctl_name	= VM_LAPTOP_MODE,
1214 		.procname	= "laptop_mode",
1215 		.data		= &laptop_mode,
1216 		.maxlen		= sizeof(laptop_mode),
1217 		.mode		= 0644,
1218 		.proc_handler	= &proc_dointvec_jiffies,
1219 		.strategy	= &sysctl_jiffies,
1220 	},
1221 	{
1222 		.ctl_name	= VM_BLOCK_DUMP,
1223 		.procname	= "block_dump",
1224 		.data		= &block_dump,
1225 		.maxlen		= sizeof(block_dump),
1226 		.mode		= 0644,
1227 		.proc_handler	= &proc_dointvec,
1228 		.strategy	= &sysctl_intvec,
1229 		.extra1		= &zero,
1230 	},
1231 	{
1232 		.ctl_name	= VM_VFS_CACHE_PRESSURE,
1233 		.procname	= "vfs_cache_pressure",
1234 		.data		= &sysctl_vfs_cache_pressure,
1235 		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
1236 		.mode		= 0644,
1237 		.proc_handler	= &proc_dointvec,
1238 		.strategy	= &sysctl_intvec,
1239 		.extra1		= &zero,
1240 	},
1241 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1242 	{
1243 		.ctl_name	= VM_LEGACY_VA_LAYOUT,
1244 		.procname	= "legacy_va_layout",
1245 		.data		= &sysctl_legacy_va_layout,
1246 		.maxlen		= sizeof(sysctl_legacy_va_layout),
1247 		.mode		= 0644,
1248 		.proc_handler	= &proc_dointvec,
1249 		.strategy	= &sysctl_intvec,
1250 		.extra1		= &zero,
1251 	},
1252 #endif
1253 #ifdef CONFIG_NUMA
1254 	{
1255 		.ctl_name	= VM_ZONE_RECLAIM_MODE,
1256 		.procname	= "zone_reclaim_mode",
1257 		.data		= &zone_reclaim_mode,
1258 		.maxlen		= sizeof(zone_reclaim_mode),
1259 		.mode		= 0644,
1260 		.proc_handler	= &proc_dointvec,
1261 		.strategy	= &sysctl_intvec,
1262 		.extra1		= &zero,
1263 	},
1264 	{
1265 		.ctl_name	= VM_MIN_UNMAPPED,
1266 		.procname	= "min_unmapped_ratio",
1267 		.data		= &sysctl_min_unmapped_ratio,
1268 		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
1269 		.mode		= 0644,
1270 		.proc_handler	= &sysctl_min_unmapped_ratio_sysctl_handler,
1271 		.strategy	= &sysctl_intvec,
1272 		.extra1		= &zero,
1273 		.extra2		= &one_hundred,
1274 	},
1275 	{
1276 		.ctl_name	= VM_MIN_SLAB,
1277 		.procname	= "min_slab_ratio",
1278 		.data		= &sysctl_min_slab_ratio,
1279 		.maxlen		= sizeof(sysctl_min_slab_ratio),
1280 		.mode		= 0644,
1281 		.proc_handler	= &sysctl_min_slab_ratio_sysctl_handler,
1282 		.strategy	= &sysctl_intvec,
1283 		.extra1		= &zero,
1284 		.extra2		= &one_hundred,
1285 	},
1286 #endif
1287 #ifdef CONFIG_SMP
1288 	{
1289 		.ctl_name	= CTL_UNNUMBERED,
1290 		.procname	= "stat_interval",
1291 		.data		= &sysctl_stat_interval,
1292 		.maxlen		= sizeof(sysctl_stat_interval),
1293 		.mode		= 0644,
1294 		.proc_handler	= &proc_dointvec_jiffies,
1295 		.strategy	= &sysctl_jiffies,
1296 	},
1297 #endif
1298 	{
1299 		.ctl_name	= CTL_UNNUMBERED,
1300 		.procname	= "mmap_min_addr",
1301 		.data		= &mmap_min_addr,
1302 		.maxlen         = sizeof(unsigned long),
1303 		.mode		= 0644,
1304 		.proc_handler	= &proc_doulongvec_minmax,
1305 	},
1306 #ifdef CONFIG_NUMA
1307 	{
1308 		.ctl_name	= CTL_UNNUMBERED,
1309 		.procname	= "numa_zonelist_order",
1310 		.data		= &numa_zonelist_order,
1311 		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
1312 		.mode		= 0644,
1313 		.proc_handler	= &numa_zonelist_order_handler,
1314 		.strategy	= &sysctl_string,
1315 	},
1316 #endif
1317 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1318    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1319 	{
1320 		.ctl_name	= VM_VDSO_ENABLED,
1321 		.procname	= "vdso_enabled",
1322 		.data		= &vdso_enabled,
1323 		.maxlen		= sizeof(vdso_enabled),
1324 		.mode		= 0644,
1325 		.proc_handler	= &proc_dointvec,
1326 		.strategy	= &sysctl_intvec,
1327 		.extra1		= &zero,
1328 	},
1329 #endif
1330 #ifdef CONFIG_HIGHMEM
1331 	{
1332 		.ctl_name	= CTL_UNNUMBERED,
1333 		.procname	= "highmem_is_dirtyable",
1334 		.data		= &vm_highmem_is_dirtyable,
1335 		.maxlen		= sizeof(vm_highmem_is_dirtyable),
1336 		.mode		= 0644,
1337 		.proc_handler	= &proc_dointvec_minmax,
1338 		.strategy	= &sysctl_intvec,
1339 		.extra1		= &zero,
1340 		.extra2		= &one,
1341 	},
1342 #endif
1343 	{
1344 		.ctl_name	= CTL_UNNUMBERED,
1345 		.procname	= "scan_unevictable_pages",
1346 		.data		= &scan_unevictable_pages,
1347 		.maxlen		= sizeof(scan_unevictable_pages),
1348 		.mode		= 0644,
1349 		.proc_handler	= &scan_unevictable_handler,
1350 	},
1351 /*
1352  * NOTE: do not add new entries to this table unless you have read
1353  * Documentation/sysctl/ctl_unnumbered.txt
1354  */
1355 	{ .ctl_name = 0 }
1356 };
1357 
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
/*
 * Table holding only the terminating sentinel entry.  It is used as the
 * child of the "binfmt_misc" directory entry in fs_table.
 */
static struct ctl_table binfmt_misc_table[] = {
	{ .ctl_name = 0 }
};
#endif
1363 
/*
 * Filesystem-related sysctl entries.  Each entry names a /proc/sys file
 * (procname), the kernel variable backing it (data/maxlen), its permission
 * bits (mode) and the handler used for text access (proc_handler).  Entries
 * with a .child pointer are directories.  The table is terminated by an
 * entry whose ctl_name is 0 and which has no procname.
 */
static struct ctl_table fs_table[] = {
	{
		.ctl_name	= FS_NRINODE,
		.procname	= "inode-nr",
		.data		= &inodes_stat,
		/* exposes only the first two ints of inodes_stat */
		.maxlen		= 2*sizeof(int),
		.mode		= 0444,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= FS_STATINODE,
		.procname	= "inode-state",
		.data		= &inodes_stat,
		/* same backing object as inode-nr, but seven ints wide */
		.maxlen		= 7*sizeof(int),
		.mode		= 0444,
		.proc_handler	= &proc_dointvec,
	},
	{
		.procname	= "file-nr",
		.data		= &files_stat,
		.maxlen		= 3*sizeof(int),
		.mode		= 0444,
		.proc_handler	= &proc_nr_files,
	},
	{
		.ctl_name	= FS_MAXFILE,
		.procname	= "file-max",
		.data		= &files_stat.max_files,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "nr_open",
		.data		= &sysctl_nr_open,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		/* bounds handed to the minmax handler */
		.extra1		= &sysctl_nr_open_min,
		.extra2		= &sysctl_nr_open_max,
	},
	{
		.ctl_name	= FS_DENTRY,
		.procname	= "dentry-state",
		.data		= &dentry_stat,
		.maxlen		= 6*sizeof(int),
		.mode		= 0444,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= FS_OVERFLOWUID,
		.procname	= "overflowuid",
		.data		= &fs_overflowuid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &minolduid,
		.extra2		= &maxolduid,
	},
	{
		.ctl_name	= FS_OVERFLOWGID,
		.procname	= "overflowgid",
		.data		= &fs_overflowgid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		/* the gid entry reuses the uid bounds */
		.extra1		= &minolduid,
		.extra2		= &maxolduid,
	},
#ifdef CONFIG_FILE_LOCKING
	{
		.ctl_name	= FS_LEASES,
		.procname	= "leases-enable",
		.data		= &leases_enable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#endif
#ifdef CONFIG_DNOTIFY
	{
		.ctl_name	= FS_DIR_NOTIFY,
		.procname	= "dir-notify-enable",
		.data		= &dir_notify_enable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#endif
#ifdef CONFIG_MMU
#ifdef CONFIG_FILE_LOCKING
	{
		.ctl_name	= FS_LEASE_TIME,
		.procname	= "lease-break-time",
		.data		= &lease_break_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#endif
#ifdef CONFIG_AIO
	{
		.procname	= "aio-nr",
		.data		= &aio_nr,
		.maxlen		= sizeof(aio_nr),
		.mode		= 0444,
		.proc_handler	= &proc_doulongvec_minmax,
	},
	{
		.procname	= "aio-max-nr",
		.data		= &aio_max_nr,
		.maxlen		= sizeof(aio_max_nr),
		.mode		= 0644,
		.proc_handler	= &proc_doulongvec_minmax,
	},
#endif /* CONFIG_AIO */
#ifdef CONFIG_INOTIFY_USER
	{
		/* directory: entries come from inotify_table */
		.ctl_name	= FS_INOTIFY,
		.procname	= "inotify",
		.mode		= 0555,
		.child		= inotify_table,
	},
#endif
#ifdef CONFIG_EPOLL
	{
		/* directory: entries come from epoll_table */
		.procname	= "epoll",
		.mode		= 0555,
		.child		= epoll_table,
	},
#endif
#endif
	{
		.ctl_name	= KERN_SETUID_DUMPABLE,
		.procname	= "suid_dumpable",
		.data		= &suid_dumpable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &zero,
		.extra2		= &two,
	},
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
	{
		/* directory backed by the (sentinel-only) binfmt_misc_table */
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "binfmt_misc",
		.mode		= 0555,
		.child		= binfmt_misc_table,
	},
#endif
/*
 * NOTE: do not add new entries to this table unless you have read
 * Documentation/sysctl/ctl_unnumbered.txt
 */
	{ .ctl_name = 0 }
};
1524 
/*
 * Debug sysctl entries.  On x86 and PPC builds this exposes
 * show_unhandled_signals as "exception-trace"; on other configurations
 * the table contains only the terminating sentinel.
 */
static struct ctl_table debug_table[] = {
#if defined(CONFIG_X86) || defined(CONFIG_PPC)
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "exception-trace",
		.data		= &show_unhandled_signals,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
#endif
	{ .ctl_name = 0 }
};
1538 
/* Table that contains only the terminating sentinel entry. */
static struct ctl_table dev_table[] = {
	{ .ctl_name = 0 }
};
1542 
/*
 * Spinlock taken by the routines in this file around the ctl_table_header
 * used/count/unregistering fields and the header and root lists.
 */
static DEFINE_SPINLOCK(sysctl_lock);
1544 
1545 /* called under sysctl_lock */
1546 static int use_table(struct ctl_table_header *p)
1547 {
1548 	if (unlikely(p->unregistering))
1549 		return 0;
1550 	p->used++;
1551 	return 1;
1552 }
1553 
1554 /* called under sysctl_lock */
1555 static void unuse_table(struct ctl_table_header *p)
1556 {
1557 	if (!--p->used)
1558 		if (unlikely(p->unregistering))
1559 			complete(p->unregistering);
1560 }
1561 
/*
 * called under sysctl_lock, will reacquire if has to wait
 *
 * Marks @p as unregistering and unlinks it from its header list.  If the
 * header still has users, the lock is dropped while waiting for the last
 * of them to call unuse_table(), then taken again.
 */
static void start_unregistering(struct ctl_table_header *p)
{
	/*
	 * if p->used is 0, nobody will ever touch that entry again;
	 * we'll eliminate all paths to it before dropping sysctl_lock
	 */
	if (unlikely(p->used)) {
		/* on-stack completion; unuse_table() completes it at used == 0 */
		struct completion wait;
		init_completion(&wait);
		p->unregistering = &wait;
		spin_unlock(&sysctl_lock);
		wait_for_completion(&wait);
		spin_lock(&sysctl_lock);
	} else {
		/* anything non-NULL; we'll never dereference it */
		p->unregistering = ERR_PTR(-EINVAL);
	}
	/*
	 * do not remove from the list until nobody holds it; walking the
	 * list in do_sysctl() relies on that.
	 */
	list_del_init(&p->ctl_entry);
}
1586 
1587 void sysctl_head_get(struct ctl_table_header *head)
1588 {
1589 	spin_lock(&sysctl_lock);
1590 	head->count++;
1591 	spin_unlock(&sysctl_lock);
1592 }
1593 
1594 void sysctl_head_put(struct ctl_table_header *head)
1595 {
1596 	spin_lock(&sysctl_lock);
1597 	if (!--head->count)
1598 		kfree(head);
1599 	spin_unlock(&sysctl_lock);
1600 }
1601 
1602 struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
1603 {
1604 	if (!head)
1605 		BUG();
1606 	spin_lock(&sysctl_lock);
1607 	if (!use_table(head))
1608 		head = ERR_PTR(-ENOENT);
1609 	spin_unlock(&sysctl_lock);
1610 	return head;
1611 }
1612 
1613 void sysctl_head_finish(struct ctl_table_header *head)
1614 {
1615 	if (!head)
1616 		return;
1617 	spin_lock(&sysctl_lock);
1618 	unuse_table(head);
1619 	spin_unlock(&sysctl_lock);
1620 }
1621 
1622 static struct ctl_table_set *
1623 lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
1624 {
1625 	struct ctl_table_set *set = &root->default_set;
1626 	if (root->lookup)
1627 		set = root->lookup(root, namespaces);
1628 	return set;
1629 }
1630 
1631 static struct list_head *
1632 lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces)
1633 {
1634 	struct ctl_table_set *set = lookup_header_set(root, namespaces);
1635 	return &set->list;
1636 }
1637 
/*
 * Iterate over the table headers visible through @namespaces.
 *
 * With @prev == NULL the walk starts at root_table_header; otherwise the
 * usage reference on @prev is dropped and the walk resumes after it.
 * Returns the next header on which use_table() succeeds (i.e. with a usage
 * reference held), or NULL once the walk over the roots has wrapped back
 * to sysctl_table_root.  The whole walk runs under sysctl_lock.
 */
struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces,
					    struct ctl_table_header *prev)
{
	struct ctl_table_root *root;
	struct list_head *header_list;
	struct ctl_table_header *head;
	struct list_head *tmp;

	spin_lock(&sysctl_lock);
	if (prev) {
		head = prev;
		tmp = &prev->ctl_entry;
		unuse_table(prev);
		/* jump into the loop body to advance past prev */
		goto next;
	}
	tmp = &root_table_header.ctl_entry;
	for (;;) {
		head = list_entry(tmp, struct ctl_table_header, ctl_entry);

		/* headers that are being unregistered are skipped */
		if (!use_table(head))
			goto next;
		spin_unlock(&sysctl_lock);
		return head;
	next:
		root = head->root;
		tmp = tmp->next;
		header_list = lookup_header_list(root, namespaces);
		/* still inside the current root's list */
		if (tmp != header_list)
			continue;

		/* end of this list: move on to the next root with a non-empty list */
		do {
			root = list_entry(root->root_list.next,
					struct ctl_table_root, root_list);
			if (root == &sysctl_table_root)
				goto out;
			header_list = lookup_header_list(root, namespaces);
		} while (list_empty(header_list));
		tmp = header_list->next;
	}
out:
	spin_unlock(&sysctl_lock);
	return NULL;
}
1681 
1682 struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
1683 {
1684 	return __sysctl_head_next(current->nsproxy, prev);
1685 }
1686 
1687 void register_sysctl_root(struct ctl_table_root *root)
1688 {
1689 	spin_lock(&sysctl_lock);
1690 	list_add_tail(&root->root_list, &sysctl_table_root.root_list);
1691 	spin_unlock(&sysctl_lock);
1692 }
1693 
1694 #ifdef CONFIG_SYSCTL_SYSCALL
1695 /* Perform the actual read/write of a sysctl table entry. */
1696 static int do_sysctl_strategy(struct ctl_table_root *root,
1697 			struct ctl_table *table,
1698 			void __user *oldval, size_t __user *oldlenp,
1699 			void __user *newval, size_t newlen)
1700 {
1701 	int op = 0, rc;
1702 
1703 	if (oldval)
1704 		op |= MAY_READ;
1705 	if (newval)
1706 		op |= MAY_WRITE;
1707 	if (sysctl_perm(root, table, op))
1708 		return -EPERM;
1709 
1710 	if (table->strategy) {
1711 		rc = table->strategy(table, oldval, oldlenp, newval, newlen);
1712 		if (rc < 0)
1713 			return rc;
1714 		if (rc > 0)
1715 			return 0;
1716 	}
1717 
1718 	/* If there is no strategy routine, or if the strategy returns
1719 	 * zero, proceed with automatic r/w */
1720 	if (table->data && table->maxlen) {
1721 		rc = sysctl_data(table, oldval, oldlenp, newval, newlen);
1722 		if (rc < 0)
1723 			return rc;
1724 	}
1725 	return 0;
1726 }
1727 
1728 static int parse_table(int __user *name, int nlen,
1729 		       void __user *oldval, size_t __user *oldlenp,
1730 		       void __user *newval, size_t newlen,
1731 		       struct ctl_table_root *root,
1732 		       struct ctl_table *table)
1733 {
1734 	int n;
1735 repeat:
1736 	if (!nlen)
1737 		return -ENOTDIR;
1738 	if (get_user(n, name))
1739 		return -EFAULT;
1740 	for ( ; table->ctl_name || table->procname; table++) {
1741 		if (!table->ctl_name)
1742 			continue;
1743 		if (n == table->ctl_name) {
1744 			int error;
1745 			if (table->child) {
1746 				if (sysctl_perm(root, table, MAY_EXEC))
1747 					return -EPERM;
1748 				name++;
1749 				nlen--;
1750 				table = table->child;
1751 				goto repeat;
1752 			}
1753 			error = do_sysctl_strategy(root, table,
1754 						   oldval, oldlenp,
1755 						   newval, newlen);
1756 			return error;
1757 		}
1758 	}
1759 	return -ENOTDIR;
1760 }
1761 
1762 int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
1763 	       void __user *newval, size_t newlen)
1764 {
1765 	struct ctl_table_header *head;
1766 	int error = -ENOTDIR;
1767 
1768 	if (nlen <= 0 || nlen >= CTL_MAXNAME)
1769 		return -ENOTDIR;
1770 	if (oldval) {
1771 		int old_len;
1772 		if (!oldlenp || get_user(old_len, oldlenp))
1773 			return -EFAULT;
1774 	}
1775 
1776 	for (head = sysctl_head_next(NULL); head;
1777 			head = sysctl_head_next(head)) {
1778 		error = parse_table(name, nlen, oldval, oldlenp,
1779 					newval, newlen,
1780 					head->root, head->ctl_table);
1781 		if (error != -ENOTDIR) {
1782 			sysctl_head_finish(head);
1783 			break;
1784 		}
1785 	}
1786 	return error;
1787 }
1788 
1789 SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args)
1790 {
1791 	struct __sysctl_args tmp;
1792 	int error;
1793 
1794 	if (copy_from_user(&tmp, args, sizeof(tmp)))
1795 		return -EFAULT;
1796 
1797 	error = deprecated_sysctl_warning(&tmp);
1798 	if (error)
1799 		goto out;
1800 
1801 	lock_kernel();
1802 	error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
1803 			  tmp.newval, tmp.newlen);
1804 	unlock_kernel();
1805 out:
1806 	return error;
1807 }
1808 #endif /* CONFIG_SYSCTL_SYSCALL */
1809 
1810 /*
1811  * sysctl_perm does NOT grant the superuser all rights automatically, because
1812  * some sysctl variables are readonly even to root.
1813  */
1814 
1815 static int test_perm(int mode, int op)
1816 {
1817 	if (!current_euid())
1818 		mode >>= 6;
1819 	else if (in_egroup_p(0))
1820 		mode >>= 3;
1821 	if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0)
1822 		return 0;
1823 	return -EACCES;
1824 }
1825 
1826 int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
1827 {
1828 	int error;
1829 	int mode;
1830 
1831 	error = security_sysctl(table, op & (MAY_READ | MAY_WRITE | MAY_EXEC));
1832 	if (error)
1833 		return error;
1834 
1835 	if (root->permissions)
1836 		mode = root->permissions(root, current->nsproxy, table);
1837 	else
1838 		mode = table->mode;
1839 
1840 	return test_perm(mode, op);
1841 }
1842 
1843 static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1844 {
1845 	for (; table->ctl_name || table->procname; table++) {
1846 		table->parent = parent;
1847 		if (table->child)
1848 			sysctl_set_parent(table, table->child);
1849 	}
1850 }
1851 
/*
 * Boot-time setup, run through core_initcall(): fills in the parent
 * pointers of root_table and, with CONFIG_SYSCTL_SYSCALL_CHECK, runs
 * sysctl_check_table() over it.  Always returns 0.
 */
static __init int sysctl_init(void)
{
	sysctl_set_parent(NULL, root_table);
#ifdef CONFIG_SYSCTL_SYSCALL_CHECK
	{
		/* the check result is stored but not acted upon */
		int err;
		err = sysctl_check_table(current->nsproxy, root_table);
	}
#endif
	return 0;
}

core_initcall(sysctl_init);
1865 
1866 static struct ctl_table *is_branch_in(struct ctl_table *branch,
1867 				      struct ctl_table *table)
1868 {
1869 	struct ctl_table *p;
1870 	const char *s = branch->procname;
1871 
1872 	/* branch should have named subdirectory as its first element */
1873 	if (!s || !branch->child)
1874 		return NULL;
1875 
1876 	/* ... and nothing else */
1877 	if (branch[1].procname || branch[1].ctl_name)
1878 		return NULL;
1879 
1880 	/* table should contain subdirectory with the same name */
1881 	for (p = table; p->procname || p->ctl_name; p++) {
1882 		if (!p->child)
1883 			continue;
1884 		if (p->procname && strcmp(p->procname, s) == 0)
1885 			return p;
1886 	}
1887 	return NULL;
1888 }
1889 
/*
 * see if attaching q to p would be an improvement
 *
 * Walks q's table ("by") and p's table ("to") downwards in step for as
 * long as is_branch_in() finds the single directory of "by" inside "to".
 * If the walk passed q's current attachment point, and the not_in_parent
 * condition below holds, q is re-attached under p at the deepest point
 * reached.
 */
static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
{
	struct ctl_table *to = p->ctl_table, *by = q->ctl_table;
	struct ctl_table *next;
	int is_better = 0;
	/* starts out true when p itself has no attachment point recorded */
	int not_in_parent = !p->attached_by;

	while ((next = is_branch_in(by, to)) != NULL) {
		/* the walk got at least as deep as q's current attachment */
		if (by == q->attached_by)
			is_better = 1;
		/* the walk reached p's own attachment point */
		if (to == p->attached_by)
			not_in_parent = 1;
		by = by->child;
		to = next->child;
	}

	if (is_better && not_in_parent) {
		q->attached_by = by;
		q->attached_to = to;
		q->parent = p;
	}
}
1913 
1914 /**
1915  * __register_sysctl_paths - register a sysctl hierarchy
1916  * @root: List of sysctl headers to register on
1917  * @namespaces: Data to compute which lists of sysctl entries are visible
1918  * @path: The path to the directory the sysctl table is in.
1919  * @table: the top-level table structure
1920  *
1921  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1922  * array. A completely 0 filled entry terminates the table.
1923  *
1924  * The members of the &struct ctl_table structure are used as follows:
1925  *
1926  * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
1927  *            must be unique within that level of sysctl
1928  *
1929  * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1930  *            enter a sysctl file
1931  *
1932  * data - a pointer to data for use by proc_handler
1933  *
1934  * maxlen - the maximum size in bytes of the data
1935  *
1936  * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1937  *
1938  * child - a pointer to the child sysctl table if this entry is a directory, or
1939  *         %NULL.
1940  *
1941  * proc_handler - the text handler routine (described below)
1942  *
1943  * strategy - the strategy routine (described below)
1944  *
1945  * de - for internal use by the sysctl routines
1946  *
1947  * extra1, extra2 - extra pointers usable by the proc handler routines
1948  *
1949  * Leaf nodes in the sysctl tree will be represented by a single file
1950  * under /proc; non-leaf nodes will be represented by directories.
1951  *
1952  * sysctl(2) can automatically manage read and write requests through
1953  * the sysctl table.  The data and maxlen fields of the ctl_table
1954  * struct enable minimal validation of the values being written to be
1955  * performed, and the mode field allows minimal authentication.
1956  *
1957  * More sophisticated management can be enabled by the provision of a
1958  * strategy routine with the table entry.  This will be called before
1959  * any automatic read or write of the data is performed.
1960  *
1961  * The strategy routine may return
1962  *
1963  * < 0 - Error occurred (error is passed to user process)
1964  *
1965  * 0   - OK - proceed with automatic read or write.
1966  *
1967  * > 0 - OK - read or write has been done by the strategy routine, so
1968  *       return immediately.
1969  *
1970  * There must be a proc_handler routine for any terminal nodes
1971  * mirrored under /proc/sys (non-terminals are handled by a built-in
1972  * directory handler).  Several default handlers are available to
1973  * cover common cases -
1974  *
1975  * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1976  * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1977  * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1978  *
1979  * It is the handler's job to read the input buffer from user memory
1980  * and process it. The handler should return 0 on success.
1981  *
1982  * This routine returns %NULL on a failure to register, and a pointer
1983  * to the table header on success.
1984  */
struct ctl_table_header *__register_sysctl_paths(
	struct ctl_table_root *root,
	struct nsproxy *namespaces,
	const struct ctl_path *path, struct ctl_table *table)
{
	struct ctl_table_header *header;
	struct ctl_table *new, **prevp;
	unsigned int n, npath;
	struct ctl_table_set *set;

	/* Count the path components */
	for (npath = 0; path[npath].ctl_name || path[npath].procname; ++npath)
		;

	/*
	 * For each path component, allocate a 2-element ctl_table array.
	 * The first array element will be filled with the sysctl entry
	 * for this, the second will be the sentinel (ctl_name == 0).
	 *
	 * We allocate everything in one go so that we don't have to
	 * worry about freeing additional memory in unregister_sysctl_table.
	 */
	header = kzalloc(sizeof(struct ctl_table_header) +
			 (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL);
	if (!header)
		return NULL;

	/* the per-component tables live directly behind the header */
	new = (struct ctl_table *) (header + 1);

	/* Now connect the dots */
	prevp = &header->ctl_table;
	for (n = 0; n < npath; ++n, ++path) {
		/* Copy the procname */
		new->procname = path->procname;
		new->ctl_name = path->ctl_name;
		new->mode     = 0555;

		/* chain this directory entry below the previous level */
		*prevp = new;
		prevp = &new->child;

		/* skip the entry and its zeroed sentinel */
		new += 2;
	}
	/* the caller's table hangs off the innermost directory (or the header) */
	*prevp = table;
	header->ctl_table_arg = table;

	INIT_LIST_HEAD(&header->ctl_entry);
	header->used = 0;
	header->unregistering = NULL;
	header->root = root;
	sysctl_set_parent(NULL, header->ctl_table);
	header->count = 1;
#ifdef CONFIG_SYSCTL_SYSCALL_CHECK
	if (sysctl_check_table(namespaces, header->ctl_table)) {
		kfree(header);
		return NULL;
	}
#endif
	spin_lock(&sysctl_lock);
	header->set = lookup_header_set(root, namespaces);
	/* default attachment: directly below the root table */
	header->attached_by = header->ctl_table;
	header->attached_to = root_table;
	header->parent = &root_table_header;
	/* look for a better parent in this set and all of its ancestors */
	for (set = header->set; set; set = set->parent) {
		struct ctl_table_header *p;
		list_for_each_entry(p, &set->list, ctl_entry) {
			if (p->unregistering)
				continue;
			try_attach(p, header);
		}
	}
	/* the chosen parent gains a reference from its new child */
	header->parent->count++;
	list_add_tail(&header->ctl_entry, &header->set->list);
	spin_unlock(&sysctl_lock);

	return header;
}
2061 
2062 /**
2063  * register_sysctl_table_path - register a sysctl table hierarchy
2064  * @path: The path to the directory the sysctl table is in.
2065  * @table: the top-level table structure
2066  *
2067  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
2068  * array. A completely 0 filled entry terminates the table.
2069  *
2070  * See __register_sysctl_paths for more details.
2071  */
2072 struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
2073 						struct ctl_table *table)
2074 {
2075 	return __register_sysctl_paths(&sysctl_table_root, current->nsproxy,
2076 					path, table);
2077 }
2078 
2079 /**
2080  * register_sysctl_table - register a sysctl table hierarchy
2081  * @table: the top-level table structure
2082  *
2083  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
2084  * array. A completely 0 filled entry terminates the table.
2085  *
2086  * See register_sysctl_paths for more details.
2087  */
2088 struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
2089 {
2090 	static const struct ctl_path null_path[] = { {} };
2091 
2092 	return register_sysctl_paths(null_path, table);
2093 }
2094 
2095 /**
2096  * unregister_sysctl_table - unregister a sysctl table hierarchy
2097  * @header: the header returned from register_sysctl_table
2098  *
2099  * Unregisters the sysctl table and all children. proc entries may not
2100  * actually be removed until they are no longer used by anyone.
2101  */
2102 void unregister_sysctl_table(struct ctl_table_header * header)
2103 {
2104 	might_sleep();
2105 
2106 	if (header == NULL)
2107 		return;
2108 
2109 	spin_lock(&sysctl_lock);
2110 	start_unregistering(header);
2111 	if (!--header->parent->count) {
2112 		WARN_ON(1);
2113 		kfree(header->parent);
2114 	}
2115 	if (!--header->count)
2116 		kfree(header);
2117 	spin_unlock(&sysctl_lock);
2118 }
2119 
2120 int sysctl_is_seen(struct ctl_table_header *p)
2121 {
2122 	struct ctl_table_set *set = p->set;
2123 	int res;
2124 	spin_lock(&sysctl_lock);
2125 	if (p->unregistering)
2126 		res = 0;
2127 	else if (!set->is_seen)
2128 		res = 1;
2129 	else
2130 		res = set->is_seen(set);
2131 	spin_unlock(&sysctl_lock);
2132 	return res;
2133 }
2134 
2135 void setup_sysctl_set(struct ctl_table_set *p,
2136 	struct ctl_table_set *parent,
2137 	int (*is_seen)(struct ctl_table_set *))
2138 {
2139 	INIT_LIST_HEAD(&p->list);
2140 	p->parent = parent ? parent : &sysctl_table_root.default_set;
2141 	p->is_seen = is_seen;
2142 }
2143 
2144 #else /* !CONFIG_SYSCTL */
/* Stub for builds without CONFIG_SYSCTL: registers nothing, returns NULL. */
struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
{
	return NULL;
}
2149 
/* Stub for builds without CONFIG_SYSCTL: registers nothing, returns NULL. */
struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
						    struct ctl_table *table)
{
	return NULL;
}
2155 
/* Stub for builds without CONFIG_SYSCTL: nothing to unregister. */
void unregister_sysctl_table(struct ctl_table_header * table)
{
}
2159 
/* Stub for builds without CONFIG_SYSCTL: leaves @p untouched. */
void setup_sysctl_set(struct ctl_table_set *p,
	struct ctl_table_set *parent,
	int (*is_seen)(struct ctl_table_set *))
{
}
2165 
/* Stub for builds without CONFIG_SYSCTL: no reference counting is done. */
void sysctl_head_put(struct ctl_table_header *head)
{
}
2169 
2170 #endif /* CONFIG_SYSCTL */
2171 
2172 /*
2173  * /proc/sys support
2174  */
2175 
2176 #ifdef CONFIG_PROC_SYSCTL
2177 
/*
 * Common read/write helper for string sysctls.
 *
 * @data:   kernel buffer holding the string
 * @maxlen: size of that buffer
 * @write:  non-zero to store from user space, zero to copy out to it
 * @filp:   not referenced in this function
 * @buffer: user buffer
 * @lenp:   in: size of the user buffer; out on read: bytes produced
 * @ppos:   file position, advanced by the amount consumed/produced
 *
 * On write, input is taken up to the first NUL or newline, truncated to
 * maxlen-1 bytes and NUL-terminated in @data.  On read, the string is
 * copied out starting at *ppos and a newline is appended if there is room.
 * Returns 0 on success or -EFAULT when user memory cannot be accessed.
 */
static int _proc_do_string(void* data, int maxlen, int write,
			   struct file *filp, void __user *buffer,
			   size_t *lenp, loff_t *ppos)
{
	size_t len;
	char __user *p;
	char c;

	/* nothing to do without a buffer, capacity or a request size */
	if (!data || !maxlen || !*lenp) {
		*lenp = 0;
		return 0;
	}

	if (write) {
		/* find the length of the input up to NUL/newline */
		len = 0;
		p = buffer;
		while (len < *lenp) {
			if (get_user(c, p++))
				return -EFAULT;
			if (c == 0 || c == '\n')
				break;
			len++;
		}
		/* leave room for the terminating NUL */
		if (len >= maxlen)
			len = maxlen-1;
		if(copy_from_user(data, buffer, len))
			return -EFAULT;
		((char *) data)[len] = 0;
		/* the whole user buffer counts as consumed */
		*ppos += *lenp;
	} else {
		len = strlen(data);
		if (len > maxlen)
			len = maxlen;

		/* reading past the end of the string yields nothing */
		if (*ppos > len) {
			*lenp = 0;
			return 0;
		}

		data += *ppos;
		len  -= *ppos;

		if (len > *lenp)
			len = *lenp;
		if (len)
			if(copy_to_user(buffer, data, len))
				return -EFAULT;
		/* append a newline when the user buffer still has space */
		if (len < *lenp) {
			if(put_user('\n', ((char __user *) buffer) + len))
				return -EFAULT;
			len++;
		}
		*lenp = len;
		*ppos += len;
	}
	return 0;
}
2235 
2236 /**
2237  * proc_dostring - read a string sysctl
2238  * @table: the sysctl table
2239  * @write: %TRUE if this is a write to the sysctl file
2240  * @filp: the file structure
2241  * @buffer: the user buffer
2242  * @lenp: the size of the user buffer
2243  * @ppos: file position
2244  *
2245  * Reads/writes a string from/to the user buffer. If the kernel
2246  * buffer provided is not large enough to hold the string, the
2247  * string is truncated. The copied string is %NULL-terminated.
2248  * If the string is being read by the user process, it is copied
2249  * and a newline '\n' is added. It is truncated if the buffer is
2250  * not large enough.
2251  *
2252  * Returns 0 on success.
2253  */
2254 int proc_dostring(struct ctl_table *table, int write, struct file *filp,
2255 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2256 {
2257 	return _proc_do_string(table->data, table->maxlen, write, filp,
2258 			       buffer, lenp, ppos);
2259 }
2260 
2261 
/*
 * do_proc_dointvec_conv - default converter between the parsed
 * sign/magnitude pair and an int sysctl value
 * @negp:  sign flag (non-zero means negative)
 * @lvalp: magnitude
 * @valp:  the int being read or written
 * @write: non-zero to store (*negp, *lvalp) into *valp, zero to split
 *         *valp into (*negp, *lvalp)
 * @data:  unused
 *
 * Returns 0 on success, or -EINVAL on a write whose magnitude cannot be
 * represented in an int (in which case *valp is left untouched).
 */
static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		if (*negp) {
			/* INT_MIN has magnitude INT_MAX + 1. */
			if (*lvalp > (unsigned long)INT_MAX + 1)
				return -EINVAL;
			*valp = -*lvalp;
		} else {
			if (*lvalp > (unsigned long)INT_MAX)
				return -EINVAL;
			*valp = *lvalp;
		}
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/* Negate as unsigned so INT_MIN does not overflow. */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2280 
2281 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2282 		  int write, struct file *filp, void __user *buffer,
2283 		  size_t *lenp, loff_t *ppos,
2284 		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
2285 			      int write, void *data),
2286 		  void *data)
2287 {
2288 #define TMPBUFLEN 21
2289 	int *i, vleft, first = 1, neg;
2290 	unsigned long lval;
2291 	size_t left, len;
2292 
2293 	char buf[TMPBUFLEN], *p;
2294 	char __user *s = buffer;
2295 
2296 	if (!tbl_data || !table->maxlen || !*lenp ||
2297 	    (*ppos && !write)) {
2298 		*lenp = 0;
2299 		return 0;
2300 	}
2301 
2302 	i = (int *) tbl_data;
2303 	vleft = table->maxlen / sizeof(*i);
2304 	left = *lenp;
2305 
2306 	if (!conv)
2307 		conv = do_proc_dointvec_conv;
2308 
2309 	for (; left && vleft--; i++, first=0) {
2310 		if (write) {
2311 			while (left) {
2312 				char c;
2313 				if (get_user(c, s))
2314 					return -EFAULT;
2315 				if (!isspace(c))
2316 					break;
2317 				left--;
2318 				s++;
2319 			}
2320 			if (!left)
2321 				break;
2322 			neg = 0;
2323 			len = left;
2324 			if (len > sizeof(buf) - 1)
2325 				len = sizeof(buf) - 1;
2326 			if (copy_from_user(buf, s, len))
2327 				return -EFAULT;
2328 			buf[len] = 0;
2329 			p = buf;
2330 			if (*p == '-' && left > 1) {
2331 				neg = 1;
2332 				p++;
2333 			}
2334 			if (*p < '0' || *p > '9')
2335 				break;
2336 
2337 			lval = simple_strtoul(p, &p, 0);
2338 
2339 			len = p-buf;
2340 			if ((len < left) && *p && !isspace(*p))
2341 				break;
2342 			s += len;
2343 			left -= len;
2344 
2345 			if (conv(&neg, &lval, i, 1, data))
2346 				break;
2347 		} else {
2348 			p = buf;
2349 			if (!first)
2350 				*p++ = '\t';
2351 
2352 			if (conv(&neg, &lval, i, 0, data))
2353 				break;
2354 
2355 			sprintf(p, "%s%lu", neg ? "-" : "", lval);
2356 			len = strlen(buf);
2357 			if (len > left)
2358 				len = left;
2359 			if(copy_to_user(s, buf, len))
2360 				return -EFAULT;
2361 			left -= len;
2362 			s += len;
2363 		}
2364 	}
2365 
2366 	if (!write && !first && left) {
2367 		if(put_user('\n', s))
2368 			return -EFAULT;
2369 		left--, s++;
2370 	}
2371 	if (write) {
2372 		while (left) {
2373 			char c;
2374 			if (get_user(c, s++))
2375 				return -EFAULT;
2376 			if (!isspace(c))
2377 				break;
2378 			left--;
2379 		}
2380 	}
2381 	if (write && first)
2382 		return -EINVAL;
2383 	*lenp -= left;
2384 	*ppos += *lenp;
2385 	return 0;
2386 #undef TMPBUFLEN
2387 }
2388 
2389 static int do_proc_dointvec(struct ctl_table *table, int write, struct file *filp,
2390 		  void __user *buffer, size_t *lenp, loff_t *ppos,
2391 		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
2392 			      int write, void *data),
2393 		  void *data)
2394 {
2395 	return __do_proc_dointvec(table->data, table, write, filp,
2396 			buffer, lenp, ppos, conv, data);
2397 }
2398 
2399 /**
2400  * proc_dointvec - read a vector of integers
2401  * @table: the sysctl table
2402  * @write: %TRUE if this is a write to the sysctl file
2403  * @filp: the file structure
2404  * @buffer: the user buffer
2405  * @lenp: the size of the user buffer
2406  * @ppos: file position
2407  *
2408  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2409  * values from/to the user buffer, treated as an ASCII string.
2410  *
2411  * Returns 0 on success.
2412  */
2413 int proc_dointvec(struct ctl_table *table, int write, struct file *filp,
2414 		     void __user *buffer, size_t *lenp, loff_t *ppos)
2415 {
2416     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2417 		    	    NULL,NULL);
2418 }
2419 
2420 /*
2421  * Taint values can only be increased
2422  * This means we can safely use a temporary.
2423  */
2424 static int proc_taint(struct ctl_table *table, int write, struct file *filp,
2425 			       void __user *buffer, size_t *lenp, loff_t *ppos)
2426 {
2427 	struct ctl_table t;
2428 	unsigned long tmptaint = get_taint();
2429 	int err;
2430 
2431 	if (write && !capable(CAP_SYS_ADMIN))
2432 		return -EPERM;
2433 
2434 	t = *table;
2435 	t.data = &tmptaint;
2436 	err = proc_doulongvec_minmax(&t, write, filp, buffer, lenp, ppos);
2437 	if (err < 0)
2438 		return err;
2439 
2440 	if (write) {
2441 		/*
2442 		 * Poor man's atomic or. Not worth adding a primitive
2443 		 * to everyone's atomic.h for this
2444 		 */
2445 		int i;
2446 		for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2447 			if ((tmptaint >> i) & 1)
2448 				add_taint(i);
2449 		}
2450 	}
2451 
2452 	return err;
2453 }
2454 
/*
 * Limits passed to do_proc_dointvec_minmax_conv().  Either pointer may
 * be NULL, meaning that bound is not checked.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * do_proc_dointvec_minmax_conv - int converter with range checking
 * @negp:  sign flag (non-zero means negative)
 * @lvalp: magnitude
 * @valp:  the int being read or written
 * @write: non-zero to store, zero to load
 * @data:  struct do_proc_dointvec_minmax_conv_param with the limits
 *
 * On a write the value is rejected with -EINVAL when it does not fit in
 * an int or lies outside [*min, *max]; *valp is only updated on success.
 * The int-range check happens before the limit check so that a huge
 * magnitude cannot wrap into the permitted range.
 *
 * Returns 0 on success.
 */
static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		int val;

		if (*negp) {
			/* INT_MIN has magnitude INT_MAX + 1. */
			if (*lvalp > (unsigned long)INT_MAX + 1)
				return -EINVAL;
			val = -*lvalp;
		} else {
			if (*lvalp > (unsigned long)INT_MAX)
				return -EINVAL;
			val = *lvalp;
		}
		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/* Negate as unsigned so INT_MIN does not overflow. */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2483 
2484 /**
2485  * proc_dointvec_minmax - read a vector of integers with min/max values
2486  * @table: the sysctl table
2487  * @write: %TRUE if this is a write to the sysctl file
2488  * @filp: the file structure
2489  * @buffer: the user buffer
2490  * @lenp: the size of the user buffer
2491  * @ppos: file position
2492  *
2493  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2494  * values from/to the user buffer, treated as an ASCII string.
2495  *
2496  * This routine will ensure the values are within the range specified by
2497  * table->extra1 (min) and table->extra2 (max).
2498  *
2499  * Returns 0 on success.
2500  */
2501 int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp,
2502 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2503 {
2504 	struct do_proc_dointvec_minmax_conv_param param = {
2505 		.min = (int *) table->extra1,
2506 		.max = (int *) table->extra2,
2507 	};
2508 	return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
2509 				do_proc_dointvec_minmax_conv, &param);
2510 }
2511 
2512 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2513 				     struct file *filp,
2514 				     void __user *buffer,
2515 				     size_t *lenp, loff_t *ppos,
2516 				     unsigned long convmul,
2517 				     unsigned long convdiv)
2518 {
2519 #define TMPBUFLEN 21
2520 	unsigned long *i, *min, *max, val;
2521 	int vleft, first=1, neg;
2522 	size_t len, left;
2523 	char buf[TMPBUFLEN], *p;
2524 	char __user *s = buffer;
2525 
2526 	if (!data || !table->maxlen || !*lenp ||
2527 	    (*ppos && !write)) {
2528 		*lenp = 0;
2529 		return 0;
2530 	}
2531 
2532 	i = (unsigned long *) data;
2533 	min = (unsigned long *) table->extra1;
2534 	max = (unsigned long *) table->extra2;
2535 	vleft = table->maxlen / sizeof(unsigned long);
2536 	left = *lenp;
2537 
2538 	for (; left && vleft--; i++, min++, max++, first=0) {
2539 		if (write) {
2540 			while (left) {
2541 				char c;
2542 				if (get_user(c, s))
2543 					return -EFAULT;
2544 				if (!isspace(c))
2545 					break;
2546 				left--;
2547 				s++;
2548 			}
2549 			if (!left)
2550 				break;
2551 			neg = 0;
2552 			len = left;
2553 			if (len > TMPBUFLEN-1)
2554 				len = TMPBUFLEN-1;
2555 			if (copy_from_user(buf, s, len))
2556 				return -EFAULT;
2557 			buf[len] = 0;
2558 			p = buf;
2559 			if (*p == '-' && left > 1) {
2560 				neg = 1;
2561 				p++;
2562 			}
2563 			if (*p < '0' || *p > '9')
2564 				break;
2565 			val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
2566 			len = p-buf;
2567 			if ((len < left) && *p && !isspace(*p))
2568 				break;
2569 			if (neg)
2570 				val = -val;
2571 			s += len;
2572 			left -= len;
2573 
2574 			if(neg)
2575 				continue;
2576 			if ((min && val < *min) || (max && val > *max))
2577 				continue;
2578 			*i = val;
2579 		} else {
2580 			p = buf;
2581 			if (!first)
2582 				*p++ = '\t';
2583 			sprintf(p, "%lu", convdiv * (*i) / convmul);
2584 			len = strlen(buf);
2585 			if (len > left)
2586 				len = left;
2587 			if(copy_to_user(s, buf, len))
2588 				return -EFAULT;
2589 			left -= len;
2590 			s += len;
2591 		}
2592 	}
2593 
2594 	if (!write && !first && left) {
2595 		if(put_user('\n', s))
2596 			return -EFAULT;
2597 		left--, s++;
2598 	}
2599 	if (write) {
2600 		while (left) {
2601 			char c;
2602 			if (get_user(c, s++))
2603 				return -EFAULT;
2604 			if (!isspace(c))
2605 				break;
2606 			left--;
2607 		}
2608 	}
2609 	if (write && first)
2610 		return -EINVAL;
2611 	*lenp -= left;
2612 	*ppos += *lenp;
2613 	return 0;
2614 #undef TMPBUFLEN
2615 }
2616 
2617 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2618 				     struct file *filp,
2619 				     void __user *buffer,
2620 				     size_t *lenp, loff_t *ppos,
2621 				     unsigned long convmul,
2622 				     unsigned long convdiv)
2623 {
2624 	return __do_proc_doulongvec_minmax(table->data, table, write,
2625 			filp, buffer, lenp, ppos, convmul, convdiv);
2626 }
2627 
2628 /**
2629  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2630  * @table: the sysctl table
2631  * @write: %TRUE if this is a write to the sysctl file
2632  * @filp: the file structure
2633  * @buffer: the user buffer
2634  * @lenp: the size of the user buffer
2635  * @ppos: file position
2636  *
2637  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2638  * values from/to the user buffer, treated as an ASCII string.
2639  *
2640  * This routine will ensure the values are within the range specified by
2641  * table->extra1 (min) and table->extra2 (max).
2642  *
2643  * Returns 0 on success.
2644  */
2645 int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp,
2646 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2647 {
2648     return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l);
2649 }
2650 
2651 /**
2652  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2653  * @table: the sysctl table
2654  * @write: %TRUE if this is a write to the sysctl file
2655  * @filp: the file structure
2656  * @buffer: the user buffer
2657  * @lenp: the size of the user buffer
2658  * @ppos: file position
2659  *
2660  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2661  * values from/to the user buffer, treated as an ASCII string. The values
2662  * are treated as milliseconds, and converted to jiffies when they are stored.
2663  *
2664  * This routine will ensure the values are within the range specified by
2665  * table->extra1 (min) and table->extra2 (max).
2666  *
2667  * Returns 0 on success.
2668  */
2669 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2670 				      struct file *filp,
2671 				      void __user *buffer,
2672 				      size_t *lenp, loff_t *ppos)
2673 {
2674     return do_proc_doulongvec_minmax(table, write, filp, buffer,
2675 				     lenp, ppos, HZ, 1000l);
2676 }
2677 
2678 
2679 static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
2680 					 int *valp,
2681 					 int write, void *data)
2682 {
2683 	if (write) {
2684 		if (*lvalp > LONG_MAX / HZ)
2685 			return 1;
2686 		*valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2687 	} else {
2688 		int val = *valp;
2689 		unsigned long lval;
2690 		if (val < 0) {
2691 			*negp = -1;
2692 			lval = (unsigned long)-val;
2693 		} else {
2694 			*negp = 0;
2695 			lval = (unsigned long)val;
2696 		}
2697 		*lvalp = lval / HZ;
2698 	}
2699 	return 0;
2700 }
2701 
2702 static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
2703 						int *valp,
2704 						int write, void *data)
2705 {
2706 	if (write) {
2707 		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2708 			return 1;
2709 		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2710 	} else {
2711 		int val = *valp;
2712 		unsigned long lval;
2713 		if (val < 0) {
2714 			*negp = -1;
2715 			lval = (unsigned long)-val;
2716 		} else {
2717 			*negp = 0;
2718 			lval = (unsigned long)val;
2719 		}
2720 		*lvalp = jiffies_to_clock_t(lval);
2721 	}
2722 	return 0;
2723 }
2724 
/*
 * do_proc_dointvec_ms_jiffies_conv - converter between milliseconds
 * (user side) and jiffies (stored int)
 * @negp:  sign flag (non-zero means negative)
 * @lvalp: magnitude in milliseconds
 * @valp:  the stored int, in jiffies
 * @write: non-zero to store, zero to load
 * @data:  unused
 *
 * Returns 0 on success, or 1 on a write whose jiffies value does not
 * fit in the int that holds it (*valp is then left untouched).
 */
static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);

		/* Refuse rather than silently truncate into the int. */
		if (jif > (unsigned long)INT_MAX)
			return 1;
		*valp = (int)jif;
	} else {
		int val = *valp;
		unsigned long lval;
		if (val < 0) {
			*negp = -1;
			/* Negate as unsigned so INT_MIN does not overflow. */
			lval = -(unsigned long)val;
		} else {
			*negp = 0;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_msecs(lval);
	}
	return 0;
}
2745 
2746 /**
2747  * proc_dointvec_jiffies - read a vector of integers as seconds
2748  * @table: the sysctl table
2749  * @write: %TRUE if this is a write to the sysctl file
2750  * @filp: the file structure
2751  * @buffer: the user buffer
2752  * @lenp: the size of the user buffer
2753  * @ppos: file position
2754  *
2755  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2756  * values from/to the user buffer, treated as an ASCII string.
2757  * The values read are assumed to be in seconds, and are converted into
2758  * jiffies.
2759  *
2760  * Returns 0 on success.
2761  */
2762 int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp,
2763 			  void __user *buffer, size_t *lenp, loff_t *ppos)
2764 {
2765     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2766 		    	    do_proc_dointvec_jiffies_conv,NULL);
2767 }
2768 
2769 /**
2770  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2771  * @table: the sysctl table
2772  * @write: %TRUE if this is a write to the sysctl file
2773  * @filp: the file structure
2774  * @buffer: the user buffer
2775  * @lenp: the size of the user buffer
2776  * @ppos: pointer to the file position
2777  *
2778  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2779  * values from/to the user buffer, treated as an ASCII string.
2780  * The values read are assumed to be in 1/USER_HZ seconds, and
2781  * are converted into jiffies.
2782  *
2783  * Returns 0 on success.
2784  */
2785 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp,
2786 				 void __user *buffer, size_t *lenp, loff_t *ppos)
2787 {
2788     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2789 		    	    do_proc_dointvec_userhz_jiffies_conv,NULL);
2790 }
2791 
2792 /**
2793  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2794  * @table: the sysctl table
2795  * @write: %TRUE if this is a write to the sysctl file
2796  * @filp: the file structure
2797  * @buffer: the user buffer
2798  * @lenp: the size of the user buffer
2799  * @ppos: file position
2800  * @ppos: the current position in the file
2801  *
2802  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2803  * values from/to the user buffer, treated as an ASCII string.
2804  * The values read are assumed to be in 1/1000 seconds, and
2805  * are converted into jiffies.
2806  *
2807  * Returns 0 on success.
2808  */
2809 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp,
2810 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2811 {
2812 	return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
2813 				do_proc_dointvec_ms_jiffies_conv, NULL);
2814 }
2815 
2816 static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp,
2817 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2818 {
2819 	struct pid *new_pid;
2820 	pid_t tmp;
2821 	int r;
2822 
2823 	tmp = pid_vnr(cad_pid);
2824 
2825 	r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
2826 			       lenp, ppos, NULL, NULL);
2827 	if (r || !write)
2828 		return r;
2829 
2830 	new_pid = find_get_pid(tmp);
2831 	if (!new_pid)
2832 		return -ESRCH;
2833 
2834 	put_pid(xchg(&cad_pid, new_pid));
2835 	return 0;
2836 }
2837 
#else /* CONFIG_PROC_SYSCTL */
2839 
2840 int proc_dostring(struct ctl_table *table, int write, struct file *filp,
2841 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2842 {
2843 	return -ENOSYS;
2844 }
2845 
2846 int proc_dointvec(struct ctl_table *table, int write, struct file *filp,
2847 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2848 {
2849 	return -ENOSYS;
2850 }
2851 
2852 int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp,
2853 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2854 {
2855 	return -ENOSYS;
2856 }
2857 
2858 int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp,
2859 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2860 {
2861 	return -ENOSYS;
2862 }
2863 
2864 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp,
2865 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2866 {
2867 	return -ENOSYS;
2868 }
2869 
2870 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp,
2871 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2872 {
2873 	return -ENOSYS;
2874 }
2875 
2876 int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp,
2877 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2878 {
2879 	return -ENOSYS;
2880 }
2881 
2882 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2883 				      struct file *filp,
2884 				      void __user *buffer,
2885 				      size_t *lenp, loff_t *ppos)
2886 {
2887     return -ENOSYS;
2888 }
2889 
2890 
#endif /* CONFIG_PROC_SYSCTL */
2892 
2893 
2894 #ifdef CONFIG_SYSCTL_SYSCALL
2895 /*
2896  * General sysctl support routines
2897  */
2898 
2899 /* The generic sysctl data routine (used if no strategy routine supplied) */
2900 int sysctl_data(struct ctl_table *table,
2901 		void __user *oldval, size_t __user *oldlenp,
2902 		void __user *newval, size_t newlen)
2903 {
2904 	size_t len;
2905 
2906 	/* Get out of I don't have a variable */
2907 	if (!table->data || !table->maxlen)
2908 		return -ENOTDIR;
2909 
2910 	if (oldval && oldlenp) {
2911 		if (get_user(len, oldlenp))
2912 			return -EFAULT;
2913 		if (len) {
2914 			if (len > table->maxlen)
2915 				len = table->maxlen;
2916 			if (copy_to_user(oldval, table->data, len))
2917 				return -EFAULT;
2918 			if (put_user(len, oldlenp))
2919 				return -EFAULT;
2920 		}
2921 	}
2922 
2923 	if (newval && newlen) {
2924 		if (newlen > table->maxlen)
2925 			newlen = table->maxlen;
2926 
2927 		if (copy_from_user(table->data, newval, newlen))
2928 			return -EFAULT;
2929 	}
2930 	return 1;
2931 }
2932 
2933 /* The generic string strategy routine: */
2934 int sysctl_string(struct ctl_table *table,
2935 		  void __user *oldval, size_t __user *oldlenp,
2936 		  void __user *newval, size_t newlen)
2937 {
2938 	if (!table->data || !table->maxlen)
2939 		return -ENOTDIR;
2940 
2941 	if (oldval && oldlenp) {
2942 		size_t bufsize;
2943 		if (get_user(bufsize, oldlenp))
2944 			return -EFAULT;
2945 		if (bufsize) {
2946 			size_t len = strlen(table->data), copied;
2947 
2948 			/* This shouldn't trigger for a well-formed sysctl */
2949 			if (len > table->maxlen)
2950 				len = table->maxlen;
2951 
2952 			/* Copy up to a max of bufsize-1 bytes of the string */
2953 			copied = (len >= bufsize) ? bufsize - 1 : len;
2954 
2955 			if (copy_to_user(oldval, table->data, copied) ||
2956 			    put_user(0, (char __user *)(oldval + copied)))
2957 				return -EFAULT;
2958 			if (put_user(len, oldlenp))
2959 				return -EFAULT;
2960 		}
2961 	}
2962 	if (newval && newlen) {
2963 		size_t len = newlen;
2964 		if (len > table->maxlen)
2965 			len = table->maxlen;
2966 		if(copy_from_user(table->data, newval, len))
2967 			return -EFAULT;
2968 		if (len == table->maxlen)
2969 			len--;
2970 		((char *) table->data)[len] = 0;
2971 	}
2972 	return 1;
2973 }
2974 
2975 /*
2976  * This function makes sure that all of the integers in the vector
2977  * are between the minimum and maximum values given in the arrays
2978  * table->extra1 and table->extra2, respectively.
2979  */
2980 int sysctl_intvec(struct ctl_table *table,
2981 		void __user *oldval, size_t __user *oldlenp,
2982 		void __user *newval, size_t newlen)
2983 {
2984 
2985 	if (newval && newlen) {
2986 		int __user *vec = (int __user *) newval;
2987 		int *min = (int *) table->extra1;
2988 		int *max = (int *) table->extra2;
2989 		size_t length;
2990 		int i;
2991 
2992 		if (newlen % sizeof(int) != 0)
2993 			return -EINVAL;
2994 
2995 		if (!table->extra1 && !table->extra2)
2996 			return 0;
2997 
2998 		if (newlen > table->maxlen)
2999 			newlen = table->maxlen;
3000 		length = newlen / sizeof(int);
3001 
3002 		for (i = 0; i < length; i++) {
3003 			int value;
3004 			if (get_user(value, vec + i))
3005 				return -EFAULT;
3006 			if (min && value < min[i])
3007 				return -EINVAL;
3008 			if (max && value > max[i])
3009 				return -EINVAL;
3010 		}
3011 	}
3012 	return 0;
3013 }
3014 
3015 /* Strategy function to convert jiffies to seconds */
3016 int sysctl_jiffies(struct ctl_table *table,
3017 		void __user *oldval, size_t __user *oldlenp,
3018 		void __user *newval, size_t newlen)
3019 {
3020 	if (oldval && oldlenp) {
3021 		size_t olen;
3022 
3023 		if (get_user(olen, oldlenp))
3024 			return -EFAULT;
3025 		if (olen) {
3026 			int val;
3027 
3028 			if (olen < sizeof(int))
3029 				return -EINVAL;
3030 
3031 			val = *(int *)(table->data) / HZ;
3032 			if (put_user(val, (int __user *)oldval))
3033 				return -EFAULT;
3034 			if (put_user(sizeof(int), oldlenp))
3035 				return -EFAULT;
3036 		}
3037 	}
3038 	if (newval && newlen) {
3039 		int new;
3040 		if (newlen != sizeof(int))
3041 			return -EINVAL;
3042 		if (get_user(new, (int __user *)newval))
3043 			return -EFAULT;
3044 		*(int *)(table->data) = new*HZ;
3045 	}
3046 	return 1;
3047 }
3048 
3049 /* Strategy function to convert jiffies to seconds */
3050 int sysctl_ms_jiffies(struct ctl_table *table,
3051 		void __user *oldval, size_t __user *oldlenp,
3052 		void __user *newval, size_t newlen)
3053 {
3054 	if (oldval && oldlenp) {
3055 		size_t olen;
3056 
3057 		if (get_user(olen, oldlenp))
3058 			return -EFAULT;
3059 		if (olen) {
3060 			int val;
3061 
3062 			if (olen < sizeof(int))
3063 				return -EINVAL;
3064 
3065 			val = jiffies_to_msecs(*(int *)(table->data));
3066 			if (put_user(val, (int __user *)oldval))
3067 				return -EFAULT;
3068 			if (put_user(sizeof(int), oldlenp))
3069 				return -EFAULT;
3070 		}
3071 	}
3072 	if (newval && newlen) {
3073 		int new;
3074 		if (newlen != sizeof(int))
3075 			return -EINVAL;
3076 		if (get_user(new, (int __user *)newval))
3077 			return -EFAULT;
3078 		*(int *)(table->data) = msecs_to_jiffies(new);
3079 	}
3080 	return 1;
3081 }
3082 
3083 
3084 
3085 #else /* CONFIG_SYSCTL_SYSCALL */
3086 
3087 
3088 SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args)
3089 {
3090 	struct __sysctl_args tmp;
3091 	int error;
3092 
3093 	if (copy_from_user(&tmp, args, sizeof(tmp)))
3094 		return -EFAULT;
3095 
3096 	error = deprecated_sysctl_warning(&tmp);
3097 
3098 	/* If no error reading the parameters then just -ENOSYS ... */
3099 	if (!error)
3100 		error = -ENOSYS;
3101 
3102 	return error;
3103 }
3104 
3105 int sysctl_data(struct ctl_table *table,
3106 		  void __user *oldval, size_t __user *oldlenp,
3107 		  void __user *newval, size_t newlen)
3108 {
3109 	return -ENOSYS;
3110 }
3111 
3112 int sysctl_string(struct ctl_table *table,
3113 		  void __user *oldval, size_t __user *oldlenp,
3114 		  void __user *newval, size_t newlen)
3115 {
3116 	return -ENOSYS;
3117 }
3118 
3119 int sysctl_intvec(struct ctl_table *table,
3120 		void __user *oldval, size_t __user *oldlenp,
3121 		void __user *newval, size_t newlen)
3122 {
3123 	return -ENOSYS;
3124 }
3125 
3126 int sysctl_jiffies(struct ctl_table *table,
3127 		void __user *oldval, size_t __user *oldlenp,
3128 		void __user *newval, size_t newlen)
3129 {
3130 	return -ENOSYS;
3131 }
3132 
3133 int sysctl_ms_jiffies(struct ctl_table *table,
3134 		void __user *oldval, size_t __user *oldlenp,
3135 		void __user *newval, size_t newlen)
3136 {
3137 	return -ENOSYS;
3138 }
3139 
3140 #endif /* CONFIG_SYSCTL_SYSCALL */
3141 
3142 static int deprecated_sysctl_warning(struct __sysctl_args *args)
3143 {
3144 	static int msg_count;
3145 	int name[CTL_MAXNAME];
3146 	int i;
3147 
3148 	/* Check args->nlen. */
3149 	if (args->nlen < 0 || args->nlen > CTL_MAXNAME)
3150 		return -ENOTDIR;
3151 
3152 	/* Read in the sysctl name for better debug message logging */
3153 	for (i = 0; i < args->nlen; i++)
3154 		if (get_user(name[i], args->name + i))
3155 			return -EFAULT;
3156 
3157 	/* Ignore accesses to kernel.version */
3158 	if ((args->nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION))
3159 		return 0;
3160 
3161 	if (msg_count < 5) {
3162 		msg_count++;
3163 		printk(KERN_INFO
3164 			"warning: process `%s' used the deprecated sysctl "
3165 			"system call with ", current->comm);
3166 		for (i = 0; i < args->nlen; i++)
3167 			printk("%d.", name[i]);
3168 		printk("\n");
3169 	}
3170 	return 0;
3171 }
3172 
/*
 * The exports are collected here rather than placed after each symbol
 * definition: several of these functions are defined twice in this file
 * (a real implementation and a stub in the alternative preprocessor
 * branch), so a single list avoids repeating every export in both
 * branches.  Exception to the usual placement granted :-)
 */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(register_sysctl_paths);
EXPORT_SYMBOL(sysctl_intvec);
EXPORT_SYMBOL(sysctl_jiffies);
EXPORT_SYMBOL(sysctl_ms_jiffies);
EXPORT_SYMBOL(sysctl_string);
EXPORT_SYMBOL(sysctl_data);
EXPORT_SYMBOL(unregister_sysctl_table);
3193