xref: /openbmc/linux/kernel/sysctl.c (revision baa7eb025ab14f3cba2e35c0a8648f9c9f01d24f)
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20 
21 #include <linux/module.h>
22 #include <linux/mm.h>
23 #include <linux/swap.h>
24 #include <linux/slab.h>
25 #include <linux/sysctl.h>
26 #include <linux/signal.h>
27 #include <linux/proc_fs.h>
28 #include <linux/security.h>
29 #include <linux/ctype.h>
30 #include <linux/kmemcheck.h>
31 #include <linux/fs.h>
32 #include <linux/init.h>
33 #include <linux/kernel.h>
34 #include <linux/kobject.h>
35 #include <linux/net.h>
36 #include <linux/sysrq.h>
37 #include <linux/highuid.h>
38 #include <linux/writeback.h>
39 #include <linux/ratelimit.h>
40 #include <linux/compaction.h>
41 #include <linux/hugetlb.h>
42 #include <linux/initrd.h>
43 #include <linux/key.h>
44 #include <linux/times.h>
45 #include <linux/limits.h>
46 #include <linux/dcache.h>
47 #include <linux/dnotify.h>
48 #include <linux/syscalls.h>
49 #include <linux/vmstat.h>
50 #include <linux/nfs_fs.h>
51 #include <linux/acpi.h>
52 #include <linux/reboot.h>
53 #include <linux/ftrace.h>
54 #include <linux/perf_event.h>
55 #include <linux/kprobes.h>
56 #include <linux/pipe_fs_i.h>
57 #include <linux/oom.h>
58 
59 #include <asm/uaccess.h>
60 #include <asm/processor.h>
61 
62 #ifdef CONFIG_X86
63 #include <asm/nmi.h>
64 #include <asm/stacktrace.h>
65 #include <asm/io.h>
66 #endif
67 #ifdef CONFIG_BSD_PROCESS_ACCT
68 #include <linux/acct.h>
69 #endif
70 #ifdef CONFIG_RT_MUTEXES
71 #include <linux/rtmutex.h>
72 #endif
73 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
74 #include <linux/lockdep.h>
75 #endif
76 #ifdef CONFIG_CHR_DEV_SG
77 #include <scsi/sg.h>
78 #endif
79 
80 #ifdef CONFIG_LOCKUP_DETECTOR
81 #include <linux/nmi.h>
82 #endif
83 
84 
85 #if defined(CONFIG_SYSCTL)
86 
87 /* External variables not in a header file. */
88 extern int sysctl_overcommit_memory;
89 extern int sysctl_overcommit_ratio;
90 extern int max_threads;
91 extern int core_uses_pid;
92 extern int suid_dumpable;
93 extern char core_pattern[];
94 extern unsigned int core_pipe_limit;
95 extern int pid_max;
96 extern int min_free_kbytes;
97 extern int pid_max_min, pid_max_max;
98 extern int sysctl_drop_caches;
99 extern int percpu_pagelist_fraction;
100 extern int compat_log;
101 extern int latencytop_enabled;
102 extern int sysctl_nr_open_min, sysctl_nr_open_max;
103 #ifndef CONFIG_MMU
104 extern int sysctl_nr_trim_pages;
105 #endif
106 #ifdef CONFIG_BLOCK
107 extern int blk_iopoll_enabled;
108 #endif
109 
110 /* Constants used for minimum and  maximum */
111 #ifdef CONFIG_LOCKUP_DETECTOR
112 static int sixty = 60;
113 static int neg_one = -1;
114 #endif
115 
116 static int zero;
117 static int __maybe_unused one = 1;
118 static int __maybe_unused two = 2;
119 static unsigned long one_ul = 1;
120 static int one_hundred = 100;
121 #ifdef CONFIG_PRINTK
122 static int ten_thousand = 10000;
123 #endif
124 
125 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
126 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
127 
128 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
129 static int maxolduid = 65535;
130 static int minolduid;
131 static int min_percpu_pagelist_fract = 8;
132 
133 static int ngroups_max = NGROUPS_MAX;
134 
135 #ifdef CONFIG_INOTIFY_USER
136 #include <linux/inotify.h>
137 #endif
138 #ifdef CONFIG_SPARC
139 #include <asm/system.h>
140 #endif
141 
142 #ifdef CONFIG_SPARC64
143 extern int sysctl_tsb_ratio;
144 #endif
145 
146 #ifdef __hppa__
147 extern int pwrsw_enabled;
148 extern int unaligned_enabled;
149 #endif
150 
151 #ifdef CONFIG_S390
152 #ifdef CONFIG_MATHEMU
153 extern int sysctl_ieee_emulation_warnings;
154 #endif
155 extern int sysctl_userprocess_debug;
156 extern int spin_retry;
157 #endif
158 
159 #ifdef CONFIG_IA64
160 extern int no_unaligned_warning;
161 extern int unaligned_dump_stack;
162 #endif
163 
164 #ifdef CONFIG_PROC_SYSCTL
165 static int proc_do_cad_pid(struct ctl_table *table, int write,
166 		  void __user *buffer, size_t *lenp, loff_t *ppos);
167 static int proc_taint(struct ctl_table *table, int write,
168 			       void __user *buffer, size_t *lenp, loff_t *ppos);
169 #endif
170 
171 #ifdef CONFIG_MAGIC_SYSRQ
172 static int __sysrq_enabled; /* Note: sysrq code ises it's own private copy */
173 
174 static int sysrq_sysctl_handler(ctl_table *table, int write,
175 				void __user *buffer, size_t *lenp,
176 				loff_t *ppos)
177 {
178 	int error;
179 
180 	error = proc_dointvec(table, write, buffer, lenp, ppos);
181 	if (error)
182 		return error;
183 
184 	if (write)
185 		sysrq_toggle_support(__sysrq_enabled);
186 
187 	return 0;
188 }
189 
190 #endif
191 
192 static struct ctl_table root_table[];
193 static struct ctl_table_root sysctl_table_root;
194 static struct ctl_table_header root_table_header = {
195 	.count = 1,
196 	.ctl_table = root_table,
197 	.ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),
198 	.root = &sysctl_table_root,
199 	.set = &sysctl_table_root.default_set,
200 };
201 static struct ctl_table_root sysctl_table_root = {
202 	.root_list = LIST_HEAD_INIT(sysctl_table_root.root_list),
203 	.default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry),
204 };
205 
206 static struct ctl_table kern_table[];
207 static struct ctl_table vm_table[];
208 static struct ctl_table fs_table[];
209 static struct ctl_table debug_table[];
210 static struct ctl_table dev_table[];
211 extern struct ctl_table random_table[];
212 #ifdef CONFIG_EPOLL
213 extern struct ctl_table epoll_table[];
214 #endif
215 
216 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
217 int sysctl_legacy_va_layout;
218 #endif
219 
220 /* The default sysctl tables: */
221 
222 static struct ctl_table root_table[] = {
223 	{
224 		.procname	= "kernel",
225 		.mode		= 0555,
226 		.child		= kern_table,
227 	},
228 	{
229 		.procname	= "vm",
230 		.mode		= 0555,
231 		.child		= vm_table,
232 	},
233 	{
234 		.procname	= "fs",
235 		.mode		= 0555,
236 		.child		= fs_table,
237 	},
238 	{
239 		.procname	= "debug",
240 		.mode		= 0555,
241 		.child		= debug_table,
242 	},
243 	{
244 		.procname	= "dev",
245 		.mode		= 0555,
246 		.child		= dev_table,
247 	},
248 /*
249  * NOTE: do not add new entries to this table unless you have read
250  * Documentation/sysctl/ctl_unnumbered.txt
251  */
252 	{ }
253 };
254 
255 #ifdef CONFIG_SCHED_DEBUG
256 static int min_sched_granularity_ns = 100000;		/* 100 usecs */
257 static int max_sched_granularity_ns = NSEC_PER_SEC;	/* 1 second */
258 static int min_wakeup_granularity_ns;			/* 0 usecs */
259 static int max_wakeup_granularity_ns = NSEC_PER_SEC;	/* 1 second */
260 static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
261 static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
262 #endif
263 
264 #ifdef CONFIG_COMPACTION
265 static int min_extfrag_threshold;
266 static int max_extfrag_threshold = 1000;
267 #endif
268 
269 static struct ctl_table kern_table[] = {
270 	{
271 		.procname	= "sched_child_runs_first",
272 		.data		= &sysctl_sched_child_runs_first,
273 		.maxlen		= sizeof(unsigned int),
274 		.mode		= 0644,
275 		.proc_handler	= proc_dointvec,
276 	},
277 #ifdef CONFIG_SCHED_DEBUG
278 	{
279 		.procname	= "sched_min_granularity_ns",
280 		.data		= &sysctl_sched_min_granularity,
281 		.maxlen		= sizeof(unsigned int),
282 		.mode		= 0644,
283 		.proc_handler	= sched_proc_update_handler,
284 		.extra1		= &min_sched_granularity_ns,
285 		.extra2		= &max_sched_granularity_ns,
286 	},
287 	{
288 		.procname	= "sched_latency_ns",
289 		.data		= &sysctl_sched_latency,
290 		.maxlen		= sizeof(unsigned int),
291 		.mode		= 0644,
292 		.proc_handler	= sched_proc_update_handler,
293 		.extra1		= &min_sched_granularity_ns,
294 		.extra2		= &max_sched_granularity_ns,
295 	},
296 	{
297 		.procname	= "sched_wakeup_granularity_ns",
298 		.data		= &sysctl_sched_wakeup_granularity,
299 		.maxlen		= sizeof(unsigned int),
300 		.mode		= 0644,
301 		.proc_handler	= sched_proc_update_handler,
302 		.extra1		= &min_wakeup_granularity_ns,
303 		.extra2		= &max_wakeup_granularity_ns,
304 	},
305 	{
306 		.procname	= "sched_tunable_scaling",
307 		.data		= &sysctl_sched_tunable_scaling,
308 		.maxlen		= sizeof(enum sched_tunable_scaling),
309 		.mode		= 0644,
310 		.proc_handler	= sched_proc_update_handler,
311 		.extra1		= &min_sched_tunable_scaling,
312 		.extra2		= &max_sched_tunable_scaling,
313 	},
314 	{
315 		.procname	= "sched_migration_cost",
316 		.data		= &sysctl_sched_migration_cost,
317 		.maxlen		= sizeof(unsigned int),
318 		.mode		= 0644,
319 		.proc_handler	= proc_dointvec,
320 	},
321 	{
322 		.procname	= "sched_nr_migrate",
323 		.data		= &sysctl_sched_nr_migrate,
324 		.maxlen		= sizeof(unsigned int),
325 		.mode		= 0644,
326 		.proc_handler	= proc_dointvec,
327 	},
328 	{
329 		.procname	= "sched_time_avg",
330 		.data		= &sysctl_sched_time_avg,
331 		.maxlen		= sizeof(unsigned int),
332 		.mode		= 0644,
333 		.proc_handler	= proc_dointvec,
334 	},
335 	{
336 		.procname	= "sched_shares_window",
337 		.data		= &sysctl_sched_shares_window,
338 		.maxlen		= sizeof(unsigned int),
339 		.mode		= 0644,
340 		.proc_handler	= proc_dointvec,
341 	},
342 	{
343 		.procname	= "timer_migration",
344 		.data		= &sysctl_timer_migration,
345 		.maxlen		= sizeof(unsigned int),
346 		.mode		= 0644,
347 		.proc_handler	= proc_dointvec_minmax,
348 		.extra1		= &zero,
349 		.extra2		= &one,
350 	},
351 #endif
352 	{
353 		.procname	= "sched_rt_period_us",
354 		.data		= &sysctl_sched_rt_period,
355 		.maxlen		= sizeof(unsigned int),
356 		.mode		= 0644,
357 		.proc_handler	= sched_rt_handler,
358 	},
359 	{
360 		.procname	= "sched_rt_runtime_us",
361 		.data		= &sysctl_sched_rt_runtime,
362 		.maxlen		= sizeof(int),
363 		.mode		= 0644,
364 		.proc_handler	= sched_rt_handler,
365 	},
366 	{
367 		.procname	= "sched_compat_yield",
368 		.data		= &sysctl_sched_compat_yield,
369 		.maxlen		= sizeof(unsigned int),
370 		.mode		= 0644,
371 		.proc_handler	= proc_dointvec,
372 	},
#ifdef CONFIG_SCHED_AUTOGROUP
	/*
	 * On/off switch backed by sysctl_sched_autogroup_enabled.
	 *
	 * The value is meant to be restricted to 0 or 1 via .extra1/.extra2.
	 * proc_dointvec() does not look at those bounds, so the _minmax
	 * variant must be used for the range to be enforced; out-of-range
	 * writes are then rejected instead of being stored.
	 */
	{
		.procname	= "sched_autogroup_enabled",
		.data		= &sysctl_sched_autogroup_enabled,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &one,
	},
#endif
384 #ifdef CONFIG_PROVE_LOCKING
385 	{
386 		.procname	= "prove_locking",
387 		.data		= &prove_locking,
388 		.maxlen		= sizeof(int),
389 		.mode		= 0644,
390 		.proc_handler	= proc_dointvec,
391 	},
392 #endif
393 #ifdef CONFIG_LOCK_STAT
394 	{
395 		.procname	= "lock_stat",
396 		.data		= &lock_stat,
397 		.maxlen		= sizeof(int),
398 		.mode		= 0644,
399 		.proc_handler	= proc_dointvec,
400 	},
401 #endif
402 	{
403 		.procname	= "panic",
404 		.data		= &panic_timeout,
405 		.maxlen		= sizeof(int),
406 		.mode		= 0644,
407 		.proc_handler	= proc_dointvec,
408 	},
409 	{
410 		.procname	= "core_uses_pid",
411 		.data		= &core_uses_pid,
412 		.maxlen		= sizeof(int),
413 		.mode		= 0644,
414 		.proc_handler	= proc_dointvec,
415 	},
416 	{
417 		.procname	= "core_pattern",
418 		.data		= core_pattern,
419 		.maxlen		= CORENAME_MAX_SIZE,
420 		.mode		= 0644,
421 		.proc_handler	= proc_dostring,
422 	},
423 	{
424 		.procname	= "core_pipe_limit",
425 		.data		= &core_pipe_limit,
426 		.maxlen		= sizeof(unsigned int),
427 		.mode		= 0644,
428 		.proc_handler	= proc_dointvec,
429 	},
430 #ifdef CONFIG_PROC_SYSCTL
431 	{
432 		.procname	= "tainted",
433 		.maxlen 	= sizeof(long),
434 		.mode		= 0644,
435 		.proc_handler	= proc_taint,
436 	},
437 #endif
438 #ifdef CONFIG_LATENCYTOP
439 	{
440 		.procname	= "latencytop",
441 		.data		= &latencytop_enabled,
442 		.maxlen		= sizeof(int),
443 		.mode		= 0644,
444 		.proc_handler	= proc_dointvec,
445 	},
446 #endif
447 #ifdef CONFIG_BLK_DEV_INITRD
448 	{
449 		.procname	= "real-root-dev",
450 		.data		= &real_root_dev,
451 		.maxlen		= sizeof(int),
452 		.mode		= 0644,
453 		.proc_handler	= proc_dointvec,
454 	},
455 #endif
456 	{
457 		.procname	= "print-fatal-signals",
458 		.data		= &print_fatal_signals,
459 		.maxlen		= sizeof(int),
460 		.mode		= 0644,
461 		.proc_handler	= proc_dointvec,
462 	},
463 #ifdef CONFIG_SPARC
464 	{
465 		.procname	= "reboot-cmd",
466 		.data		= reboot_command,
467 		.maxlen		= 256,
468 		.mode		= 0644,
469 		.proc_handler	= proc_dostring,
470 	},
471 	{
472 		.procname	= "stop-a",
473 		.data		= &stop_a_enabled,
474 		.maxlen		= sizeof (int),
475 		.mode		= 0644,
476 		.proc_handler	= proc_dointvec,
477 	},
478 	{
479 		.procname	= "scons-poweroff",
480 		.data		= &scons_pwroff,
481 		.maxlen		= sizeof (int),
482 		.mode		= 0644,
483 		.proc_handler	= proc_dointvec,
484 	},
485 #endif
486 #ifdef CONFIG_SPARC64
487 	{
488 		.procname	= "tsb-ratio",
489 		.data		= &sysctl_tsb_ratio,
490 		.maxlen		= sizeof (int),
491 		.mode		= 0644,
492 		.proc_handler	= proc_dointvec,
493 	},
494 #endif
495 #ifdef __hppa__
496 	{
497 		.procname	= "soft-power",
498 		.data		= &pwrsw_enabled,
499 		.maxlen		= sizeof (int),
500 	 	.mode		= 0644,
501 		.proc_handler	= proc_dointvec,
502 	},
503 	{
504 		.procname	= "unaligned-trap",
505 		.data		= &unaligned_enabled,
506 		.maxlen		= sizeof (int),
507 		.mode		= 0644,
508 		.proc_handler	= proc_dointvec,
509 	},
510 #endif
511 	{
512 		.procname	= "ctrl-alt-del",
513 		.data		= &C_A_D,
514 		.maxlen		= sizeof(int),
515 		.mode		= 0644,
516 		.proc_handler	= proc_dointvec,
517 	},
518 #ifdef CONFIG_FUNCTION_TRACER
519 	{
520 		.procname	= "ftrace_enabled",
521 		.data		= &ftrace_enabled,
522 		.maxlen		= sizeof(int),
523 		.mode		= 0644,
524 		.proc_handler	= ftrace_enable_sysctl,
525 	},
526 #endif
527 #ifdef CONFIG_STACK_TRACER
528 	{
529 		.procname	= "stack_tracer_enabled",
530 		.data		= &stack_tracer_enabled,
531 		.maxlen		= sizeof(int),
532 		.mode		= 0644,
533 		.proc_handler	= stack_trace_sysctl,
534 	},
535 #endif
536 #ifdef CONFIG_TRACING
537 	{
538 		.procname	= "ftrace_dump_on_oops",
539 		.data		= &ftrace_dump_on_oops,
540 		.maxlen		= sizeof(int),
541 		.mode		= 0644,
542 		.proc_handler	= proc_dointvec,
543 	},
544 #endif
545 #ifdef CONFIG_MODULES
546 	{
547 		.procname	= "modprobe",
548 		.data		= &modprobe_path,
549 		.maxlen		= KMOD_PATH_LEN,
550 		.mode		= 0644,
551 		.proc_handler	= proc_dostring,
552 	},
553 	{
554 		.procname	= "modules_disabled",
555 		.data		= &modules_disabled,
556 		.maxlen		= sizeof(int),
557 		.mode		= 0644,
558 		/* only handle a transition from default "0" to "1" */
559 		.proc_handler	= proc_dointvec_minmax,
560 		.extra1		= &one,
561 		.extra2		= &one,
562 	},
563 #endif
564 #ifdef CONFIG_HOTPLUG
565 	{
566 		.procname	= "hotplug",
567 		.data		= &uevent_helper,
568 		.maxlen		= UEVENT_HELPER_PATH_LEN,
569 		.mode		= 0644,
570 		.proc_handler	= proc_dostring,
571 	},
572 #endif
573 #ifdef CONFIG_CHR_DEV_SG
574 	{
575 		.procname	= "sg-big-buff",
576 		.data		= &sg_big_buff,
577 		.maxlen		= sizeof (int),
578 		.mode		= 0444,
579 		.proc_handler	= proc_dointvec,
580 	},
581 #endif
582 #ifdef CONFIG_BSD_PROCESS_ACCT
583 	{
584 		.procname	= "acct",
585 		.data		= &acct_parm,
586 		.maxlen		= 3*sizeof(int),
587 		.mode		= 0644,
588 		.proc_handler	= proc_dointvec,
589 	},
590 #endif
591 #ifdef CONFIG_MAGIC_SYSRQ
592 	{
593 		.procname	= "sysrq",
594 		.data		= &__sysrq_enabled,
595 		.maxlen		= sizeof (int),
596 		.mode		= 0644,
597 		.proc_handler	= sysrq_sysctl_handler,
598 	},
599 #endif
600 #ifdef CONFIG_PROC_SYSCTL
601 	{
602 		.procname	= "cad_pid",
603 		.data		= NULL,
604 		.maxlen		= sizeof (int),
605 		.mode		= 0600,
606 		.proc_handler	= proc_do_cad_pid,
607 	},
608 #endif
609 	{
610 		.procname	= "threads-max",
611 		.data		= &max_threads,
612 		.maxlen		= sizeof(int),
613 		.mode		= 0644,
614 		.proc_handler	= proc_dointvec,
615 	},
616 	{
617 		.procname	= "random",
618 		.mode		= 0555,
619 		.child		= random_table,
620 	},
621 	{
622 		.procname	= "overflowuid",
623 		.data		= &overflowuid,
624 		.maxlen		= sizeof(int),
625 		.mode		= 0644,
626 		.proc_handler	= proc_dointvec_minmax,
627 		.extra1		= &minolduid,
628 		.extra2		= &maxolduid,
629 	},
630 	{
631 		.procname	= "overflowgid",
632 		.data		= &overflowgid,
633 		.maxlen		= sizeof(int),
634 		.mode		= 0644,
635 		.proc_handler	= proc_dointvec_minmax,
636 		.extra1		= &minolduid,
637 		.extra2		= &maxolduid,
638 	},
639 #ifdef CONFIG_S390
640 #ifdef CONFIG_MATHEMU
641 	{
642 		.procname	= "ieee_emulation_warnings",
643 		.data		= &sysctl_ieee_emulation_warnings,
644 		.maxlen		= sizeof(int),
645 		.mode		= 0644,
646 		.proc_handler	= proc_dointvec,
647 	},
648 #endif
649 	{
650 		.procname	= "userprocess_debug",
651 		.data		= &show_unhandled_signals,
652 		.maxlen		= sizeof(int),
653 		.mode		= 0644,
654 		.proc_handler	= proc_dointvec,
655 	},
656 #endif
657 	{
658 		.procname	= "pid_max",
659 		.data		= &pid_max,
660 		.maxlen		= sizeof (int),
661 		.mode		= 0644,
662 		.proc_handler	= proc_dointvec_minmax,
663 		.extra1		= &pid_max_min,
664 		.extra2		= &pid_max_max,
665 	},
666 	{
667 		.procname	= "panic_on_oops",
668 		.data		= &panic_on_oops,
669 		.maxlen		= sizeof(int),
670 		.mode		= 0644,
671 		.proc_handler	= proc_dointvec,
672 	},
673 #if defined CONFIG_PRINTK
674 	{
675 		.procname	= "printk",
676 		.data		= &console_loglevel,
677 		.maxlen		= 4*sizeof(int),
678 		.mode		= 0644,
679 		.proc_handler	= proc_dointvec,
680 	},
681 	{
682 		.procname	= "printk_ratelimit",
683 		.data		= &printk_ratelimit_state.interval,
684 		.maxlen		= sizeof(int),
685 		.mode		= 0644,
686 		.proc_handler	= proc_dointvec_jiffies,
687 	},
688 	{
689 		.procname	= "printk_ratelimit_burst",
690 		.data		= &printk_ratelimit_state.burst,
691 		.maxlen		= sizeof(int),
692 		.mode		= 0644,
693 		.proc_handler	= proc_dointvec,
694 	},
695 	{
696 		.procname	= "printk_delay",
697 		.data		= &printk_delay_msec,
698 		.maxlen		= sizeof(int),
699 		.mode		= 0644,
700 		.proc_handler	= proc_dointvec_minmax,
701 		.extra1		= &zero,
702 		.extra2		= &ten_thousand,
703 	},
704 	{
705 		.procname	= "dmesg_restrict",
706 		.data		= &dmesg_restrict,
707 		.maxlen		= sizeof(int),
708 		.mode		= 0644,
709 		.proc_handler	= proc_dointvec_minmax,
710 		.extra1		= &zero,
711 		.extra2		= &one,
712 	},
713 #endif
714 	{
715 		.procname	= "ngroups_max",
716 		.data		= &ngroups_max,
717 		.maxlen		= sizeof (int),
718 		.mode		= 0444,
719 		.proc_handler	= proc_dointvec,
720 	},
721 #if defined(CONFIG_LOCKUP_DETECTOR)
722 	{
723 		.procname       = "watchdog",
724 		.data           = &watchdog_enabled,
725 		.maxlen         = sizeof (int),
726 		.mode           = 0644,
727 		.proc_handler   = proc_dowatchdog_enabled,
728 	},
729 	{
730 		.procname	= "watchdog_thresh",
731 		.data		= &softlockup_thresh,
732 		.maxlen		= sizeof(int),
733 		.mode		= 0644,
734 		.proc_handler	= proc_dowatchdog_thresh,
735 		.extra1		= &neg_one,
736 		.extra2		= &sixty,
737 	},
738 	{
739 		.procname	= "softlockup_panic",
740 		.data		= &softlockup_panic,
741 		.maxlen		= sizeof(int),
742 		.mode		= 0644,
743 		.proc_handler	= proc_dointvec_minmax,
744 		.extra1		= &zero,
745 		.extra2		= &one,
746 	},
747 	{
748 		.procname       = "nmi_watchdog",
749 		.data           = &watchdog_enabled,
750 		.maxlen         = sizeof (int),
751 		.mode           = 0644,
752 		.proc_handler   = proc_dowatchdog_enabled,
753 	},
754 #endif
755 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
756 	{
757 		.procname       = "unknown_nmi_panic",
758 		.data           = &unknown_nmi_panic,
759 		.maxlen         = sizeof (int),
760 		.mode           = 0644,
761 		.proc_handler   = proc_dointvec,
762 	},
763 #endif
764 #if defined(CONFIG_X86)
765 	{
766 		.procname	= "panic_on_unrecovered_nmi",
767 		.data		= &panic_on_unrecovered_nmi,
768 		.maxlen		= sizeof(int),
769 		.mode		= 0644,
770 		.proc_handler	= proc_dointvec,
771 	},
772 	{
773 		.procname	= "panic_on_io_nmi",
774 		.data		= &panic_on_io_nmi,
775 		.maxlen		= sizeof(int),
776 		.mode		= 0644,
777 		.proc_handler	= proc_dointvec,
778 	},
779 	{
780 		.procname	= "bootloader_type",
781 		.data		= &bootloader_type,
782 		.maxlen		= sizeof (int),
783 		.mode		= 0444,
784 		.proc_handler	= proc_dointvec,
785 	},
786 	{
787 		.procname	= "bootloader_version",
788 		.data		= &bootloader_version,
789 		.maxlen		= sizeof (int),
790 		.mode		= 0444,
791 		.proc_handler	= proc_dointvec,
792 	},
793 	{
794 		.procname	= "kstack_depth_to_print",
795 		.data		= &kstack_depth_to_print,
796 		.maxlen		= sizeof(int),
797 		.mode		= 0644,
798 		.proc_handler	= proc_dointvec,
799 	},
800 	{
801 		.procname	= "io_delay_type",
802 		.data		= &io_delay_type,
803 		.maxlen		= sizeof(int),
804 		.mode		= 0644,
805 		.proc_handler	= proc_dointvec,
806 	},
807 #endif
808 #if defined(CONFIG_MMU)
809 	{
810 		.procname	= "randomize_va_space",
811 		.data		= &randomize_va_space,
812 		.maxlen		= sizeof(int),
813 		.mode		= 0644,
814 		.proc_handler	= proc_dointvec,
815 	},
816 #endif
817 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
818 	{
819 		.procname	= "spin_retry",
820 		.data		= &spin_retry,
821 		.maxlen		= sizeof (int),
822 		.mode		= 0644,
823 		.proc_handler	= proc_dointvec,
824 	},
825 #endif
826 #if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
827 	{
828 		.procname	= "acpi_video_flags",
829 		.data		= &acpi_realmode_flags,
830 		.maxlen		= sizeof (unsigned long),
831 		.mode		= 0644,
832 		.proc_handler	= proc_doulongvec_minmax,
833 	},
834 #endif
835 #ifdef CONFIG_IA64
836 	{
837 		.procname	= "ignore-unaligned-usertrap",
838 		.data		= &no_unaligned_warning,
839 		.maxlen		= sizeof (int),
840 	 	.mode		= 0644,
841 		.proc_handler	= proc_dointvec,
842 	},
843 	{
844 		.procname	= "unaligned-dump-stack",
845 		.data		= &unaligned_dump_stack,
846 		.maxlen		= sizeof (int),
847 		.mode		= 0644,
848 		.proc_handler	= proc_dointvec,
849 	},
850 #endif
851 #ifdef CONFIG_DETECT_HUNG_TASK
852 	{
853 		.procname	= "hung_task_panic",
854 		.data		= &sysctl_hung_task_panic,
855 		.maxlen		= sizeof(int),
856 		.mode		= 0644,
857 		.proc_handler	= proc_dointvec_minmax,
858 		.extra1		= &zero,
859 		.extra2		= &one,
860 	},
861 	{
862 		.procname	= "hung_task_check_count",
863 		.data		= &sysctl_hung_task_check_count,
864 		.maxlen		= sizeof(unsigned long),
865 		.mode		= 0644,
866 		.proc_handler	= proc_doulongvec_minmax,
867 	},
868 	{
869 		.procname	= "hung_task_timeout_secs",
870 		.data		= &sysctl_hung_task_timeout_secs,
871 		.maxlen		= sizeof(unsigned long),
872 		.mode		= 0644,
873 		.proc_handler	= proc_dohung_task_timeout_secs,
874 	},
875 	{
876 		.procname	= "hung_task_warnings",
877 		.data		= &sysctl_hung_task_warnings,
878 		.maxlen		= sizeof(unsigned long),
879 		.mode		= 0644,
880 		.proc_handler	= proc_doulongvec_minmax,
881 	},
882 #endif
883 #ifdef CONFIG_COMPAT
884 	{
885 		.procname	= "compat-log",
886 		.data		= &compat_log,
887 		.maxlen		= sizeof (int),
888 	 	.mode		= 0644,
889 		.proc_handler	= proc_dointvec,
890 	},
891 #endif
892 #ifdef CONFIG_RT_MUTEXES
893 	{
894 		.procname	= "max_lock_depth",
895 		.data		= &max_lock_depth,
896 		.maxlen		= sizeof(int),
897 		.mode		= 0644,
898 		.proc_handler	= proc_dointvec,
899 	},
900 #endif
901 	{
902 		.procname	= "poweroff_cmd",
903 		.data		= &poweroff_cmd,
904 		.maxlen		= POWEROFF_CMD_PATH_LEN,
905 		.mode		= 0644,
906 		.proc_handler	= proc_dostring,
907 	},
908 #ifdef CONFIG_KEYS
909 	{
910 		.procname	= "keys",
911 		.mode		= 0555,
912 		.child		= key_sysctls,
913 	},
914 #endif
915 #ifdef CONFIG_RCU_TORTURE_TEST
916 	{
917 		.procname       = "rcutorture_runnable",
918 		.data           = &rcutorture_runnable,
919 		.maxlen         = sizeof(int),
920 		.mode           = 0644,
921 		.proc_handler	= proc_dointvec,
922 	},
923 #endif
924 #ifdef CONFIG_PERF_EVENTS
925 	{
926 		.procname	= "perf_event_paranoid",
927 		.data		= &sysctl_perf_event_paranoid,
928 		.maxlen		= sizeof(sysctl_perf_event_paranoid),
929 		.mode		= 0644,
930 		.proc_handler	= proc_dointvec,
931 	},
932 	{
933 		.procname	= "perf_event_mlock_kb",
934 		.data		= &sysctl_perf_event_mlock,
935 		.maxlen		= sizeof(sysctl_perf_event_mlock),
936 		.mode		= 0644,
937 		.proc_handler	= proc_dointvec,
938 	},
939 	{
940 		.procname	= "perf_event_max_sample_rate",
941 		.data		= &sysctl_perf_event_sample_rate,
942 		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
943 		.mode		= 0644,
944 		.proc_handler	= proc_dointvec,
945 	},
946 #endif
947 #ifdef CONFIG_KMEMCHECK
948 	{
949 		.procname	= "kmemcheck",
950 		.data		= &kmemcheck_enabled,
951 		.maxlen		= sizeof(int),
952 		.mode		= 0644,
953 		.proc_handler	= proc_dointvec,
954 	},
955 #endif
956 #ifdef CONFIG_BLOCK
957 	{
958 		.procname	= "blk_iopoll",
959 		.data		= &blk_iopoll_enabled,
960 		.maxlen		= sizeof(int),
961 		.mode		= 0644,
962 		.proc_handler	= proc_dointvec,
963 	},
964 #endif
965 /*
966  * NOTE: do not add new entries to this table unless you have read
967  * Documentation/sysctl/ctl_unnumbered.txt
968  */
969 	{ }
970 };
971 
972 static struct ctl_table vm_table[] = {
973 	{
974 		.procname	= "overcommit_memory",
975 		.data		= &sysctl_overcommit_memory,
976 		.maxlen		= sizeof(sysctl_overcommit_memory),
977 		.mode		= 0644,
978 		.proc_handler	= proc_dointvec,
979 	},
980 	{
981 		.procname	= "panic_on_oom",
982 		.data		= &sysctl_panic_on_oom,
983 		.maxlen		= sizeof(sysctl_panic_on_oom),
984 		.mode		= 0644,
985 		.proc_handler	= proc_dointvec,
986 	},
987 	{
988 		.procname	= "oom_kill_allocating_task",
989 		.data		= &sysctl_oom_kill_allocating_task,
990 		.maxlen		= sizeof(sysctl_oom_kill_allocating_task),
991 		.mode		= 0644,
992 		.proc_handler	= proc_dointvec,
993 	},
994 	{
995 		.procname	= "oom_dump_tasks",
996 		.data		= &sysctl_oom_dump_tasks,
997 		.maxlen		= sizeof(sysctl_oom_dump_tasks),
998 		.mode		= 0644,
999 		.proc_handler	= proc_dointvec,
1000 	},
1001 	{
1002 		.procname	= "overcommit_ratio",
1003 		.data		= &sysctl_overcommit_ratio,
1004 		.maxlen		= sizeof(sysctl_overcommit_ratio),
1005 		.mode		= 0644,
1006 		.proc_handler	= proc_dointvec,
1007 	},
1008 	{
1009 		.procname	= "page-cluster",
1010 		.data		= &page_cluster,
1011 		.maxlen		= sizeof(int),
1012 		.mode		= 0644,
1013 		.proc_handler	= proc_dointvec,
1014 	},
1015 	{
1016 		.procname	= "dirty_background_ratio",
1017 		.data		= &dirty_background_ratio,
1018 		.maxlen		= sizeof(dirty_background_ratio),
1019 		.mode		= 0644,
1020 		.proc_handler	= dirty_background_ratio_handler,
1021 		.extra1		= &zero,
1022 		.extra2		= &one_hundred,
1023 	},
1024 	{
1025 		.procname	= "dirty_background_bytes",
1026 		.data		= &dirty_background_bytes,
1027 		.maxlen		= sizeof(dirty_background_bytes),
1028 		.mode		= 0644,
1029 		.proc_handler	= dirty_background_bytes_handler,
1030 		.extra1		= &one_ul,
1031 	},
1032 	{
1033 		.procname	= "dirty_ratio",
1034 		.data		= &vm_dirty_ratio,
1035 		.maxlen		= sizeof(vm_dirty_ratio),
1036 		.mode		= 0644,
1037 		.proc_handler	= dirty_ratio_handler,
1038 		.extra1		= &zero,
1039 		.extra2		= &one_hundred,
1040 	},
1041 	{
1042 		.procname	= "dirty_bytes",
1043 		.data		= &vm_dirty_bytes,
1044 		.maxlen		= sizeof(vm_dirty_bytes),
1045 		.mode		= 0644,
1046 		.proc_handler	= dirty_bytes_handler,
1047 		.extra1		= &dirty_bytes_min,
1048 	},
1049 	{
1050 		.procname	= "dirty_writeback_centisecs",
1051 		.data		= &dirty_writeback_interval,
1052 		.maxlen		= sizeof(dirty_writeback_interval),
1053 		.mode		= 0644,
1054 		.proc_handler	= dirty_writeback_centisecs_handler,
1055 	},
1056 	{
1057 		.procname	= "dirty_expire_centisecs",
1058 		.data		= &dirty_expire_interval,
1059 		.maxlen		= sizeof(dirty_expire_interval),
1060 		.mode		= 0644,
1061 		.proc_handler	= proc_dointvec,
1062 	},
1063 	{
1064 		.procname	= "nr_pdflush_threads",
1065 		.data		= &nr_pdflush_threads,
1066 		.maxlen		= sizeof nr_pdflush_threads,
1067 		.mode		= 0444 /* read-only*/,
1068 		.proc_handler	= proc_dointvec,
1069 	},
1070 	{
1071 		.procname	= "swappiness",
1072 		.data		= &vm_swappiness,
1073 		.maxlen		= sizeof(vm_swappiness),
1074 		.mode		= 0644,
1075 		.proc_handler	= proc_dointvec_minmax,
1076 		.extra1		= &zero,
1077 		.extra2		= &one_hundred,
1078 	},
1079 #ifdef CONFIG_HUGETLB_PAGE
1080 	{
1081 		.procname	= "nr_hugepages",
1082 		.data		= NULL,
1083 		.maxlen		= sizeof(unsigned long),
1084 		.mode		= 0644,
1085 		.proc_handler	= hugetlb_sysctl_handler,
1086 		.extra1		= (void *)&hugetlb_zero,
1087 		.extra2		= (void *)&hugetlb_infinity,
1088 	},
1089 #ifdef CONFIG_NUMA
1090 	{
1091 		.procname       = "nr_hugepages_mempolicy",
1092 		.data           = NULL,
1093 		.maxlen         = sizeof(unsigned long),
1094 		.mode           = 0644,
1095 		.proc_handler   = &hugetlb_mempolicy_sysctl_handler,
1096 		.extra1		= (void *)&hugetlb_zero,
1097 		.extra2		= (void *)&hugetlb_infinity,
1098 	},
1099 #endif
1100 	 {
1101 		.procname	= "hugetlb_shm_group",
1102 		.data		= &sysctl_hugetlb_shm_group,
1103 		.maxlen		= sizeof(gid_t),
1104 		.mode		= 0644,
1105 		.proc_handler	= proc_dointvec,
1106 	 },
1107 	 {
1108 		.procname	= "hugepages_treat_as_movable",
1109 		.data		= &hugepages_treat_as_movable,
1110 		.maxlen		= sizeof(int),
1111 		.mode		= 0644,
1112 		.proc_handler	= hugetlb_treat_movable_handler,
1113 	},
1114 	{
1115 		.procname	= "nr_overcommit_hugepages",
1116 		.data		= NULL,
1117 		.maxlen		= sizeof(unsigned long),
1118 		.mode		= 0644,
1119 		.proc_handler	= hugetlb_overcommit_handler,
1120 		.extra1		= (void *)&hugetlb_zero,
1121 		.extra2		= (void *)&hugetlb_infinity,
1122 	},
1123 #endif
1124 	{
1125 		.procname	= "lowmem_reserve_ratio",
1126 		.data		= &sysctl_lowmem_reserve_ratio,
1127 		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
1128 		.mode		= 0644,
1129 		.proc_handler	= lowmem_reserve_ratio_sysctl_handler,
1130 	},
1131 	{
1132 		.procname	= "drop_caches",
1133 		.data		= &sysctl_drop_caches,
1134 		.maxlen		= sizeof(int),
1135 		.mode		= 0644,
1136 		.proc_handler	= drop_caches_sysctl_handler,
1137 	},
1138 #ifdef CONFIG_COMPACTION
1139 	{
1140 		.procname	= "compact_memory",
1141 		.data		= &sysctl_compact_memory,
1142 		.maxlen		= sizeof(int),
1143 		.mode		= 0200,
1144 		.proc_handler	= sysctl_compaction_handler,
1145 	},
1146 	{
1147 		.procname	= "extfrag_threshold",
1148 		.data		= &sysctl_extfrag_threshold,
1149 		.maxlen		= sizeof(int),
1150 		.mode		= 0644,
1151 		.proc_handler	= sysctl_extfrag_handler,
1152 		.extra1		= &min_extfrag_threshold,
1153 		.extra2		= &max_extfrag_threshold,
1154 	},
1155 
1156 #endif /* CONFIG_COMPACTION */
1157 	{
1158 		.procname	= "min_free_kbytes",
1159 		.data		= &min_free_kbytes,
1160 		.maxlen		= sizeof(min_free_kbytes),
1161 		.mode		= 0644,
1162 		.proc_handler	= min_free_kbytes_sysctl_handler,
1163 		.extra1		= &zero,
1164 	},
1165 	{
1166 		.procname	= "percpu_pagelist_fraction",
1167 		.data		= &percpu_pagelist_fraction,
1168 		.maxlen		= sizeof(percpu_pagelist_fraction),
1169 		.mode		= 0644,
1170 		.proc_handler	= percpu_pagelist_fraction_sysctl_handler,
1171 		.extra1		= &min_percpu_pagelist_fract,
1172 	},
1173 #ifdef CONFIG_MMU
1174 	{
1175 		.procname	= "max_map_count",
1176 		.data		= &sysctl_max_map_count,
1177 		.maxlen		= sizeof(sysctl_max_map_count),
1178 		.mode		= 0644,
1179 		.proc_handler	= proc_dointvec_minmax,
1180 		.extra1		= &zero,
1181 	},
1182 #else
1183 	{
1184 		.procname	= "nr_trim_pages",
1185 		.data		= &sysctl_nr_trim_pages,
1186 		.maxlen		= sizeof(sysctl_nr_trim_pages),
1187 		.mode		= 0644,
1188 		.proc_handler	= proc_dointvec_minmax,
1189 		.extra1		= &zero,
1190 	},
1191 #endif
1192 	{
1193 		.procname	= "laptop_mode",
1194 		.data		= &laptop_mode,
1195 		.maxlen		= sizeof(laptop_mode),
1196 		.mode		= 0644,
1197 		.proc_handler	= proc_dointvec_jiffies,
1198 	},
1199 	{
1200 		.procname	= "block_dump",
1201 		.data		= &block_dump,
1202 		.maxlen		= sizeof(block_dump),
1203 		.mode		= 0644,
1204 		.proc_handler	= proc_dointvec,
1205 		.extra1		= &zero,
1206 	},
1207 	{
1208 		.procname	= "vfs_cache_pressure",
1209 		.data		= &sysctl_vfs_cache_pressure,
1210 		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
1211 		.mode		= 0644,
1212 		.proc_handler	= proc_dointvec,
1213 		.extra1		= &zero,
1214 	},
1215 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1216 	{
1217 		.procname	= "legacy_va_layout",
1218 		.data		= &sysctl_legacy_va_layout,
1219 		.maxlen		= sizeof(sysctl_legacy_va_layout),
1220 		.mode		= 0644,
1221 		.proc_handler	= proc_dointvec,
1222 		.extra1		= &zero,
1223 	},
1224 #endif
1225 #ifdef CONFIG_NUMA
1226 	{
1227 		.procname	= "zone_reclaim_mode",
1228 		.data		= &zone_reclaim_mode,
1229 		.maxlen		= sizeof(zone_reclaim_mode),
1230 		.mode		= 0644,
1231 		.proc_handler	= proc_dointvec,
1232 		.extra1		= &zero,
1233 	},
1234 	{
1235 		.procname	= "min_unmapped_ratio",
1236 		.data		= &sysctl_min_unmapped_ratio,
1237 		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
1238 		.mode		= 0644,
1239 		.proc_handler	= sysctl_min_unmapped_ratio_sysctl_handler,
1240 		.extra1		= &zero,
1241 		.extra2		= &one_hundred,
1242 	},
1243 	{
1244 		.procname	= "min_slab_ratio",
1245 		.data		= &sysctl_min_slab_ratio,
1246 		.maxlen		= sizeof(sysctl_min_slab_ratio),
1247 		.mode		= 0644,
1248 		.proc_handler	= sysctl_min_slab_ratio_sysctl_handler,
1249 		.extra1		= &zero,
1250 		.extra2		= &one_hundred,
1251 	},
1252 #endif
1253 #ifdef CONFIG_SMP
1254 	{
1255 		.procname	= "stat_interval",
1256 		.data		= &sysctl_stat_interval,
1257 		.maxlen		= sizeof(sysctl_stat_interval),
1258 		.mode		= 0644,
1259 		.proc_handler	= proc_dointvec_jiffies,
1260 	},
1261 #endif
1262 #ifdef CONFIG_MMU
1263 	{
1264 		.procname	= "mmap_min_addr",
1265 		.data		= &dac_mmap_min_addr,
1266 		.maxlen		= sizeof(unsigned long),
1267 		.mode		= 0644,
1268 		.proc_handler	= mmap_min_addr_handler,
1269 	},
1270 #endif
1271 #ifdef CONFIG_NUMA
1272 	{
1273 		.procname	= "numa_zonelist_order",
1274 		.data		= &numa_zonelist_order,
1275 		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
1276 		.mode		= 0644,
1277 		.proc_handler	= numa_zonelist_order_handler,
1278 	},
1279 #endif
1280 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1281    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1282 	{
1283 		.procname	= "vdso_enabled",
1284 		.data		= &vdso_enabled,
1285 		.maxlen		= sizeof(vdso_enabled),
1286 		.mode		= 0644,
1287 		.proc_handler	= proc_dointvec,
1288 		.extra1		= &zero,
1289 	},
1290 #endif
1291 #ifdef CONFIG_HIGHMEM
1292 	{
1293 		.procname	= "highmem_is_dirtyable",
1294 		.data		= &vm_highmem_is_dirtyable,
1295 		.maxlen		= sizeof(vm_highmem_is_dirtyable),
1296 		.mode		= 0644,
1297 		.proc_handler	= proc_dointvec_minmax,
1298 		.extra1		= &zero,
1299 		.extra2		= &one,
1300 	},
1301 #endif
1302 	{
1303 		.procname	= "scan_unevictable_pages",
1304 		.data		= &scan_unevictable_pages,
1305 		.maxlen		= sizeof(scan_unevictable_pages),
1306 		.mode		= 0644,
1307 		.proc_handler	= scan_unevictable_handler,
1308 	},
1309 #ifdef CONFIG_MEMORY_FAILURE
1310 	{
1311 		.procname	= "memory_failure_early_kill",
1312 		.data		= &sysctl_memory_failure_early_kill,
1313 		.maxlen		= sizeof(sysctl_memory_failure_early_kill),
1314 		.mode		= 0644,
1315 		.proc_handler	= proc_dointvec_minmax,
1316 		.extra1		= &zero,
1317 		.extra2		= &one,
1318 	},
1319 	{
1320 		.procname	= "memory_failure_recovery",
1321 		.data		= &sysctl_memory_failure_recovery,
1322 		.maxlen		= sizeof(sysctl_memory_failure_recovery),
1323 		.mode		= 0644,
1324 		.proc_handler	= proc_dointvec_minmax,
1325 		.extra1		= &zero,
1326 		.extra2		= &one,
1327 	},
1328 #endif
1329 
1330 /*
1331  * NOTE: do not add new entries to this table unless you have read
1332  * Documentation/sysctl/ctl_unnumbered.txt
1333  */
1334 	{ }
1335 };
1336 
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
/*
 * Table holding only the terminating (all-zero) entry.  It is used as the
 * .child of the "binfmt_misc" directory entry in fs_table.
 */
static struct ctl_table binfmt_misc_table[] = {
	{ }
};
#endif
1342 
/*
 * sysctl entries for file-system related tunables and statistics.
 * NOTE(review): presumably hooked up as the "fs" directory by the root
 * table, which is not visible in this part of the file -- confirm there.
 *
 * Entries with mode 0444 are read-only; entries that only set .child
 * (mode 0555, no handler) are sub-directories.  The table ends with an
 * all-zero sentinel entry.
 */
static struct ctl_table fs_table[] = {
	/* "inode-nr" and "inode-state" share inodes_stat; only .maxlen differs */
	{
		.procname	= "inode-nr",
		.data		= &inodes_stat,
		.maxlen		= 2*sizeof(int),
		.mode		= 0444,
		.proc_handler	= proc_nr_inodes,
	},
	{
		.procname	= "inode-state",
		.data		= &inodes_stat,
		.maxlen		= 7*sizeof(int),
		.mode		= 0444,
		.proc_handler	= proc_nr_inodes,
	},
	/* "file-nr" covers the whole files_stat; "file-max" only its max_files */
	{
		.procname	= "file-nr",
		.data		= &files_stat,
		.maxlen		= sizeof(files_stat),
		.mode		= 0444,
		.proc_handler	= proc_nr_files,
	},
	{
		.procname	= "file-max",
		.data		= &files_stat.max_files,
		.maxlen		= sizeof(files_stat.max_files),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	/* bounded by sysctl_nr_open_min / sysctl_nr_open_max */
	{
		.procname	= "nr_open",
		.data		= &sysctl_nr_open,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &sysctl_nr_open_min,
		.extra2		= &sysctl_nr_open_max,
	},
	{
		.procname	= "dentry-state",
		.data		= &dentry_stat,
		.maxlen		= 6*sizeof(int),
		.mode		= 0444,
		.proc_handler	= proc_nr_dentry,
	},
	/* both overflow ids are bounded by minolduid / maxolduid */
	{
		.procname	= "overflowuid",
		.data		= &fs_overflowuid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &minolduid,
		.extra2		= &maxolduid,
	},
	{
		.procname	= "overflowgid",
		.data		= &fs_overflowgid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &minolduid,
		.extra2		= &maxolduid,
	},
#ifdef CONFIG_FILE_LOCKING
	{
		.procname	= "leases-enable",
		.data		= &leases_enable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#ifdef CONFIG_DNOTIFY
	{
		.procname	= "dir-notify-enable",
		.data		= &dir_notify_enable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
	/* everything up to the matching #endif is only built with CONFIG_MMU */
#ifdef CONFIG_MMU
#ifdef CONFIG_FILE_LOCKING
	{
		.procname	= "lease-break-time",
		.data		= &lease_break_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#ifdef CONFIG_AIO
	{
		.procname	= "aio-nr",
		.data		= &aio_nr,
		.maxlen		= sizeof(aio_nr),
		.mode		= 0444,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{
		.procname	= "aio-max-nr",
		.data		= &aio_max_nr,
		.maxlen		= sizeof(aio_max_nr),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
#endif /* CONFIG_AIO */
#ifdef CONFIG_INOTIFY_USER
	/* directory: entries come from inotify_table */
	{
		.procname	= "inotify",
		.mode		= 0555,
		.child		= inotify_table,
	},
#endif
#ifdef CONFIG_EPOLL
	/* directory: entries come from epoll_table */
	{
		.procname	= "epoll",
		.mode		= 0555,
		.child		= epoll_table,
	},
#endif
#endif
	/* accepted range is zero..two */
	{
		.procname	= "suid_dumpable",
		.data		= &suid_dumpable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &two,
	},
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
	/* directory backed by the (empty) binfmt_misc_table */
	{
		.procname	= "binfmt_misc",
		.mode		= 0555,
		.child		= binfmt_misc_table,
	},
#endif
	/* custom handler; pipe_min_size is passed as the lower bound in extra1 */
	{
		.procname	= "pipe-max-size",
		.data		= &pipe_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &pipe_proc_fn,
		.extra1		= &pipe_min_size,
	},
/*
 * NOTE: do not add new entries to this table unless you have read
 * Documentation/sysctl/ctl_unnumbered.txt
 */
	{ }
};
1495 
/*
 * Debugging-related sysctl entries.  Each entry is compiled in only for
 * the configurations named by its surrounding #if; the table always ends
 * with the all-zero sentinel.
 */
static struct ctl_table debug_table[] = {
#if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) || \
    defined(CONFIG_S390)
	/* plain int backed by show_unhandled_signals */
	{
		.procname	= "exception-trace",
		.data		= &show_unhandled_signals,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
#endif
#if defined(CONFIG_OPTPROBES)
	/* dedicated handler; zero/one are supplied as the bounds */
	{
		.procname	= "kprobes-optimization",
		.data		= &sysctl_kprobes_optimization,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_kprobes_optimization_handler,
		.extra1		= &zero,
		.extra2		= &one,
	},
#endif
	{ }
};
1520 
/* Table with no entries of its own: it holds only the terminating sentinel. */
static struct ctl_table dev_table[] = {
	{ }
};
1524 
/*
 * Spinlock taken by the helpers below around updates to a header's
 * used/count/unregistering fields and around the header and root lists.
 */
static DEFINE_SPINLOCK(sysctl_lock);
1526 
1527 /* called under sysctl_lock */
1528 static int use_table(struct ctl_table_header *p)
1529 {
1530 	if (unlikely(p->unregistering))
1531 		return 0;
1532 	p->used++;
1533 	return 1;
1534 }
1535 
1536 /* called under sysctl_lock */
1537 static void unuse_table(struct ctl_table_header *p)
1538 {
1539 	if (!--p->used)
1540 		if (unlikely(p->unregistering))
1541 			complete(p->unregistering);
1542 }
1543 
1544 /* called under sysctl_lock, will reacquire if has to wait */
1545 static void start_unregistering(struct ctl_table_header *p)
1546 {
1547 	/*
1548 	 * if p->used is 0, nobody will ever touch that entry again;
1549 	 * we'll eliminate all paths to it before dropping sysctl_lock
1550 	 */
1551 	if (unlikely(p->used)) {
1552 		struct completion wait;
1553 		init_completion(&wait);
1554 		p->unregistering = &wait;
1555 		spin_unlock(&sysctl_lock);
1556 		wait_for_completion(&wait);
1557 		spin_lock(&sysctl_lock);
1558 	} else {
1559 		/* anything non-NULL; we'll never dereference it */
1560 		p->unregistering = ERR_PTR(-EINVAL);
1561 	}
1562 	/*
1563 	 * do not remove from the list until nobody holds it; walking the
1564 	 * list in do_sysctl() relies on that.
1565 	 */
1566 	list_del_init(&p->ctl_entry);
1567 }
1568 
1569 void sysctl_head_get(struct ctl_table_header *head)
1570 {
1571 	spin_lock(&sysctl_lock);
1572 	head->count++;
1573 	spin_unlock(&sysctl_lock);
1574 }
1575 
1576 void sysctl_head_put(struct ctl_table_header *head)
1577 {
1578 	spin_lock(&sysctl_lock);
1579 	if (!--head->count)
1580 		kfree(head);
1581 	spin_unlock(&sysctl_lock);
1582 }
1583 
1584 struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
1585 {
1586 	if (!head)
1587 		BUG();
1588 	spin_lock(&sysctl_lock);
1589 	if (!use_table(head))
1590 		head = ERR_PTR(-ENOENT);
1591 	spin_unlock(&sysctl_lock);
1592 	return head;
1593 }
1594 
1595 void sysctl_head_finish(struct ctl_table_header *head)
1596 {
1597 	if (!head)
1598 		return;
1599 	spin_lock(&sysctl_lock);
1600 	unuse_table(head);
1601 	spin_unlock(&sysctl_lock);
1602 }
1603 
1604 static struct ctl_table_set *
1605 lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
1606 {
1607 	struct ctl_table_set *set = &root->default_set;
1608 	if (root->lookup)
1609 		set = root->lookup(root, namespaces);
1610 	return set;
1611 }
1612 
1613 static struct list_head *
1614 lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces)
1615 {
1616 	struct ctl_table_set *set = lookup_header_set(root, namespaces);
1617 	return &set->list;
1618 }
1619 
/*
 * Iterate over the registered table headers visible through @namespaces.
 *
 * @prev is the header returned by the previous call (its usage reference
 * is dropped here), or NULL to start from root_table_header.  Returns the
 * next header on which a usage reference could be taken via use_table(),
 * or NULL once the walk has wrapped around to sysctl_table_root.
 * sysctl_lock is held for the duration of the walk and released before
 * returning.
 */
struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces,
					    struct ctl_table_header *prev)
{
	struct ctl_table_root *root;
	struct list_head *header_list;
	struct ctl_table_header *head;
	struct list_head *tmp;

	spin_lock(&sysctl_lock);
	if (prev) {
		/* resume right after @prev; note this jumps into the loop body */
		head = prev;
		tmp = &prev->ctl_entry;
		unuse_table(prev);
		goto next;
	}
	tmp = &root_table_header.ctl_entry;
	for (;;) {
		head = list_entry(tmp, struct ctl_table_header, ctl_entry);

		/* headers that are being unregistered are skipped */
		if (!use_table(head))
			goto next;
		spin_unlock(&sysctl_lock);
		return head;
	next:
		root = head->root;
		tmp = tmp->next;
		header_list = lookup_header_list(root, namespaces);
		/* still inside the current root's list: examine that entry */
		if (tmp != header_list)
			continue;

		/*
		 * Reached the list head: move on to the following root(s),
		 * skipping those whose list is empty.  Arriving back at
		 * sysctl_table_root ends the iteration.
		 */
		do {
			root = list_entry(root->root_list.next,
					struct ctl_table_root, root_list);
			if (root == &sysctl_table_root)
				goto out;
			header_list = lookup_header_list(root, namespaces);
		} while (list_empty(header_list));
		tmp = header_list->next;
	}
out:
	spin_unlock(&sysctl_lock);
	return NULL;
}
1663 
1664 struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
1665 {
1666 	return __sysctl_head_next(current->nsproxy, prev);
1667 }
1668 
1669 void register_sysctl_root(struct ctl_table_root *root)
1670 {
1671 	spin_lock(&sysctl_lock);
1672 	list_add_tail(&root->root_list, &sysctl_table_root.root_list);
1673 	spin_unlock(&sysctl_lock);
1674 }
1675 
1676 /*
1677  * sysctl_perm does NOT grant the superuser all rights automatically, because
1678  * some sysctl variables are readonly even to root.
1679  */
1680 
1681 static int test_perm(int mode, int op)
1682 {
1683 	if (!current_euid())
1684 		mode >>= 6;
1685 	else if (in_egroup_p(0))
1686 		mode >>= 3;
1687 	if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0)
1688 		return 0;
1689 	return -EACCES;
1690 }
1691 
1692 int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
1693 {
1694 	int error;
1695 	int mode;
1696 
1697 	error = security_sysctl(table, op & (MAY_READ | MAY_WRITE | MAY_EXEC));
1698 	if (error)
1699 		return error;
1700 
1701 	if (root->permissions)
1702 		mode = root->permissions(root, current->nsproxy, table);
1703 	else
1704 		mode = table->mode;
1705 
1706 	return test_perm(mode, op);
1707 }
1708 
1709 static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1710 {
1711 	for (; table->procname; table++) {
1712 		table->parent = parent;
1713 		if (table->child)
1714 			sysctl_set_parent(table, table->child);
1715 	}
1716 }
1717 
1718 static __init int sysctl_init(void)
1719 {
1720 	sysctl_set_parent(NULL, root_table);
1721 #ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1722 	sysctl_check_table(current->nsproxy, root_table);
1723 #endif
1724 	return 0;
1725 }
1726 
1727 core_initcall(sysctl_init);
1728 
1729 static struct ctl_table *is_branch_in(struct ctl_table *branch,
1730 				      struct ctl_table *table)
1731 {
1732 	struct ctl_table *p;
1733 	const char *s = branch->procname;
1734 
1735 	/* branch should have named subdirectory as its first element */
1736 	if (!s || !branch->child)
1737 		return NULL;
1738 
1739 	/* ... and nothing else */
1740 	if (branch[1].procname)
1741 		return NULL;
1742 
1743 	/* table should contain subdirectory with the same name */
1744 	for (p = table; p->procname; p++) {
1745 		if (!p->child)
1746 			continue;
1747 		if (p->procname && strcmp(p->procname, s) == 0)
1748 			return p;
1749 	}
1750 	return NULL;
1751 }
1752 
1753 /* see if attaching q to p would be an improvement */
1754 static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
1755 {
1756 	struct ctl_table *to = p->ctl_table, *by = q->ctl_table;
1757 	struct ctl_table *next;
1758 	int is_better = 0;
1759 	int not_in_parent = !p->attached_by;
1760 
1761 	while ((next = is_branch_in(by, to)) != NULL) {
1762 		if (by == q->attached_by)
1763 			is_better = 1;
1764 		if (to == p->attached_by)
1765 			not_in_parent = 1;
1766 		by = by->child;
1767 		to = next->child;
1768 	}
1769 
1770 	if (is_better && not_in_parent) {
1771 		q->attached_by = by;
1772 		q->attached_to = to;
1773 		q->parent = p;
1774 	}
1775 }
1776 
1777 /**
1778  * __register_sysctl_paths - register a sysctl hierarchy
1779  * @root: List of sysctl headers to register on
1780  * @namespaces: Data to compute which lists of sysctl entries are visible
1781  * @path: The path to the directory the sysctl table is in.
1782  * @table: the top-level table structure
1783  *
1784  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1785  * array. A completely 0 filled entry terminates the table.
1786  *
1787  * The members of the &struct ctl_table structure are used as follows:
1788  *
1789  * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1790  *            enter a sysctl file
1791  *
1792  * data - a pointer to data for use by proc_handler
1793  *
1794  * maxlen - the maximum size in bytes of the data
1795  *
1796  * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1797  *
1798  * child - a pointer to the child sysctl table if this entry is a directory, or
1799  *         %NULL.
1800  *
1801  * proc_handler - the text handler routine (described below)
1802  *
1803  * de - for internal use by the sysctl routines
1804  *
1805  * extra1, extra2 - extra pointers usable by the proc handler routines
1806  *
1807  * Leaf nodes in the sysctl tree will be represented by a single file
1808  * under /proc; non-leaf nodes will be represented by directories.
1809  *
1810  * sysctl(2) can automatically manage read and write requests through
1811  * the sysctl table.  The data and maxlen fields of the ctl_table
1812  * struct enable minimal validation of the values being written to be
1813  * performed, and the mode field allows minimal authentication.
1814  *
1815  * There must be a proc_handler routine for any terminal nodes
1816  * mirrored under /proc/sys (non-terminals are handled by a built-in
1817  * directory handler).  Several default handlers are available to
1818  * cover common cases -
1819  *
1820  * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1821  * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1822  * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1823  *
1824  * It is the handler's job to read the input buffer from user memory
1825  * and process it. The handler should return 0 on success.
1826  *
1827  * This routine returns %NULL on a failure to register, and a pointer
1828  * to the table header on success.
1829  */
1830 struct ctl_table_header *__register_sysctl_paths(
1831 	struct ctl_table_root *root,
1832 	struct nsproxy *namespaces,
1833 	const struct ctl_path *path, struct ctl_table *table)
1834 {
1835 	struct ctl_table_header *header;
1836 	struct ctl_table *new, **prevp;
1837 	unsigned int n, npath;
1838 	struct ctl_table_set *set;
1839 
1840 	/* Count the path components */
1841 	for (npath = 0; path[npath].procname; ++npath)
1842 		;
1843 
1844 	/*
1845 	 * For each path component, allocate a 2-element ctl_table array.
1846 	 * The first array element will be filled with the sysctl entry
1847 	 * for this, the second will be the sentinel (procname == 0).
1848 	 *
1849 	 * We allocate everything in one go so that we don't have to
1850 	 * worry about freeing additional memory in unregister_sysctl_table.
1851 	 */
1852 	header = kzalloc(sizeof(struct ctl_table_header) +
1853 			 (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL);
1854 	if (!header)
1855 		return NULL;
1856 
1857 	new = (struct ctl_table *) (header + 1);
1858 
1859 	/* Now connect the dots */
1860 	prevp = &header->ctl_table;
1861 	for (n = 0; n < npath; ++n, ++path) {
1862 		/* Copy the procname */
1863 		new->procname = path->procname;
1864 		new->mode     = 0555;
1865 
1866 		*prevp = new;
1867 		prevp = &new->child;
1868 
1869 		new += 2;
1870 	}
1871 	*prevp = table;
1872 	header->ctl_table_arg = table;
1873 
1874 	INIT_LIST_HEAD(&header->ctl_entry);
1875 	header->used = 0;
1876 	header->unregistering = NULL;
1877 	header->root = root;
1878 	sysctl_set_parent(NULL, header->ctl_table);
1879 	header->count = 1;
1880 #ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1881 	if (sysctl_check_table(namespaces, header->ctl_table)) {
1882 		kfree(header);
1883 		return NULL;
1884 	}
1885 #endif
1886 	spin_lock(&sysctl_lock);
1887 	header->set = lookup_header_set(root, namespaces);
1888 	header->attached_by = header->ctl_table;
1889 	header->attached_to = root_table;
1890 	header->parent = &root_table_header;
1891 	for (set = header->set; set; set = set->parent) {
1892 		struct ctl_table_header *p;
1893 		list_for_each_entry(p, &set->list, ctl_entry) {
1894 			if (p->unregistering)
1895 				continue;
1896 			try_attach(p, header);
1897 		}
1898 	}
1899 	header->parent->count++;
1900 	list_add_tail(&header->ctl_entry, &header->set->list);
1901 	spin_unlock(&sysctl_lock);
1902 
1903 	return header;
1904 }
1905 
1906 /**
1907  * register_sysctl_table_path - register a sysctl table hierarchy
1908  * @path: The path to the directory the sysctl table is in.
1909  * @table: the top-level table structure
1910  *
1911  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1912  * array. A completely 0 filled entry terminates the table.
1913  *
1914  * See __register_sysctl_paths for more details.
1915  */
1916 struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
1917 						struct ctl_table *table)
1918 {
1919 	return __register_sysctl_paths(&sysctl_table_root, current->nsproxy,
1920 					path, table);
1921 }
1922 
1923 /**
1924  * register_sysctl_table - register a sysctl table hierarchy
1925  * @table: the top-level table structure
1926  *
1927  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1928  * array. A completely 0 filled entry terminates the table.
1929  *
1930  * See register_sysctl_paths for more details.
1931  */
1932 struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
1933 {
1934 	static const struct ctl_path null_path[] = { {} };
1935 
1936 	return register_sysctl_paths(null_path, table);
1937 }
1938 
1939 /**
1940  * unregister_sysctl_table - unregister a sysctl table hierarchy
1941  * @header: the header returned from register_sysctl_table
1942  *
1943  * Unregisters the sysctl table and all children. proc entries may not
1944  * actually be removed until they are no longer used by anyone.
1945  */
1946 void unregister_sysctl_table(struct ctl_table_header * header)
1947 {
1948 	might_sleep();
1949 
1950 	if (header == NULL)
1951 		return;
1952 
1953 	spin_lock(&sysctl_lock);
1954 	start_unregistering(header);
1955 	if (!--header->parent->count) {
1956 		WARN_ON(1);
1957 		kfree(header->parent);
1958 	}
1959 	if (!--header->count)
1960 		kfree(header);
1961 	spin_unlock(&sysctl_lock);
1962 }
1963 
1964 int sysctl_is_seen(struct ctl_table_header *p)
1965 {
1966 	struct ctl_table_set *set = p->set;
1967 	int res;
1968 	spin_lock(&sysctl_lock);
1969 	if (p->unregistering)
1970 		res = 0;
1971 	else if (!set->is_seen)
1972 		res = 1;
1973 	else
1974 		res = set->is_seen(set);
1975 	spin_unlock(&sysctl_lock);
1976 	return res;
1977 }
1978 
1979 void setup_sysctl_set(struct ctl_table_set *p,
1980 	struct ctl_table_set *parent,
1981 	int (*is_seen)(struct ctl_table_set *))
1982 {
1983 	INIT_LIST_HEAD(&p->list);
1984 	p->parent = parent ? parent : &sysctl_table_root.default_set;
1985 	p->is_seen = is_seen;
1986 }
1987 
1988 #else /* !CONFIG_SYSCTL */
/* Stub for builds without CONFIG_SYSCTL: nothing is registered, returns NULL. */
struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
{
	return NULL;
}
1993 
/* Stub for builds without CONFIG_SYSCTL: nothing is registered, returns NULL. */
struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
						    struct ctl_table *table)
{
	return NULL;
}
1998 }
1999 
/* Stub for builds without CONFIG_SYSCTL: does nothing. */
void unregister_sysctl_table(struct ctl_table_header * table)
{
}
2003 
/* Stub for builds without CONFIG_SYSCTL: leaves @p untouched. */
void setup_sysctl_set(struct ctl_table_set *p,
	struct ctl_table_set *parent,
	int (*is_seen)(struct ctl_table_set *))
{
}
2009 
/* Stub for builds without CONFIG_SYSCTL: does nothing. */
void sysctl_head_put(struct ctl_table_header *head)
{
}
2013 
2014 #endif /* CONFIG_SYSCTL */
2015 
2016 /*
2017  * /proc/sys support
2018  */
2019 
2020 #ifdef CONFIG_PROC_SYSCTL
2021 
2022 static int _proc_do_string(void* data, int maxlen, int write,
2023 			   void __user *buffer,
2024 			   size_t *lenp, loff_t *ppos)
2025 {
2026 	size_t len;
2027 	char __user *p;
2028 	char c;
2029 
2030 	if (!data || !maxlen || !*lenp) {
2031 		*lenp = 0;
2032 		return 0;
2033 	}
2034 
2035 	if (write) {
2036 		len = 0;
2037 		p = buffer;
2038 		while (len < *lenp) {
2039 			if (get_user(c, p++))
2040 				return -EFAULT;
2041 			if (c == 0 || c == '\n')
2042 				break;
2043 			len++;
2044 		}
2045 		if (len >= maxlen)
2046 			len = maxlen-1;
2047 		if(copy_from_user(data, buffer, len))
2048 			return -EFAULT;
2049 		((char *) data)[len] = 0;
2050 		*ppos += *lenp;
2051 	} else {
2052 		len = strlen(data);
2053 		if (len > maxlen)
2054 			len = maxlen;
2055 
2056 		if (*ppos > len) {
2057 			*lenp = 0;
2058 			return 0;
2059 		}
2060 
2061 		data += *ppos;
2062 		len  -= *ppos;
2063 
2064 		if (len > *lenp)
2065 			len = *lenp;
2066 		if (len)
2067 			if(copy_to_user(buffer, data, len))
2068 				return -EFAULT;
2069 		if (len < *lenp) {
2070 			if(put_user('\n', ((char __user *) buffer) + len))
2071 				return -EFAULT;
2072 			len++;
2073 		}
2074 		*lenp = len;
2075 		*ppos += len;
2076 	}
2077 	return 0;
2078 }
2079 
2080 /**
2081  * proc_dostring - read a string sysctl
2082  * @table: the sysctl table
2083  * @write: %TRUE if this is a write to the sysctl file
2084  * @buffer: the user buffer
2085  * @lenp: the size of the user buffer
2086  * @ppos: file position
2087  *
2088  * Reads/writes a string from/to the user buffer. If the kernel
2089  * buffer provided is not large enough to hold the string, the
2090  * string is truncated. The copied string is %NULL-terminated.
2091  * If the string is being read by the user process, it is copied
2092  * and a newline '\n' is added. It is truncated if the buffer is
2093  * not large enough.
2094  *
2095  * Returns 0 on success.
2096  */
2097 int proc_dostring(struct ctl_table *table, int write,
2098 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2099 {
2100 	return _proc_do_string(table->data, table->maxlen, write,
2101 			       buffer, lenp, ppos);
2102 }
2103 
2104 static size_t proc_skip_spaces(char **buf)
2105 {
2106 	size_t ret;
2107 	char *tmp = skip_spaces(*buf);
2108 	ret = tmp - *buf;
2109 	*buf = tmp;
2110 	return ret;
2111 }
2112 
/*
 * Skip leading occurrences of character @v: advance *buf and decrement
 * *size for each one, stopping at the first different character or when
 * *size reaches zero.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	for (; *size && **buf == v; (*size)--)
		(*buf)++;
}
2122 
2123 #define TMPBUFLEN 22
2124 /**
2125  * proc_get_long - reads an ASCII formatted integer from a user buffer
2126  *
2127  * @buf: a kernel buffer
2128  * @size: size of the kernel buffer
2129  * @val: this is where the number will be stored
2130  * @neg: set to %TRUE if number is negative
2131  * @perm_tr: a vector which contains the allowed trailers
2132  * @perm_tr_len: size of the perm_tr vector
2133  * @tr: pointer to store the trailer character
2134  *
2135  * In case of success %0 is returned and @buf and @size are updated with
2136  * the amount of bytes read. If @tr is non-NULL and a trailing
2137  * character exists (size is non-zero after returning from this
2138  * function), @tr is updated with the trailing character.
2139  */
2140 static int proc_get_long(char **buf, size_t *size,
2141 			  unsigned long *val, bool *neg,
2142 			  const char *perm_tr, unsigned perm_tr_len, char *tr)
2143 {
2144 	int len;
2145 	char *p, tmp[TMPBUFLEN];
2146 
2147 	if (!*size)
2148 		return -EINVAL;
2149 
2150 	len = *size;
2151 	if (len > TMPBUFLEN - 1)
2152 		len = TMPBUFLEN - 1;
2153 
2154 	memcpy(tmp, *buf, len);
2155 
2156 	tmp[len] = 0;
2157 	p = tmp;
2158 	if (*p == '-' && *size > 1) {
2159 		*neg = true;
2160 		p++;
2161 	} else
2162 		*neg = false;
2163 	if (!isdigit(*p))
2164 		return -EINVAL;
2165 
2166 	*val = simple_strtoul(p, &p, 0);
2167 
2168 	len = p - tmp;
2169 
2170 	/* We don't know if the next char is whitespace thus we may accept
2171 	 * invalid integers (e.g. 1234...a) or two integers instead of one
2172 	 * (e.g. 123...1). So lets not allow such large numbers. */
2173 	if (len == TMPBUFLEN - 1)
2174 		return -EINVAL;
2175 
2176 	if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
2177 		return -EINVAL;
2178 
2179 	if (tr && (len < *size))
2180 		*tr = *p;
2181 
2182 	*buf += len;
2183 	*size -= len;
2184 
2185 	return 0;
2186 }
2187 
2188 /**
2189  * proc_put_long - converts an integer to a decimal ASCII formatted string
2190  *
2191  * @buf: the user buffer
2192  * @size: the size of the user buffer
2193  * @val: the integer to be converted
2194  * @neg: sign of the number, %TRUE for negative
2195  *
2196  * In case of success %0 is returned and @buf and @size are updated with
2197  * the amount of bytes written.
2198  */
2199 static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
2200 			  bool neg)
2201 {
2202 	int len;
2203 	char tmp[TMPBUFLEN], *p = tmp;
2204 
2205 	sprintf(p, "%s%lu", neg ? "-" : "", val);
2206 	len = strlen(tmp);
2207 	if (len > *size)
2208 		len = *size;
2209 	if (copy_to_user(*buf, tmp, len))
2210 		return -EFAULT;
2211 	*size -= len;
2212 	*buf += len;
2213 	return 0;
2214 }
2215 #undef TMPBUFLEN
2216 
2217 static int proc_put_char(void __user **buf, size_t *size, char c)
2218 {
2219 	if (*size) {
2220 		char __user **buffer = (char __user **)buf;
2221 		if (put_user(c, *buffer))
2222 			return -EFAULT;
2223 		(*size)--, (*buffer)++;
2224 		*buf = *buffer;
2225 	}
2226 	return 0;
2227 }
2228 
/*
 * Default conversion between the (sign, magnitude) pair used by the
 * parsing/printing helpers and an int sysctl value.
 *
 * @negp:  sign of the number (in for writes, out for reads)
 * @lvalp: magnitude of the number (in for writes, out for reads)
 * @valp:  the int being written (write != 0) or read (write == 0)
 * @write: non-zero to store *negp / *lvalp into *valp
 * @data:  unused by this conversion
 *
 * On a write, a magnitude that cannot be represented in an int is
 * rejected with -EINVAL and *valp is left untouched, instead of being
 * silently truncated.  Returns 0 otherwise.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		if (*negp) {
			/* the most negative int has magnitude INT_MAX + 1 */
			if (*lvalp > (unsigned long) INT_MAX + 1)
				return -EINVAL;
			*valp = -*lvalp;
		} else {
			if (*lvalp > (unsigned long) INT_MAX)
				return -EINVAL;
			*valp = *lvalp;
		}
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = true;
			/* negate as unsigned: -val would overflow for INT_MIN */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = false;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2247 
/*
 * Characters allowed to follow a number when parsing integer vectors.
 * This is a plain char array without NUL terminator; it is used together
 * with sizeof().
 */
static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
2249 
/*
 * Read or write a vector of ints as whitespace-separated ASCII text.
 *
 * @tbl_data: the int array to operate on
 * @table:    sysctl entry; table->maxlen gives the size of the array
 * @write:    non-zero to parse values from @buffer, zero to print them
 * @buffer:   the user buffer
 * @lenp:     in: size of the user buffer; out: bytes consumed/produced
 * @ppos:     file position, advanced by the final *lenp
 * @conv:     converts between (sign, magnitude) and the stored int;
 *            do_proc_dointvec_conv is used when this is NULL
 * @data:     opaque cookie passed through to @conv
 *
 * Returns 0 on success or a negative errno value.
 */
static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
		  int write, void __user *buffer,
		  size_t *lenp, loff_t *ppos,
		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
			      int write, void *data),
		  void *data)
{
	int *i, vleft, first = 1, err = 0;
	unsigned long page = 0;
	size_t left;
	char *kbuf;

	/* nothing to do; this includes reads at a non-zero file position */
	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
		*lenp = 0;
		return 0;
	}

	i = (int *) tbl_data;
	/* number of ints that fit into the table's data area */
	vleft = table->maxlen / sizeof(*i);
	left = *lenp;

	if (!conv)
		conv = do_proc_dointvec_conv;

	if (write) {
		/* parse from a NUL-terminated kernel copy of at most one page */
		if (left > PAGE_SIZE - 1)
			left = PAGE_SIZE - 1;
		page = __get_free_page(GFP_TEMPORARY);
		kbuf = (char *) page;
		if (!kbuf)
			return -ENOMEM;
		if (copy_from_user(kbuf, buffer, left)) {
			err = -EFAULT;
			goto free;
		}
		kbuf[left] = 0;
	}

	/* one value per iteration while buffer space and array slots remain */
	for (; left && vleft--; i++, first=0) {
		unsigned long lval;
		bool neg;

		if (write) {
			left -= proc_skip_spaces(&kbuf);

			if (!left)
				break;
			err = proc_get_long(&kbuf, &left, &lval, &neg,
					     proc_wspace_sep,
					     sizeof(proc_wspace_sep), NULL);
			if (err)
				break;
			if (conv(&neg, &lval, i, 1, data)) {
				err = -EINVAL;
				break;
			}
		} else {
			if (conv(&neg, &lval, i, 0, data)) {
				err = -EINVAL;
				break;
			}
			/* values after the first are separated by a tab */
			if (!first)
				err = proc_put_char(&buffer, &left, '\t');
			if (err)
				break;
			err = proc_put_long(&buffer, &left, lval, neg);
			if (err)
				break;
		}
	}

	/* terminate the output with a newline if at least one value was printed */
	if (!write && !first && left && !err)
		err = proc_put_char(&buffer, &left, '\n');
	/* swallow whitespace following the last parsed value */
	if (write && !err && left)
		left -= proc_skip_spaces(&kbuf);
free:
	if (write) {
		free_page(page);
		/* not a single value was stored: report the error, or -EINVAL */
		if (first)
			return err ? : -EINVAL;
	}
	/* report how much of the user buffer was actually used */
	*lenp -= left;
	*ppos += *lenp;
	return err;
}
2335 
2336 static int do_proc_dointvec(struct ctl_table *table, int write,
2337 		  void __user *buffer, size_t *lenp, loff_t *ppos,
2338 		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2339 			      int write, void *data),
2340 		  void *data)
2341 {
2342 	return __do_proc_dointvec(table->data, table, write,
2343 			buffer, lenp, ppos, conv, data);
2344 }
2345 
2346 /**
2347  * proc_dointvec - read a vector of integers
2348  * @table: the sysctl table
2349  * @write: %TRUE if this is a write to the sysctl file
2350  * @buffer: the user buffer
2351  * @lenp: the size of the user buffer
2352  * @ppos: file position
2353  *
2354  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2355  * values from/to the user buffer, treated as an ASCII string.
2356  *
2357  * Returns 0 on success.
2358  */
2359 int proc_dointvec(struct ctl_table *table, int write,
2360 		     void __user *buffer, size_t *lenp, loff_t *ppos)
2361 {
2362     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2363 		    	    NULL,NULL);
2364 }
2365 
2366 /*
2367  * Taint values can only be increased
2368  * This means we can safely use a temporary.
2369  */
2370 static int proc_taint(struct ctl_table *table, int write,
2371 			       void __user *buffer, size_t *lenp, loff_t *ppos)
2372 {
2373 	struct ctl_table t;
2374 	unsigned long tmptaint = get_taint();
2375 	int err;
2376 
2377 	if (write && !capable(CAP_SYS_ADMIN))
2378 		return -EPERM;
2379 
2380 	t = *table;
2381 	t.data = &tmptaint;
2382 	err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2383 	if (err < 0)
2384 		return err;
2385 
2386 	if (write) {
2387 		/*
2388 		 * Poor man's atomic or. Not worth adding a primitive
2389 		 * to everyone's atomic.h for this
2390 		 */
2391 		int i;
2392 		for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2393 			if ((tmptaint >> i) & 1)
2394 				add_taint(i);
2395 		}
2396 	}
2397 
2398 	return err;
2399 }
2400 
/*
 * Bounds handed to do_proc_dointvec_minmax_conv() through its @data
 * argument.  A NULL pointer disables the corresponding check.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;	/* smallest value accepted on write, or NULL */
	int *max;	/* largest value accepted on write, or NULL */
};

/*
 * do_proc_dointvec_minmax_conv - range checked int <-> sign/magnitude converter
 * @negp: sign flag (input when writing, output when reading)
 * @lvalp: magnitude (input when writing, output when reading)
 * @valp: the integer that is stored to or loaded from
 * @write: non-zero to store, zero to load
 * @data: points at a struct do_proc_dointvec_minmax_conv_param
 *
 * Returns 0 on success.  A write returns -EINVAL, leaving *valp untouched,
 * when the magnitude does not fit in an int or the resulting value lies
 * outside the configured bounds.
 */
static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		int val;

		/*
		 * Reject magnitudes an int cannot hold before converting;
		 * otherwise a huge input could wrap into the allowed range.
		 */
		if (*negp) {
			if (*lvalp > (unsigned long) INT_MAX + 1)
				return -EINVAL;
			val = -*lvalp;
		} else {
			if (*lvalp > (unsigned long) INT_MAX)
				return -EINVAL;
			val = *lvalp;
		}
		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = true;
			/* Negate as unsigned: -val overflows for INT_MIN. */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = false;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2429 
2430 /**
2431  * proc_dointvec_minmax - read a vector of integers with min/max values
2432  * @table: the sysctl table
2433  * @write: %TRUE if this is a write to the sysctl file
2434  * @buffer: the user buffer
2435  * @lenp: the size of the user buffer
2436  * @ppos: file position
2437  *
2438  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2439  * values from/to the user buffer, treated as an ASCII string.
2440  *
2441  * This routine will ensure the values are within the range specified by
2442  * table->extra1 (min) and table->extra2 (max).
2443  *
2444  * Returns 0 on success.
2445  */
2446 int proc_dointvec_minmax(struct ctl_table *table, int write,
2447 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2448 {
2449 	struct do_proc_dointvec_minmax_conv_param param = {
2450 		.min = (int *) table->extra1,
2451 		.max = (int *) table->extra2,
2452 	};
2453 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
2454 				do_proc_dointvec_minmax_conv, &param);
2455 }
2456 
2457 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2458 				     void __user *buffer,
2459 				     size_t *lenp, loff_t *ppos,
2460 				     unsigned long convmul,
2461 				     unsigned long convdiv)
2462 {
2463 	unsigned long *i, *min, *max;
2464 	int vleft, first = 1, err = 0;
2465 	unsigned long page = 0;
2466 	size_t left;
2467 	char *kbuf;
2468 
2469 	if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
2470 		*lenp = 0;
2471 		return 0;
2472 	}
2473 
2474 	i = (unsigned long *) data;
2475 	min = (unsigned long *) table->extra1;
2476 	max = (unsigned long *) table->extra2;
2477 	vleft = table->maxlen / sizeof(unsigned long);
2478 	left = *lenp;
2479 
2480 	if (write) {
2481 		if (left > PAGE_SIZE - 1)
2482 			left = PAGE_SIZE - 1;
2483 		page = __get_free_page(GFP_TEMPORARY);
2484 		kbuf = (char *) page;
2485 		if (!kbuf)
2486 			return -ENOMEM;
2487 		if (copy_from_user(kbuf, buffer, left)) {
2488 			err = -EFAULT;
2489 			goto free;
2490 		}
2491 		kbuf[left] = 0;
2492 	}
2493 
2494 	for (; left && vleft--; i++, first = 0) {
2495 		unsigned long val;
2496 
2497 		if (write) {
2498 			bool neg;
2499 
2500 			left -= proc_skip_spaces(&kbuf);
2501 
2502 			err = proc_get_long(&kbuf, &left, &val, &neg,
2503 					     proc_wspace_sep,
2504 					     sizeof(proc_wspace_sep), NULL);
2505 			if (err)
2506 				break;
2507 			if (neg)
2508 				continue;
2509 			if ((min && val < *min) || (max && val > *max))
2510 				continue;
2511 			*i = val;
2512 		} else {
2513 			val = convdiv * (*i) / convmul;
2514 			if (!first)
2515 				err = proc_put_char(&buffer, &left, '\t');
2516 			err = proc_put_long(&buffer, &left, val, false);
2517 			if (err)
2518 				break;
2519 		}
2520 	}
2521 
2522 	if (!write && !first && left && !err)
2523 		err = proc_put_char(&buffer, &left, '\n');
2524 	if (write && !err)
2525 		left -= proc_skip_spaces(&kbuf);
2526 free:
2527 	if (write) {
2528 		free_page(page);
2529 		if (first)
2530 			return err ? : -EINVAL;
2531 	}
2532 	*lenp -= left;
2533 	*ppos += *lenp;
2534 	return err;
2535 }
2536 
2537 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2538 				     void __user *buffer,
2539 				     size_t *lenp, loff_t *ppos,
2540 				     unsigned long convmul,
2541 				     unsigned long convdiv)
2542 {
2543 	return __do_proc_doulongvec_minmax(table->data, table, write,
2544 			buffer, lenp, ppos, convmul, convdiv);
2545 }
2546 
2547 /**
2548  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2549  * @table: the sysctl table
2550  * @write: %TRUE if this is a write to the sysctl file
2551  * @buffer: the user buffer
2552  * @lenp: the size of the user buffer
2553  * @ppos: file position
2554  *
2555  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2556  * values from/to the user buffer, treated as an ASCII string.
2557  *
2558  * This routine will ensure the values are within the range specified by
2559  * table->extra1 (min) and table->extra2 (max).
2560  *
2561  * Returns 0 on success.
2562  */
2563 int proc_doulongvec_minmax(struct ctl_table *table, int write,
2564 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2565 {
2566     return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2567 }
2568 
2569 /**
2570  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2571  * @table: the sysctl table
2572  * @write: %TRUE if this is a write to the sysctl file
2573  * @buffer: the user buffer
2574  * @lenp: the size of the user buffer
2575  * @ppos: file position
2576  *
2577  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2578  * values from/to the user buffer, treated as an ASCII string. The values
2579  * are treated as milliseconds, and converted to jiffies when they are stored.
2580  *
2581  * This routine will ensure the values are within the range specified by
2582  * table->extra1 (min) and table->extra2 (max).
2583  *
2584  * Returns 0 on success.
2585  */
2586 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2587 				      void __user *buffer,
2588 				      size_t *lenp, loff_t *ppos)
2589 {
2590     return do_proc_doulongvec_minmax(table, write, buffer,
2591 				     lenp, ppos, HZ, 1000l);
2592 }
2593 
2594 
2595 static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
2596 					 int *valp,
2597 					 int write, void *data)
2598 {
2599 	if (write) {
2600 		if (*lvalp > LONG_MAX / HZ)
2601 			return 1;
2602 		*valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2603 	} else {
2604 		int val = *valp;
2605 		unsigned long lval;
2606 		if (val < 0) {
2607 			*negp = true;
2608 			lval = (unsigned long)-val;
2609 		} else {
2610 			*negp = false;
2611 			lval = (unsigned long)val;
2612 		}
2613 		*lvalp = lval / HZ;
2614 	}
2615 	return 0;
2616 }
2617 
2618 static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
2619 						int *valp,
2620 						int write, void *data)
2621 {
2622 	if (write) {
2623 		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2624 			return 1;
2625 		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2626 	} else {
2627 		int val = *valp;
2628 		unsigned long lval;
2629 		if (val < 0) {
2630 			*negp = true;
2631 			lval = (unsigned long)-val;
2632 		} else {
2633 			*negp = false;
2634 			lval = (unsigned long)val;
2635 		}
2636 		*lvalp = jiffies_to_clock_t(lval);
2637 	}
2638 	return 0;
2639 }
2640 
/*
 * do_proc_dointvec_ms_jiffies_conv - milliseconds (user) <-> jiffies
 * @negp: sign flag (input when writing, output when reading)
 * @lvalp: magnitude in milliseconds (input when writing, output when reading)
 * @valp: the jiffies value that is stored to or loaded from
 * @write: non-zero to store, zero to load
 * @data: unused
 *
 * Returns 0 on success, or 1 when the converted jiffies value being
 * written does not fit in an int.
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);

		/* Refuse results that would be truncated by the int store. */
		if (jif > INT_MAX)
			return 1;
		*valp = (int)jif;
	} else {
		int val = *valp;
		unsigned long lval;
		if (val < 0) {
			*negp = true;
			/* Negate as unsigned: -val overflows for INT_MIN. */
			lval = -(unsigned long)val;
		} else {
			*negp = false;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_msecs(lval);
	}
	return 0;
}
2661 
2662 /**
2663  * proc_dointvec_jiffies - read a vector of integers as seconds
2664  * @table: the sysctl table
2665  * @write: %TRUE if this is a write to the sysctl file
2666  * @buffer: the user buffer
2667  * @lenp: the size of the user buffer
2668  * @ppos: file position
2669  *
2670  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2671  * values from/to the user buffer, treated as an ASCII string.
2672  * The values read are assumed to be in seconds, and are converted into
2673  * jiffies.
2674  *
2675  * Returns 0 on success.
2676  */
2677 int proc_dointvec_jiffies(struct ctl_table *table, int write,
2678 			  void __user *buffer, size_t *lenp, loff_t *ppos)
2679 {
2680     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2681 		    	    do_proc_dointvec_jiffies_conv,NULL);
2682 }
2683 
2684 /**
2685  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2686  * @table: the sysctl table
2687  * @write: %TRUE if this is a write to the sysctl file
2688  * @buffer: the user buffer
2689  * @lenp: the size of the user buffer
2690  * @ppos: pointer to the file position
2691  *
2692  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2693  * values from/to the user buffer, treated as an ASCII string.
2694  * The values read are assumed to be in 1/USER_HZ seconds, and
2695  * are converted into jiffies.
2696  *
2697  * Returns 0 on success.
2698  */
2699 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2700 				 void __user *buffer, size_t *lenp, loff_t *ppos)
2701 {
2702     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2703 		    	    do_proc_dointvec_userhz_jiffies_conv,NULL);
2704 }
2705 
2706 /**
2707  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2708  * @table: the sysctl table
2709  * @write: %TRUE if this is a write to the sysctl file
2710  * @buffer: the user buffer
2711  * @lenp: the size of the user buffer
2712  * @ppos: file position
2713  * @ppos: the current position in the file
2714  *
2715  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2716  * values from/to the user buffer, treated as an ASCII string.
2717  * The values read are assumed to be in 1/1000 seconds, and
2718  * are converted into jiffies.
2719  *
2720  * Returns 0 on success.
2721  */
2722 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
2723 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2724 {
2725 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
2726 				do_proc_dointvec_ms_jiffies_conv, NULL);
2727 }
2728 
2729 static int proc_do_cad_pid(struct ctl_table *table, int write,
2730 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2731 {
2732 	struct pid *new_pid;
2733 	pid_t tmp;
2734 	int r;
2735 
2736 	tmp = pid_vnr(cad_pid);
2737 
2738 	r = __do_proc_dointvec(&tmp, table, write, buffer,
2739 			       lenp, ppos, NULL, NULL);
2740 	if (r || !write)
2741 		return r;
2742 
2743 	new_pid = find_get_pid(tmp);
2744 	if (!new_pid)
2745 		return -ESRCH;
2746 
2747 	put_pid(xchg(&cad_pid, new_pid));
2748 	return 0;
2749 }
2750 
2751 /**
2752  * proc_do_large_bitmap - read/write from/to a large bitmap
2753  * @table: the sysctl table
2754  * @write: %TRUE if this is a write to the sysctl file
2755  * @buffer: the user buffer
2756  * @lenp: the size of the user buffer
2757  * @ppos: file position
2758  *
2759  * The bitmap is stored at table->data and the bitmap length (in bits)
2760  * in table->maxlen.
2761  *
2762  * We use a range comma separated format (e.g. 1,3-4,10-10) so that
2763  * large bitmaps may be represented in a compact manner. Writing into
2764  * the file will clear the bitmap then update it with the given input.
2765  *
2766  * Returns 0 on success.
2767  */
2768 int proc_do_large_bitmap(struct ctl_table *table, int write,
2769 			 void __user *buffer, size_t *lenp, loff_t *ppos)
2770 {
2771 	int err = 0;
2772 	bool first = 1;
2773 	size_t left = *lenp;
2774 	unsigned long bitmap_len = table->maxlen;
2775 	unsigned long *bitmap = (unsigned long *) table->data;
2776 	unsigned long *tmp_bitmap = NULL;
2777 	char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
2778 
2779 	if (!bitmap_len || !left || (*ppos && !write)) {
2780 		*lenp = 0;
2781 		return 0;
2782 	}
2783 
2784 	if (write) {
2785 		unsigned long page = 0;
2786 		char *kbuf;
2787 
2788 		if (left > PAGE_SIZE - 1)
2789 			left = PAGE_SIZE - 1;
2790 
2791 		page = __get_free_page(GFP_TEMPORARY);
2792 		kbuf = (char *) page;
2793 		if (!kbuf)
2794 			return -ENOMEM;
2795 		if (copy_from_user(kbuf, buffer, left)) {
2796 			free_page(page);
2797 			return -EFAULT;
2798                 }
2799 		kbuf[left] = 0;
2800 
2801 		tmp_bitmap = kzalloc(BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long),
2802 				     GFP_KERNEL);
2803 		if (!tmp_bitmap) {
2804 			free_page(page);
2805 			return -ENOMEM;
2806 		}
2807 		proc_skip_char(&kbuf, &left, '\n');
2808 		while (!err && left) {
2809 			unsigned long val_a, val_b;
2810 			bool neg;
2811 
2812 			err = proc_get_long(&kbuf, &left, &val_a, &neg, tr_a,
2813 					     sizeof(tr_a), &c);
2814 			if (err)
2815 				break;
2816 			if (val_a >= bitmap_len || neg) {
2817 				err = -EINVAL;
2818 				break;
2819 			}
2820 
2821 			val_b = val_a;
2822 			if (left) {
2823 				kbuf++;
2824 				left--;
2825 			}
2826 
2827 			if (c == '-') {
2828 				err = proc_get_long(&kbuf, &left, &val_b,
2829 						     &neg, tr_b, sizeof(tr_b),
2830 						     &c);
2831 				if (err)
2832 					break;
2833 				if (val_b >= bitmap_len || neg ||
2834 				    val_a > val_b) {
2835 					err = -EINVAL;
2836 					break;
2837 				}
2838 				if (left) {
2839 					kbuf++;
2840 					left--;
2841 				}
2842 			}
2843 
2844 			while (val_a <= val_b)
2845 				set_bit(val_a++, tmp_bitmap);
2846 
2847 			first = 0;
2848 			proc_skip_char(&kbuf, &left, '\n');
2849 		}
2850 		free_page(page);
2851 	} else {
2852 		unsigned long bit_a, bit_b = 0;
2853 
2854 		while (left) {
2855 			bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
2856 			if (bit_a >= bitmap_len)
2857 				break;
2858 			bit_b = find_next_zero_bit(bitmap, bitmap_len,
2859 						   bit_a + 1) - 1;
2860 
2861 			if (!first) {
2862 				err = proc_put_char(&buffer, &left, ',');
2863 				if (err)
2864 					break;
2865 			}
2866 			err = proc_put_long(&buffer, &left, bit_a, false);
2867 			if (err)
2868 				break;
2869 			if (bit_a != bit_b) {
2870 				err = proc_put_char(&buffer, &left, '-');
2871 				if (err)
2872 					break;
2873 				err = proc_put_long(&buffer, &left, bit_b, false);
2874 				if (err)
2875 					break;
2876 			}
2877 
2878 			first = 0; bit_b++;
2879 		}
2880 		if (!err)
2881 			err = proc_put_char(&buffer, &left, '\n');
2882 	}
2883 
2884 	if (!err) {
2885 		if (write) {
2886 			if (*ppos)
2887 				bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
2888 			else
2889 				memcpy(bitmap, tmp_bitmap,
2890 					BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long));
2891 		}
2892 		kfree(tmp_bitmap);
2893 		*lenp -= left;
2894 		*ppos += *lenp;
2895 		return 0;
2896 	} else {
2897 		kfree(tmp_bitmap);
2898 		return err;
2899 	}
2900 }
2901 
2902 #else /* CONFIG_PROC_FS */
2903 
2904 int proc_dostring(struct ctl_table *table, int write,
2905 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2906 {
2907 	return -ENOSYS;
2908 }
2909 
2910 int proc_dointvec(struct ctl_table *table, int write,
2911 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2912 {
2913 	return -ENOSYS;
2914 }
2915 
2916 int proc_dointvec_minmax(struct ctl_table *table, int write,
2917 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2918 {
2919 	return -ENOSYS;
2920 }
2921 
2922 int proc_dointvec_jiffies(struct ctl_table *table, int write,
2923 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2924 {
2925 	return -ENOSYS;
2926 }
2927 
2928 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2929 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2930 {
2931 	return -ENOSYS;
2932 }
2933 
2934 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
2935 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2936 {
2937 	return -ENOSYS;
2938 }
2939 
2940 int proc_doulongvec_minmax(struct ctl_table *table, int write,
2941 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2942 {
2943 	return -ENOSYS;
2944 }
2945 
2946 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2947 				      void __user *buffer,
2948 				      size_t *lenp, loff_t *ppos)
2949 {
2950     return -ENOSYS;
2951 }
2952 
2953 
2954 #endif /* CONFIG_PROC_FS */
2955 
/*
 * The handlers above are defined twice, once in each branch of the
 * CONFIG_PROC_FS conditional, so their exports are collected here in one
 * place instead of following each definition.
 */

/* Table registration. */
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(register_sysctl_paths);
EXPORT_SYMBOL(unregister_sysctl_table);

/* String and integer vector handlers. */
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);

/* Unsigned long vector handlers. */
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
2971