xref: /openbmc/linux/kernel/sysctl.c (revision 261a9af6)
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20 
21 #include <linux/module.h>
22 #include <linux/mm.h>
23 #include <linux/swap.h>
24 #include <linux/slab.h>
25 #include <linux/sysctl.h>
26 #include <linux/signal.h>
27 #include <linux/printk.h>
28 #include <linux/proc_fs.h>
29 #include <linux/security.h>
30 #include <linux/ctype.h>
31 #include <linux/kmemcheck.h>
32 #include <linux/fs.h>
33 #include <linux/init.h>
34 #include <linux/kernel.h>
35 #include <linux/kobject.h>
36 #include <linux/net.h>
37 #include <linux/sysrq.h>
38 #include <linux/highuid.h>
39 #include <linux/writeback.h>
40 #include <linux/ratelimit.h>
41 #include <linux/compaction.h>
42 #include <linux/hugetlb.h>
43 #include <linux/initrd.h>
44 #include <linux/key.h>
45 #include <linux/times.h>
46 #include <linux/limits.h>
47 #include <linux/dcache.h>
48 #include <linux/dnotify.h>
49 #include <linux/syscalls.h>
50 #include <linux/vmstat.h>
51 #include <linux/nfs_fs.h>
52 #include <linux/acpi.h>
53 #include <linux/reboot.h>
54 #include <linux/ftrace.h>
55 #include <linux/perf_event.h>
56 #include <linux/kprobes.h>
57 #include <linux/pipe_fs_i.h>
58 #include <linux/oom.h>
59 #include <linux/kmod.h>
60 
61 #include <asm/uaccess.h>
62 #include <asm/processor.h>
63 
64 #ifdef CONFIG_X86
65 #include <asm/nmi.h>
66 #include <asm/stacktrace.h>
67 #include <asm/io.h>
68 #endif
69 #ifdef CONFIG_BSD_PROCESS_ACCT
70 #include <linux/acct.h>
71 #endif
72 #ifdef CONFIG_RT_MUTEXES
73 #include <linux/rtmutex.h>
74 #endif
75 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
76 #include <linux/lockdep.h>
77 #endif
78 #ifdef CONFIG_CHR_DEV_SG
79 #include <scsi/sg.h>
80 #endif
81 
82 #ifdef CONFIG_LOCKUP_DETECTOR
83 #include <linux/nmi.h>
84 #endif
85 
86 
87 #if defined(CONFIG_SYSCTL)
88 
89 /* External variables not in a header file. */
90 extern int sysctl_overcommit_memory;
91 extern int sysctl_overcommit_ratio;
92 extern int max_threads;
93 extern int core_uses_pid;
94 extern int suid_dumpable;
95 extern char core_pattern[];
96 extern unsigned int core_pipe_limit;
97 extern int pid_max;
98 extern int min_free_kbytes;
99 extern int pid_max_min, pid_max_max;
100 extern int sysctl_drop_caches;
101 extern int percpu_pagelist_fraction;
102 extern int compat_log;
103 extern int latencytop_enabled;
104 extern int sysctl_nr_open_min, sysctl_nr_open_max;
105 #ifndef CONFIG_MMU
106 extern int sysctl_nr_trim_pages;
107 #endif
108 #ifdef CONFIG_BLOCK
109 extern int blk_iopoll_enabled;
110 #endif
111 
112 /* Constants used for minimum and  maximum */
113 #ifdef CONFIG_LOCKUP_DETECTOR
114 static int sixty = 60;
115 static int neg_one = -1;
116 #endif
117 
118 static int zero;
119 static int __maybe_unused one = 1;
120 static int __maybe_unused two = 2;
121 static int __maybe_unused three = 3;
122 static unsigned long one_ul = 1;
123 static int one_hundred = 100;
124 #ifdef CONFIG_PRINTK
125 static int ten_thousand = 10000;
126 #endif
127 
128 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
129 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
130 
131 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
132 static int maxolduid = 65535;
133 static int minolduid;
134 static int min_percpu_pagelist_fract = 8;
135 
136 static int ngroups_max = NGROUPS_MAX;
137 
138 #ifdef CONFIG_INOTIFY_USER
139 #include <linux/inotify.h>
140 #endif
141 #ifdef CONFIG_SPARC
142 #include <asm/system.h>
143 #endif
144 
145 #ifdef CONFIG_SPARC64
146 extern int sysctl_tsb_ratio;
147 #endif
148 
149 #ifdef __hppa__
150 extern int pwrsw_enabled;
151 extern int unaligned_enabled;
152 #endif
153 
154 #ifdef CONFIG_S390
155 #ifdef CONFIG_MATHEMU
156 extern int sysctl_ieee_emulation_warnings;
157 #endif
158 extern int sysctl_userprocess_debug;
159 extern int spin_retry;
160 #endif
161 
162 #ifdef CONFIG_IA64
163 extern int no_unaligned_warning;
164 extern int unaligned_dump_stack;
165 #endif
166 
167 #ifdef CONFIG_PROC_SYSCTL
168 static int proc_do_cad_pid(struct ctl_table *table, int write,
169 		  void __user *buffer, size_t *lenp, loff_t *ppos);
170 static int proc_taint(struct ctl_table *table, int write,
171 			       void __user *buffer, size_t *lenp, loff_t *ppos);
172 #endif
173 
174 #ifdef CONFIG_PRINTK
175 static int proc_dmesg_restrict(struct ctl_table *table, int write,
176 				void __user *buffer, size_t *lenp, loff_t *ppos);
177 #endif
178 
179 #ifdef CONFIG_MAGIC_SYSRQ
180 /* Note: sysrq code uses it's own private copy */
181 static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE;
182 
183 static int sysrq_sysctl_handler(ctl_table *table, int write,
184 				void __user *buffer, size_t *lenp,
185 				loff_t *ppos)
186 {
187 	int error;
188 
189 	error = proc_dointvec(table, write, buffer, lenp, ppos);
190 	if (error)
191 		return error;
192 
193 	if (write)
194 		sysrq_toggle_support(__sysrq_enabled);
195 
196 	return 0;
197 }
198 
199 #endif
200 
201 static struct ctl_table root_table[];
202 static struct ctl_table_root sysctl_table_root;
203 static struct ctl_table_header root_table_header = {
204 	{{.count = 1,
205 	.ctl_table = root_table,
206 	.ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}},
207 	.root = &sysctl_table_root,
208 	.set = &sysctl_table_root.default_set,
209 };
210 static struct ctl_table_root sysctl_table_root = {
211 	.root_list = LIST_HEAD_INIT(sysctl_table_root.root_list),
212 	.default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry),
213 };
214 
215 static struct ctl_table kern_table[];
216 static struct ctl_table vm_table[];
217 static struct ctl_table fs_table[];
218 static struct ctl_table debug_table[];
219 static struct ctl_table dev_table[];
220 extern struct ctl_table random_table[];
221 #ifdef CONFIG_EPOLL
222 extern struct ctl_table epoll_table[];
223 #endif
224 
225 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
226 int sysctl_legacy_va_layout;
227 #endif
228 
229 /* The default sysctl tables: */
230 
231 static struct ctl_table root_table[] = {
232 	{
233 		.procname	= "kernel",
234 		.mode		= 0555,
235 		.child		= kern_table,
236 	},
237 	{
238 		.procname	= "vm",
239 		.mode		= 0555,
240 		.child		= vm_table,
241 	},
242 	{
243 		.procname	= "fs",
244 		.mode		= 0555,
245 		.child		= fs_table,
246 	},
247 	{
248 		.procname	= "debug",
249 		.mode		= 0555,
250 		.child		= debug_table,
251 	},
252 	{
253 		.procname	= "dev",
254 		.mode		= 0555,
255 		.child		= dev_table,
256 	},
257 	{ }
258 };
259 
#ifdef CONFIG_SCHED_DEBUG
/* sched_min_granularity_ns and sched_latency_ns: 100 usecs .. 1 second */
static int min_sched_granularity_ns = 100000;
static int max_sched_granularity_ns = NSEC_PER_SEC;

/* sched_wakeup_granularity_ns: 0 usecs .. 1 second */
static int min_wakeup_granularity_ns;
static int max_wakeup_granularity_ns = NSEC_PER_SEC;

/* sched_tunable_scaling: every value of the enum below ..._END */
static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
#endif

#ifdef CONFIG_COMPACTION
/* extfrag_threshold: 0 .. 1000 */
static int min_extfrag_threshold;
static int max_extfrag_threshold = 1000;
#endif
273 
274 static struct ctl_table kern_table[] = {
275 	{
276 		.procname	= "sched_child_runs_first",
277 		.data		= &sysctl_sched_child_runs_first,
278 		.maxlen		= sizeof(unsigned int),
279 		.mode		= 0644,
280 		.proc_handler	= proc_dointvec,
281 	},
282 #ifdef CONFIG_SCHED_DEBUG
283 	{
284 		.procname	= "sched_min_granularity_ns",
285 		.data		= &sysctl_sched_min_granularity,
286 		.maxlen		= sizeof(unsigned int),
287 		.mode		= 0644,
288 		.proc_handler	= sched_proc_update_handler,
289 		.extra1		= &min_sched_granularity_ns,
290 		.extra2		= &max_sched_granularity_ns,
291 	},
292 	{
293 		.procname	= "sched_latency_ns",
294 		.data		= &sysctl_sched_latency,
295 		.maxlen		= sizeof(unsigned int),
296 		.mode		= 0644,
297 		.proc_handler	= sched_proc_update_handler,
298 		.extra1		= &min_sched_granularity_ns,
299 		.extra2		= &max_sched_granularity_ns,
300 	},
301 	{
302 		.procname	= "sched_wakeup_granularity_ns",
303 		.data		= &sysctl_sched_wakeup_granularity,
304 		.maxlen		= sizeof(unsigned int),
305 		.mode		= 0644,
306 		.proc_handler	= sched_proc_update_handler,
307 		.extra1		= &min_wakeup_granularity_ns,
308 		.extra2		= &max_wakeup_granularity_ns,
309 	},
310 	{
311 		.procname	= "sched_tunable_scaling",
312 		.data		= &sysctl_sched_tunable_scaling,
313 		.maxlen		= sizeof(enum sched_tunable_scaling),
314 		.mode		= 0644,
315 		.proc_handler	= sched_proc_update_handler,
316 		.extra1		= &min_sched_tunable_scaling,
317 		.extra2		= &max_sched_tunable_scaling,
318 	},
319 	{
320 		.procname	= "sched_migration_cost",
321 		.data		= &sysctl_sched_migration_cost,
322 		.maxlen		= sizeof(unsigned int),
323 		.mode		= 0644,
324 		.proc_handler	= proc_dointvec,
325 	},
326 	{
327 		.procname	= "sched_nr_migrate",
328 		.data		= &sysctl_sched_nr_migrate,
329 		.maxlen		= sizeof(unsigned int),
330 		.mode		= 0644,
331 		.proc_handler	= proc_dointvec,
332 	},
333 	{
334 		.procname	= "sched_time_avg",
335 		.data		= &sysctl_sched_time_avg,
336 		.maxlen		= sizeof(unsigned int),
337 		.mode		= 0644,
338 		.proc_handler	= proc_dointvec,
339 	},
340 	{
341 		.procname	= "sched_shares_window",
342 		.data		= &sysctl_sched_shares_window,
343 		.maxlen		= sizeof(unsigned int),
344 		.mode		= 0644,
345 		.proc_handler	= proc_dointvec,
346 	},
347 	{
348 		.procname	= "timer_migration",
349 		.data		= &sysctl_timer_migration,
350 		.maxlen		= sizeof(unsigned int),
351 		.mode		= 0644,
352 		.proc_handler	= proc_dointvec_minmax,
353 		.extra1		= &zero,
354 		.extra2		= &one,
355 	},
356 #endif
357 	{
358 		.procname	= "sched_rt_period_us",
359 		.data		= &sysctl_sched_rt_period,
360 		.maxlen		= sizeof(unsigned int),
361 		.mode		= 0644,
362 		.proc_handler	= sched_rt_handler,
363 	},
364 	{
365 		.procname	= "sched_rt_runtime_us",
366 		.data		= &sysctl_sched_rt_runtime,
367 		.maxlen		= sizeof(int),
368 		.mode		= 0644,
369 		.proc_handler	= sched_rt_handler,
370 	},
371 #ifdef CONFIG_SCHED_AUTOGROUP
372 	{
373 		.procname	= "sched_autogroup_enabled",
374 		.data		= &sysctl_sched_autogroup_enabled,
375 		.maxlen		= sizeof(unsigned int),
376 		.mode		= 0644,
377 		.proc_handler	= proc_dointvec_minmax,
378 		.extra1		= &zero,
379 		.extra2		= &one,
380 	},
381 #endif
382 #ifdef CONFIG_PROVE_LOCKING
383 	{
384 		.procname	= "prove_locking",
385 		.data		= &prove_locking,
386 		.maxlen		= sizeof(int),
387 		.mode		= 0644,
388 		.proc_handler	= proc_dointvec,
389 	},
390 #endif
391 #ifdef CONFIG_LOCK_STAT
392 	{
393 		.procname	= "lock_stat",
394 		.data		= &lock_stat,
395 		.maxlen		= sizeof(int),
396 		.mode		= 0644,
397 		.proc_handler	= proc_dointvec,
398 	},
399 #endif
400 	{
401 		.procname	= "panic",
402 		.data		= &panic_timeout,
403 		.maxlen		= sizeof(int),
404 		.mode		= 0644,
405 		.proc_handler	= proc_dointvec,
406 	},
407 	{
408 		.procname	= "core_uses_pid",
409 		.data		= &core_uses_pid,
410 		.maxlen		= sizeof(int),
411 		.mode		= 0644,
412 		.proc_handler	= proc_dointvec,
413 	},
414 	{
415 		.procname	= "core_pattern",
416 		.data		= core_pattern,
417 		.maxlen		= CORENAME_MAX_SIZE,
418 		.mode		= 0644,
419 		.proc_handler	= proc_dostring,
420 	},
421 	{
422 		.procname	= "core_pipe_limit",
423 		.data		= &core_pipe_limit,
424 		.maxlen		= sizeof(unsigned int),
425 		.mode		= 0644,
426 		.proc_handler	= proc_dointvec,
427 	},
428 #ifdef CONFIG_PROC_SYSCTL
429 	{
430 		.procname	= "tainted",
431 		.maxlen 	= sizeof(long),
432 		.mode		= 0644,
433 		.proc_handler	= proc_taint,
434 	},
435 #endif
436 #ifdef CONFIG_LATENCYTOP
437 	{
438 		.procname	= "latencytop",
439 		.data		= &latencytop_enabled,
440 		.maxlen		= sizeof(int),
441 		.mode		= 0644,
442 		.proc_handler	= proc_dointvec,
443 	},
444 #endif
445 #ifdef CONFIG_BLK_DEV_INITRD
446 	{
447 		.procname	= "real-root-dev",
448 		.data		= &real_root_dev,
449 		.maxlen		= sizeof(int),
450 		.mode		= 0644,
451 		.proc_handler	= proc_dointvec,
452 	},
453 #endif
454 	{
455 		.procname	= "print-fatal-signals",
456 		.data		= &print_fatal_signals,
457 		.maxlen		= sizeof(int),
458 		.mode		= 0644,
459 		.proc_handler	= proc_dointvec,
460 	},
461 #ifdef CONFIG_SPARC
462 	{
463 		.procname	= "reboot-cmd",
464 		.data		= reboot_command,
465 		.maxlen		= 256,
466 		.mode		= 0644,
467 		.proc_handler	= proc_dostring,
468 	},
469 	{
470 		.procname	= "stop-a",
471 		.data		= &stop_a_enabled,
472 		.maxlen		= sizeof (int),
473 		.mode		= 0644,
474 		.proc_handler	= proc_dointvec,
475 	},
476 	{
477 		.procname	= "scons-poweroff",
478 		.data		= &scons_pwroff,
479 		.maxlen		= sizeof (int),
480 		.mode		= 0644,
481 		.proc_handler	= proc_dointvec,
482 	},
483 #endif
484 #ifdef CONFIG_SPARC64
485 	{
486 		.procname	= "tsb-ratio",
487 		.data		= &sysctl_tsb_ratio,
488 		.maxlen		= sizeof (int),
489 		.mode		= 0644,
490 		.proc_handler	= proc_dointvec,
491 	},
492 #endif
493 #ifdef __hppa__
494 	{
495 		.procname	= "soft-power",
496 		.data		= &pwrsw_enabled,
497 		.maxlen		= sizeof (int),
498 	 	.mode		= 0644,
499 		.proc_handler	= proc_dointvec,
500 	},
501 	{
502 		.procname	= "unaligned-trap",
503 		.data		= &unaligned_enabled,
504 		.maxlen		= sizeof (int),
505 		.mode		= 0644,
506 		.proc_handler	= proc_dointvec,
507 	},
508 #endif
509 	{
510 		.procname	= "ctrl-alt-del",
511 		.data		= &C_A_D,
512 		.maxlen		= sizeof(int),
513 		.mode		= 0644,
514 		.proc_handler	= proc_dointvec,
515 	},
516 #ifdef CONFIG_FUNCTION_TRACER
517 	{
518 		.procname	= "ftrace_enabled",
519 		.data		= &ftrace_enabled,
520 		.maxlen		= sizeof(int),
521 		.mode		= 0644,
522 		.proc_handler	= ftrace_enable_sysctl,
523 	},
524 #endif
525 #ifdef CONFIG_STACK_TRACER
526 	{
527 		.procname	= "stack_tracer_enabled",
528 		.data		= &stack_tracer_enabled,
529 		.maxlen		= sizeof(int),
530 		.mode		= 0644,
531 		.proc_handler	= stack_trace_sysctl,
532 	},
533 #endif
534 #ifdef CONFIG_TRACING
535 	{
536 		.procname	= "ftrace_dump_on_oops",
537 		.data		= &ftrace_dump_on_oops,
538 		.maxlen		= sizeof(int),
539 		.mode		= 0644,
540 		.proc_handler	= proc_dointvec,
541 	},
542 #endif
543 #ifdef CONFIG_MODULES
544 	{
545 		.procname	= "modprobe",
546 		.data		= &modprobe_path,
547 		.maxlen		= KMOD_PATH_LEN,
548 		.mode		= 0644,
549 		.proc_handler	= proc_dostring,
550 	},
551 	{
552 		.procname	= "modules_disabled",
553 		.data		= &modules_disabled,
554 		.maxlen		= sizeof(int),
555 		.mode		= 0644,
556 		/* only handle a transition from default "0" to "1" */
557 		.proc_handler	= proc_dointvec_minmax,
558 		.extra1		= &one,
559 		.extra2		= &one,
560 	},
561 #endif
562 #ifdef CONFIG_HOTPLUG
563 	{
564 		.procname	= "hotplug",
565 		.data		= &uevent_helper,
566 		.maxlen		= UEVENT_HELPER_PATH_LEN,
567 		.mode		= 0644,
568 		.proc_handler	= proc_dostring,
569 	},
570 #endif
571 #ifdef CONFIG_CHR_DEV_SG
572 	{
573 		.procname	= "sg-big-buff",
574 		.data		= &sg_big_buff,
575 		.maxlen		= sizeof (int),
576 		.mode		= 0444,
577 		.proc_handler	= proc_dointvec,
578 	},
579 #endif
580 #ifdef CONFIG_BSD_PROCESS_ACCT
581 	{
582 		.procname	= "acct",
583 		.data		= &acct_parm,
584 		.maxlen		= 3*sizeof(int),
585 		.mode		= 0644,
586 		.proc_handler	= proc_dointvec,
587 	},
588 #endif
589 #ifdef CONFIG_MAGIC_SYSRQ
590 	{
591 		.procname	= "sysrq",
592 		.data		= &__sysrq_enabled,
593 		.maxlen		= sizeof (int),
594 		.mode		= 0644,
595 		.proc_handler	= sysrq_sysctl_handler,
596 	},
597 #endif
598 #ifdef CONFIG_PROC_SYSCTL
599 	{
600 		.procname	= "cad_pid",
601 		.data		= NULL,
602 		.maxlen		= sizeof (int),
603 		.mode		= 0600,
604 		.proc_handler	= proc_do_cad_pid,
605 	},
606 #endif
607 	{
608 		.procname	= "threads-max",
609 		.data		= &max_threads,
610 		.maxlen		= sizeof(int),
611 		.mode		= 0644,
612 		.proc_handler	= proc_dointvec,
613 	},
614 	{
615 		.procname	= "random",
616 		.mode		= 0555,
617 		.child		= random_table,
618 	},
619 	{
620 		.procname	= "usermodehelper",
621 		.mode		= 0555,
622 		.child		= usermodehelper_table,
623 	},
624 	{
625 		.procname	= "overflowuid",
626 		.data		= &overflowuid,
627 		.maxlen		= sizeof(int),
628 		.mode		= 0644,
629 		.proc_handler	= proc_dointvec_minmax,
630 		.extra1		= &minolduid,
631 		.extra2		= &maxolduid,
632 	},
633 	{
634 		.procname	= "overflowgid",
635 		.data		= &overflowgid,
636 		.maxlen		= sizeof(int),
637 		.mode		= 0644,
638 		.proc_handler	= proc_dointvec_minmax,
639 		.extra1		= &minolduid,
640 		.extra2		= &maxolduid,
641 	},
642 #ifdef CONFIG_S390
643 #ifdef CONFIG_MATHEMU
644 	{
645 		.procname	= "ieee_emulation_warnings",
646 		.data		= &sysctl_ieee_emulation_warnings,
647 		.maxlen		= sizeof(int),
648 		.mode		= 0644,
649 		.proc_handler	= proc_dointvec,
650 	},
651 #endif
652 	{
653 		.procname	= "userprocess_debug",
654 		.data		= &show_unhandled_signals,
655 		.maxlen		= sizeof(int),
656 		.mode		= 0644,
657 		.proc_handler	= proc_dointvec,
658 	},
659 #endif
660 	{
661 		.procname	= "pid_max",
662 		.data		= &pid_max,
663 		.maxlen		= sizeof (int),
664 		.mode		= 0644,
665 		.proc_handler	= proc_dointvec_minmax,
666 		.extra1		= &pid_max_min,
667 		.extra2		= &pid_max_max,
668 	},
669 	{
670 		.procname	= "panic_on_oops",
671 		.data		= &panic_on_oops,
672 		.maxlen		= sizeof(int),
673 		.mode		= 0644,
674 		.proc_handler	= proc_dointvec,
675 	},
676 #if defined CONFIG_PRINTK
677 	{
678 		.procname	= "printk",
679 		.data		= &console_loglevel,
680 		.maxlen		= 4*sizeof(int),
681 		.mode		= 0644,
682 		.proc_handler	= proc_dointvec,
683 	},
684 	{
685 		.procname	= "printk_ratelimit",
686 		.data		= &printk_ratelimit_state.interval,
687 		.maxlen		= sizeof(int),
688 		.mode		= 0644,
689 		.proc_handler	= proc_dointvec_jiffies,
690 	},
691 	{
692 		.procname	= "printk_ratelimit_burst",
693 		.data		= &printk_ratelimit_state.burst,
694 		.maxlen		= sizeof(int),
695 		.mode		= 0644,
696 		.proc_handler	= proc_dointvec,
697 	},
698 	{
699 		.procname	= "printk_delay",
700 		.data		= &printk_delay_msec,
701 		.maxlen		= sizeof(int),
702 		.mode		= 0644,
703 		.proc_handler	= proc_dointvec_minmax,
704 		.extra1		= &zero,
705 		.extra2		= &ten_thousand,
706 	},
707 	{
708 		.procname	= "dmesg_restrict",
709 		.data		= &dmesg_restrict,
710 		.maxlen		= sizeof(int),
711 		.mode		= 0644,
712 		.proc_handler	= proc_dointvec_minmax,
713 		.extra1		= &zero,
714 		.extra2		= &one,
715 	},
716 	{
717 		.procname	= "kptr_restrict",
718 		.data		= &kptr_restrict,
719 		.maxlen		= sizeof(int),
720 		.mode		= 0644,
721 		.proc_handler	= proc_dmesg_restrict,
722 		.extra1		= &zero,
723 		.extra2		= &two,
724 	},
725 #endif
726 	{
727 		.procname	= "ngroups_max",
728 		.data		= &ngroups_max,
729 		.maxlen		= sizeof (int),
730 		.mode		= 0444,
731 		.proc_handler	= proc_dointvec,
732 	},
733 #if defined(CONFIG_LOCKUP_DETECTOR)
734 	{
735 		.procname       = "watchdog",
736 		.data           = &watchdog_enabled,
737 		.maxlen         = sizeof (int),
738 		.mode           = 0644,
739 		.proc_handler   = proc_dowatchdog,
740 		.extra1		= &zero,
741 		.extra2		= &one,
742 	},
743 	{
744 		.procname	= "watchdog_thresh",
745 		.data		= &watchdog_thresh,
746 		.maxlen		= sizeof(int),
747 		.mode		= 0644,
748 		.proc_handler	= proc_dowatchdog,
749 		.extra1		= &neg_one,
750 		.extra2		= &sixty,
751 	},
752 	{
753 		.procname	= "softlockup_panic",
754 		.data		= &softlockup_panic,
755 		.maxlen		= sizeof(int),
756 		.mode		= 0644,
757 		.proc_handler	= proc_dointvec_minmax,
758 		.extra1		= &zero,
759 		.extra2		= &one,
760 	},
761 	{
762 		.procname       = "nmi_watchdog",
763 		.data           = &watchdog_enabled,
764 		.maxlen         = sizeof (int),
765 		.mode           = 0644,
766 		.proc_handler   = proc_dowatchdog,
767 		.extra1		= &zero,
768 		.extra2		= &one,
769 	},
770 #endif
771 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
772 	{
773 		.procname       = "unknown_nmi_panic",
774 		.data           = &unknown_nmi_panic,
775 		.maxlen         = sizeof (int),
776 		.mode           = 0644,
777 		.proc_handler   = proc_dointvec,
778 	},
779 #endif
780 #if defined(CONFIG_X86)
781 	{
782 		.procname	= "panic_on_unrecovered_nmi",
783 		.data		= &panic_on_unrecovered_nmi,
784 		.maxlen		= sizeof(int),
785 		.mode		= 0644,
786 		.proc_handler	= proc_dointvec,
787 	},
788 	{
789 		.procname	= "panic_on_io_nmi",
790 		.data		= &panic_on_io_nmi,
791 		.maxlen		= sizeof(int),
792 		.mode		= 0644,
793 		.proc_handler	= proc_dointvec,
794 	},
795 	{
796 		.procname	= "bootloader_type",
797 		.data		= &bootloader_type,
798 		.maxlen		= sizeof (int),
799 		.mode		= 0444,
800 		.proc_handler	= proc_dointvec,
801 	},
802 	{
803 		.procname	= "bootloader_version",
804 		.data		= &bootloader_version,
805 		.maxlen		= sizeof (int),
806 		.mode		= 0444,
807 		.proc_handler	= proc_dointvec,
808 	},
809 	{
810 		.procname	= "kstack_depth_to_print",
811 		.data		= &kstack_depth_to_print,
812 		.maxlen		= sizeof(int),
813 		.mode		= 0644,
814 		.proc_handler	= proc_dointvec,
815 	},
816 	{
817 		.procname	= "io_delay_type",
818 		.data		= &io_delay_type,
819 		.maxlen		= sizeof(int),
820 		.mode		= 0644,
821 		.proc_handler	= proc_dointvec,
822 	},
823 #endif
824 #if defined(CONFIG_MMU)
825 	{
826 		.procname	= "randomize_va_space",
827 		.data		= &randomize_va_space,
828 		.maxlen		= sizeof(int),
829 		.mode		= 0644,
830 		.proc_handler	= proc_dointvec,
831 	},
832 #endif
833 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
834 	{
835 		.procname	= "spin_retry",
836 		.data		= &spin_retry,
837 		.maxlen		= sizeof (int),
838 		.mode		= 0644,
839 		.proc_handler	= proc_dointvec,
840 	},
841 #endif
842 #if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
843 	{
844 		.procname	= "acpi_video_flags",
845 		.data		= &acpi_realmode_flags,
846 		.maxlen		= sizeof (unsigned long),
847 		.mode		= 0644,
848 		.proc_handler	= proc_doulongvec_minmax,
849 	},
850 #endif
851 #ifdef CONFIG_IA64
852 	{
853 		.procname	= "ignore-unaligned-usertrap",
854 		.data		= &no_unaligned_warning,
855 		.maxlen		= sizeof (int),
856 	 	.mode		= 0644,
857 		.proc_handler	= proc_dointvec,
858 	},
859 	{
860 		.procname	= "unaligned-dump-stack",
861 		.data		= &unaligned_dump_stack,
862 		.maxlen		= sizeof (int),
863 		.mode		= 0644,
864 		.proc_handler	= proc_dointvec,
865 	},
866 #endif
867 #ifdef CONFIG_DETECT_HUNG_TASK
868 	{
869 		.procname	= "hung_task_panic",
870 		.data		= &sysctl_hung_task_panic,
871 		.maxlen		= sizeof(int),
872 		.mode		= 0644,
873 		.proc_handler	= proc_dointvec_minmax,
874 		.extra1		= &zero,
875 		.extra2		= &one,
876 	},
877 	{
878 		.procname	= "hung_task_check_count",
879 		.data		= &sysctl_hung_task_check_count,
880 		.maxlen		= sizeof(unsigned long),
881 		.mode		= 0644,
882 		.proc_handler	= proc_doulongvec_minmax,
883 	},
884 	{
885 		.procname	= "hung_task_timeout_secs",
886 		.data		= &sysctl_hung_task_timeout_secs,
887 		.maxlen		= sizeof(unsigned long),
888 		.mode		= 0644,
889 		.proc_handler	= proc_dohung_task_timeout_secs,
890 	},
891 	{
892 		.procname	= "hung_task_warnings",
893 		.data		= &sysctl_hung_task_warnings,
894 		.maxlen		= sizeof(unsigned long),
895 		.mode		= 0644,
896 		.proc_handler	= proc_doulongvec_minmax,
897 	},
898 #endif
899 #ifdef CONFIG_COMPAT
900 	{
901 		.procname	= "compat-log",
902 		.data		= &compat_log,
903 		.maxlen		= sizeof (int),
904 	 	.mode		= 0644,
905 		.proc_handler	= proc_dointvec,
906 	},
907 #endif
908 #ifdef CONFIG_RT_MUTEXES
909 	{
910 		.procname	= "max_lock_depth",
911 		.data		= &max_lock_depth,
912 		.maxlen		= sizeof(int),
913 		.mode		= 0644,
914 		.proc_handler	= proc_dointvec,
915 	},
916 #endif
917 	{
918 		.procname	= "poweroff_cmd",
919 		.data		= &poweroff_cmd,
920 		.maxlen		= POWEROFF_CMD_PATH_LEN,
921 		.mode		= 0644,
922 		.proc_handler	= proc_dostring,
923 	},
924 #ifdef CONFIG_KEYS
925 	{
926 		.procname	= "keys",
927 		.mode		= 0555,
928 		.child		= key_sysctls,
929 	},
930 #endif
931 #ifdef CONFIG_RCU_TORTURE_TEST
932 	{
933 		.procname       = "rcutorture_runnable",
934 		.data           = &rcutorture_runnable,
935 		.maxlen         = sizeof(int),
936 		.mode           = 0644,
937 		.proc_handler	= proc_dointvec,
938 	},
939 #endif
940 #ifdef CONFIG_PERF_EVENTS
941 	/*
942 	 * User-space scripts rely on the existence of this file
943 	 * as a feature check for perf_events being enabled.
944 	 *
945 	 * So it's an ABI, do not remove!
946 	 */
947 	{
948 		.procname	= "perf_event_paranoid",
949 		.data		= &sysctl_perf_event_paranoid,
950 		.maxlen		= sizeof(sysctl_perf_event_paranoid),
951 		.mode		= 0644,
952 		.proc_handler	= proc_dointvec,
953 	},
954 	{
955 		.procname	= "perf_event_mlock_kb",
956 		.data		= &sysctl_perf_event_mlock,
957 		.maxlen		= sizeof(sysctl_perf_event_mlock),
958 		.mode		= 0644,
959 		.proc_handler	= proc_dointvec,
960 	},
961 	{
962 		.procname	= "perf_event_max_sample_rate",
963 		.data		= &sysctl_perf_event_sample_rate,
964 		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
965 		.mode		= 0644,
966 		.proc_handler	= perf_proc_update_handler,
967 	},
968 #endif
969 #ifdef CONFIG_KMEMCHECK
970 	{
971 		.procname	= "kmemcheck",
972 		.data		= &kmemcheck_enabled,
973 		.maxlen		= sizeof(int),
974 		.mode		= 0644,
975 		.proc_handler	= proc_dointvec,
976 	},
977 #endif
978 #ifdef CONFIG_BLOCK
979 	{
980 		.procname	= "blk_iopoll",
981 		.data		= &blk_iopoll_enabled,
982 		.maxlen		= sizeof(int),
983 		.mode		= 0644,
984 		.proc_handler	= proc_dointvec,
985 	},
986 #endif
987 	{ }
988 };
989 
990 static struct ctl_table vm_table[] = {
991 	{
992 		.procname	= "overcommit_memory",
993 		.data		= &sysctl_overcommit_memory,
994 		.maxlen		= sizeof(sysctl_overcommit_memory),
995 		.mode		= 0644,
996 		.proc_handler	= proc_dointvec_minmax,
997 		.extra1		= &zero,
998 		.extra2		= &two,
999 	},
1000 	{
1001 		.procname	= "panic_on_oom",
1002 		.data		= &sysctl_panic_on_oom,
1003 		.maxlen		= sizeof(sysctl_panic_on_oom),
1004 		.mode		= 0644,
1005 		.proc_handler	= proc_dointvec_minmax,
1006 		.extra1		= &zero,
1007 		.extra2		= &two,
1008 	},
1009 	{
1010 		.procname	= "oom_kill_allocating_task",
1011 		.data		= &sysctl_oom_kill_allocating_task,
1012 		.maxlen		= sizeof(sysctl_oom_kill_allocating_task),
1013 		.mode		= 0644,
1014 		.proc_handler	= proc_dointvec,
1015 	},
1016 	{
1017 		.procname	= "oom_dump_tasks",
1018 		.data		= &sysctl_oom_dump_tasks,
1019 		.maxlen		= sizeof(sysctl_oom_dump_tasks),
1020 		.mode		= 0644,
1021 		.proc_handler	= proc_dointvec,
1022 	},
1023 	{
1024 		.procname	= "overcommit_ratio",
1025 		.data		= &sysctl_overcommit_ratio,
1026 		.maxlen		= sizeof(sysctl_overcommit_ratio),
1027 		.mode		= 0644,
1028 		.proc_handler	= proc_dointvec,
1029 	},
1030 	{
1031 		.procname	= "page-cluster",
1032 		.data		= &page_cluster,
1033 		.maxlen		= sizeof(int),
1034 		.mode		= 0644,
1035 		.proc_handler	= proc_dointvec_minmax,
1036 		.extra1		= &zero,
1037 	},
1038 	{
1039 		.procname	= "dirty_background_ratio",
1040 		.data		= &dirty_background_ratio,
1041 		.maxlen		= sizeof(dirty_background_ratio),
1042 		.mode		= 0644,
1043 		.proc_handler	= dirty_background_ratio_handler,
1044 		.extra1		= &zero,
1045 		.extra2		= &one_hundred,
1046 	},
1047 	{
1048 		.procname	= "dirty_background_bytes",
1049 		.data		= &dirty_background_bytes,
1050 		.maxlen		= sizeof(dirty_background_bytes),
1051 		.mode		= 0644,
1052 		.proc_handler	= dirty_background_bytes_handler,
1053 		.extra1		= &one_ul,
1054 	},
1055 	{
1056 		.procname	= "dirty_ratio",
1057 		.data		= &vm_dirty_ratio,
1058 		.maxlen		= sizeof(vm_dirty_ratio),
1059 		.mode		= 0644,
1060 		.proc_handler	= dirty_ratio_handler,
1061 		.extra1		= &zero,
1062 		.extra2		= &one_hundred,
1063 	},
1064 	{
1065 		.procname	= "dirty_bytes",
1066 		.data		= &vm_dirty_bytes,
1067 		.maxlen		= sizeof(vm_dirty_bytes),
1068 		.mode		= 0644,
1069 		.proc_handler	= dirty_bytes_handler,
1070 		.extra1		= &dirty_bytes_min,
1071 	},
1072 	{
1073 		.procname	= "dirty_writeback_centisecs",
1074 		.data		= &dirty_writeback_interval,
1075 		.maxlen		= sizeof(dirty_writeback_interval),
1076 		.mode		= 0644,
1077 		.proc_handler	= dirty_writeback_centisecs_handler,
1078 	},
1079 	{
1080 		.procname	= "dirty_expire_centisecs",
1081 		.data		= &dirty_expire_interval,
1082 		.maxlen		= sizeof(dirty_expire_interval),
1083 		.mode		= 0644,
1084 		.proc_handler	= proc_dointvec_minmax,
1085 		.extra1		= &zero,
1086 	},
1087 	{
1088 		.procname	= "nr_pdflush_threads",
1089 		.data		= &nr_pdflush_threads,
1090 		.maxlen		= sizeof nr_pdflush_threads,
1091 		.mode		= 0444 /* read-only*/,
1092 		.proc_handler	= proc_dointvec,
1093 	},
1094 	{
1095 		.procname	= "swappiness",
1096 		.data		= &vm_swappiness,
1097 		.maxlen		= sizeof(vm_swappiness),
1098 		.mode		= 0644,
1099 		.proc_handler	= proc_dointvec_minmax,
1100 		.extra1		= &zero,
1101 		.extra2		= &one_hundred,
1102 	},
1103 #ifdef CONFIG_HUGETLB_PAGE
1104 	{
1105 		.procname	= "nr_hugepages",
1106 		.data		= NULL,
1107 		.maxlen		= sizeof(unsigned long),
1108 		.mode		= 0644,
1109 		.proc_handler	= hugetlb_sysctl_handler,
1110 		.extra1		= (void *)&hugetlb_zero,
1111 		.extra2		= (void *)&hugetlb_infinity,
1112 	},
1113 #ifdef CONFIG_NUMA
1114 	{
1115 		.procname       = "nr_hugepages_mempolicy",
1116 		.data           = NULL,
1117 		.maxlen         = sizeof(unsigned long),
1118 		.mode           = 0644,
1119 		.proc_handler   = &hugetlb_mempolicy_sysctl_handler,
1120 		.extra1		= (void *)&hugetlb_zero,
1121 		.extra2		= (void *)&hugetlb_infinity,
1122 	},
1123 #endif
1124 	 {
1125 		.procname	= "hugetlb_shm_group",
1126 		.data		= &sysctl_hugetlb_shm_group,
1127 		.maxlen		= sizeof(gid_t),
1128 		.mode		= 0644,
1129 		.proc_handler	= proc_dointvec,
1130 	 },
1131 	 {
1132 		.procname	= "hugepages_treat_as_movable",
1133 		.data		= &hugepages_treat_as_movable,
1134 		.maxlen		= sizeof(int),
1135 		.mode		= 0644,
1136 		.proc_handler	= hugetlb_treat_movable_handler,
1137 	},
1138 	{
1139 		.procname	= "nr_overcommit_hugepages",
1140 		.data		= NULL,
1141 		.maxlen		= sizeof(unsigned long),
1142 		.mode		= 0644,
1143 		.proc_handler	= hugetlb_overcommit_handler,
1144 		.extra1		= (void *)&hugetlb_zero,
1145 		.extra2		= (void *)&hugetlb_infinity,
1146 	},
1147 #endif
1148 	{
1149 		.procname	= "lowmem_reserve_ratio",
1150 		.data		= &sysctl_lowmem_reserve_ratio,
1151 		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
1152 		.mode		= 0644,
1153 		.proc_handler	= lowmem_reserve_ratio_sysctl_handler,
1154 	},
1155 	{
1156 		.procname	= "drop_caches",
1157 		.data		= &sysctl_drop_caches,
1158 		.maxlen		= sizeof(int),
1159 		.mode		= 0644,
1160 		.proc_handler	= drop_caches_sysctl_handler,
1161 		.extra1		= &one,
1162 		.extra2		= &three,
1163 	},
1164 #ifdef CONFIG_COMPACTION
1165 	{
1166 		.procname	= "compact_memory",
1167 		.data		= &sysctl_compact_memory,
1168 		.maxlen		= sizeof(int),
1169 		.mode		= 0200,
1170 		.proc_handler	= sysctl_compaction_handler,
1171 	},
1172 	{
1173 		.procname	= "extfrag_threshold",
1174 		.data		= &sysctl_extfrag_threshold,
1175 		.maxlen		= sizeof(int),
1176 		.mode		= 0644,
1177 		.proc_handler	= sysctl_extfrag_handler,
1178 		.extra1		= &min_extfrag_threshold,
1179 		.extra2		= &max_extfrag_threshold,
1180 	},
1181 
1182 #endif /* CONFIG_COMPACTION */
1183 	{
1184 		.procname	= "min_free_kbytes",
1185 		.data		= &min_free_kbytes,
1186 		.maxlen		= sizeof(min_free_kbytes),
1187 		.mode		= 0644,
1188 		.proc_handler	= min_free_kbytes_sysctl_handler,
1189 		.extra1		= &zero,
1190 	},
1191 	{
1192 		.procname	= "percpu_pagelist_fraction",
1193 		.data		= &percpu_pagelist_fraction,
1194 		.maxlen		= sizeof(percpu_pagelist_fraction),
1195 		.mode		= 0644,
1196 		.proc_handler	= percpu_pagelist_fraction_sysctl_handler,
1197 		.extra1		= &min_percpu_pagelist_fract,
1198 	},
1199 #ifdef CONFIG_MMU
1200 	{
1201 		.procname	= "max_map_count",
1202 		.data		= &sysctl_max_map_count,
1203 		.maxlen		= sizeof(sysctl_max_map_count),
1204 		.mode		= 0644,
1205 		.proc_handler	= proc_dointvec_minmax,
1206 		.extra1		= &zero,
1207 	},
1208 #else
1209 	{
1210 		.procname	= "nr_trim_pages",
1211 		.data		= &sysctl_nr_trim_pages,
1212 		.maxlen		= sizeof(sysctl_nr_trim_pages),
1213 		.mode		= 0644,
1214 		.proc_handler	= proc_dointvec_minmax,
1215 		.extra1		= &zero,
1216 	},
1217 #endif
1218 	{
1219 		.procname	= "laptop_mode",
1220 		.data		= &laptop_mode,
1221 		.maxlen		= sizeof(laptop_mode),
1222 		.mode		= 0644,
1223 		.proc_handler	= proc_dointvec_jiffies,
1224 	},
1225 	{
1226 		.procname	= "block_dump",
1227 		.data		= &block_dump,
1228 		.maxlen		= sizeof(block_dump),
1229 		.mode		= 0644,
1230 		.proc_handler	= proc_dointvec,
1231 		.extra1		= &zero,
1232 	},
1233 	{
1234 		.procname	= "vfs_cache_pressure",
1235 		.data		= &sysctl_vfs_cache_pressure,
1236 		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
1237 		.mode		= 0644,
1238 		.proc_handler	= proc_dointvec,
1239 		.extra1		= &zero,
1240 	},
1241 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1242 	{
1243 		.procname	= "legacy_va_layout",
1244 		.data		= &sysctl_legacy_va_layout,
1245 		.maxlen		= sizeof(sysctl_legacy_va_layout),
1246 		.mode		= 0644,
1247 		.proc_handler	= proc_dointvec,
1248 		.extra1		= &zero,
1249 	},
1250 #endif
1251 #ifdef CONFIG_NUMA
1252 	{
1253 		.procname	= "zone_reclaim_mode",
1254 		.data		= &zone_reclaim_mode,
1255 		.maxlen		= sizeof(zone_reclaim_mode),
1256 		.mode		= 0644,
1257 		.proc_handler	= proc_dointvec,
1258 		.extra1		= &zero,
1259 	},
1260 	{
1261 		.procname	= "min_unmapped_ratio",
1262 		.data		= &sysctl_min_unmapped_ratio,
1263 		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
1264 		.mode		= 0644,
1265 		.proc_handler	= sysctl_min_unmapped_ratio_sysctl_handler,
1266 		.extra1		= &zero,
1267 		.extra2		= &one_hundred,
1268 	},
1269 	{
1270 		.procname	= "min_slab_ratio",
1271 		.data		= &sysctl_min_slab_ratio,
1272 		.maxlen		= sizeof(sysctl_min_slab_ratio),
1273 		.mode		= 0644,
1274 		.proc_handler	= sysctl_min_slab_ratio_sysctl_handler,
1275 		.extra1		= &zero,
1276 		.extra2		= &one_hundred,
1277 	},
1278 #endif
1279 #ifdef CONFIG_SMP
1280 	{
1281 		.procname	= "stat_interval",
1282 		.data		= &sysctl_stat_interval,
1283 		.maxlen		= sizeof(sysctl_stat_interval),
1284 		.mode		= 0644,
1285 		.proc_handler	= proc_dointvec_jiffies,
1286 	},
1287 #endif
1288 #ifdef CONFIG_MMU
1289 	{
1290 		.procname	= "mmap_min_addr",
1291 		.data		= &dac_mmap_min_addr,
1292 		.maxlen		= sizeof(unsigned long),
1293 		.mode		= 0644,
1294 		.proc_handler	= mmap_min_addr_handler,
1295 	},
1296 #endif
1297 #ifdef CONFIG_NUMA
1298 	{
1299 		.procname	= "numa_zonelist_order",
1300 		.data		= &numa_zonelist_order,
1301 		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
1302 		.mode		= 0644,
1303 		.proc_handler	= numa_zonelist_order_handler,
1304 	},
1305 #endif
1306 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1307    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1308 	{
1309 		.procname	= "vdso_enabled",
1310 		.data		= &vdso_enabled,
1311 		.maxlen		= sizeof(vdso_enabled),
1312 		.mode		= 0644,
1313 		.proc_handler	= proc_dointvec,
1314 		.extra1		= &zero,
1315 	},
1316 #endif
1317 #ifdef CONFIG_HIGHMEM
1318 	{
1319 		.procname	= "highmem_is_dirtyable",
1320 		.data		= &vm_highmem_is_dirtyable,
1321 		.maxlen		= sizeof(vm_highmem_is_dirtyable),
1322 		.mode		= 0644,
1323 		.proc_handler	= proc_dointvec_minmax,
1324 		.extra1		= &zero,
1325 		.extra2		= &one,
1326 	},
1327 #endif
1328 	{
1329 		.procname	= "scan_unevictable_pages",
1330 		.data		= &scan_unevictable_pages,
1331 		.maxlen		= sizeof(scan_unevictable_pages),
1332 		.mode		= 0644,
1333 		.proc_handler	= scan_unevictable_handler,
1334 	},
1335 #ifdef CONFIG_MEMORY_FAILURE
1336 	{
1337 		.procname	= "memory_failure_early_kill",
1338 		.data		= &sysctl_memory_failure_early_kill,
1339 		.maxlen		= sizeof(sysctl_memory_failure_early_kill),
1340 		.mode		= 0644,
1341 		.proc_handler	= proc_dointvec_minmax,
1342 		.extra1		= &zero,
1343 		.extra2		= &one,
1344 	},
1345 	{
1346 		.procname	= "memory_failure_recovery",
1347 		.data		= &sysctl_memory_failure_recovery,
1348 		.maxlen		= sizeof(sysctl_memory_failure_recovery),
1349 		.mode		= 0644,
1350 		.proc_handler	= proc_dointvec_minmax,
1351 		.extra1		= &zero,
1352 		.extra2		= &one,
1353 	},
1354 #endif
1355 	{ }
1356 };
1357 
1358 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1359 static struct ctl_table binfmt_misc_table[] = {
1360 	{ }
1361 };
1362 #endif
1363 
1364 static struct ctl_table fs_table[] = {
1365 	{
1366 		.procname	= "inode-nr",
1367 		.data		= &inodes_stat,
1368 		.maxlen		= 2*sizeof(int),
1369 		.mode		= 0444,
1370 		.proc_handler	= proc_nr_inodes,
1371 	},
1372 	{
1373 		.procname	= "inode-state",
1374 		.data		= &inodes_stat,
1375 		.maxlen		= 7*sizeof(int),
1376 		.mode		= 0444,
1377 		.proc_handler	= proc_nr_inodes,
1378 	},
1379 	{
1380 		.procname	= "file-nr",
1381 		.data		= &files_stat,
1382 		.maxlen		= sizeof(files_stat),
1383 		.mode		= 0444,
1384 		.proc_handler	= proc_nr_files,
1385 	},
1386 	{
1387 		.procname	= "file-max",
1388 		.data		= &files_stat.max_files,
1389 		.maxlen		= sizeof(files_stat.max_files),
1390 		.mode		= 0644,
1391 		.proc_handler	= proc_doulongvec_minmax,
1392 	},
1393 	{
1394 		.procname	= "nr_open",
1395 		.data		= &sysctl_nr_open,
1396 		.maxlen		= sizeof(int),
1397 		.mode		= 0644,
1398 		.proc_handler	= proc_dointvec_minmax,
1399 		.extra1		= &sysctl_nr_open_min,
1400 		.extra2		= &sysctl_nr_open_max,
1401 	},
1402 	{
1403 		.procname	= "dentry-state",
1404 		.data		= &dentry_stat,
1405 		.maxlen		= 6*sizeof(int),
1406 		.mode		= 0444,
1407 		.proc_handler	= proc_nr_dentry,
1408 	},
1409 	{
1410 		.procname	= "overflowuid",
1411 		.data		= &fs_overflowuid,
1412 		.maxlen		= sizeof(int),
1413 		.mode		= 0644,
1414 		.proc_handler	= proc_dointvec_minmax,
1415 		.extra1		= &minolduid,
1416 		.extra2		= &maxolduid,
1417 	},
1418 	{
1419 		.procname	= "overflowgid",
1420 		.data		= &fs_overflowgid,
1421 		.maxlen		= sizeof(int),
1422 		.mode		= 0644,
1423 		.proc_handler	= proc_dointvec_minmax,
1424 		.extra1		= &minolduid,
1425 		.extra2		= &maxolduid,
1426 	},
1427 #ifdef CONFIG_FILE_LOCKING
1428 	{
1429 		.procname	= "leases-enable",
1430 		.data		= &leases_enable,
1431 		.maxlen		= sizeof(int),
1432 		.mode		= 0644,
1433 		.proc_handler	= proc_dointvec,
1434 	},
1435 #endif
1436 #ifdef CONFIG_DNOTIFY
1437 	{
1438 		.procname	= "dir-notify-enable",
1439 		.data		= &dir_notify_enable,
1440 		.maxlen		= sizeof(int),
1441 		.mode		= 0644,
1442 		.proc_handler	= proc_dointvec,
1443 	},
1444 #endif
1445 #ifdef CONFIG_MMU
1446 #ifdef CONFIG_FILE_LOCKING
1447 	{
1448 		.procname	= "lease-break-time",
1449 		.data		= &lease_break_time,
1450 		.maxlen		= sizeof(int),
1451 		.mode		= 0644,
1452 		.proc_handler	= proc_dointvec,
1453 	},
1454 #endif
1455 #ifdef CONFIG_AIO
1456 	{
1457 		.procname	= "aio-nr",
1458 		.data		= &aio_nr,
1459 		.maxlen		= sizeof(aio_nr),
1460 		.mode		= 0444,
1461 		.proc_handler	= proc_doulongvec_minmax,
1462 	},
1463 	{
1464 		.procname	= "aio-max-nr",
1465 		.data		= &aio_max_nr,
1466 		.maxlen		= sizeof(aio_max_nr),
1467 		.mode		= 0644,
1468 		.proc_handler	= proc_doulongvec_minmax,
1469 	},
1470 #endif /* CONFIG_AIO */
1471 #ifdef CONFIG_INOTIFY_USER
1472 	{
1473 		.procname	= "inotify",
1474 		.mode		= 0555,
1475 		.child		= inotify_table,
1476 	},
1477 #endif
1478 #ifdef CONFIG_EPOLL
1479 	{
1480 		.procname	= "epoll",
1481 		.mode		= 0555,
1482 		.child		= epoll_table,
1483 	},
1484 #endif
1485 #endif
1486 	{
1487 		.procname	= "suid_dumpable",
1488 		.data		= &suid_dumpable,
1489 		.maxlen		= sizeof(int),
1490 		.mode		= 0644,
1491 		.proc_handler	= proc_dointvec_minmax,
1492 		.extra1		= &zero,
1493 		.extra2		= &two,
1494 	},
1495 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1496 	{
1497 		.procname	= "binfmt_misc",
1498 		.mode		= 0555,
1499 		.child		= binfmt_misc_table,
1500 	},
1501 #endif
1502 	{
1503 		.procname	= "pipe-max-size",
1504 		.data		= &pipe_max_size,
1505 		.maxlen		= sizeof(int),
1506 		.mode		= 0644,
1507 		.proc_handler	= &pipe_proc_fn,
1508 		.extra1		= &pipe_min_size,
1509 	},
1510 	{ }
1511 };
1512 
1513 static struct ctl_table debug_table[] = {
1514 #if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) || \
1515     defined(CONFIG_S390) || defined(CONFIG_TILE)
1516 	{
1517 		.procname	= "exception-trace",
1518 		.data		= &show_unhandled_signals,
1519 		.maxlen		= sizeof(int),
1520 		.mode		= 0644,
1521 		.proc_handler	= proc_dointvec
1522 	},
1523 #endif
1524 #if defined(CONFIG_OPTPROBES)
1525 	{
1526 		.procname	= "kprobes-optimization",
1527 		.data		= &sysctl_kprobes_optimization,
1528 		.maxlen		= sizeof(int),
1529 		.mode		= 0644,
1530 		.proc_handler	= proc_kprobes_optimization_handler,
1531 		.extra1		= &zero,
1532 		.extra2		= &one,
1533 	},
1534 #endif
1535 	{ }
1536 };
1537 
1538 static struct ctl_table dev_table[] = {
1539 	{ }
1540 };
1541 
1542 static DEFINE_SPINLOCK(sysctl_lock);
1543 
1544 /* called under sysctl_lock */
1545 static int use_table(struct ctl_table_header *p)
1546 {
1547 	if (unlikely(p->unregistering))
1548 		return 0;
1549 	p->used++;
1550 	return 1;
1551 }
1552 
1553 /* called under sysctl_lock */
1554 static void unuse_table(struct ctl_table_header *p)
1555 {
1556 	if (!--p->used)
1557 		if (unlikely(p->unregistering))
1558 			complete(p->unregistering);
1559 }
1560 
1561 /* called under sysctl_lock, will reacquire if has to wait */
1562 static void start_unregistering(struct ctl_table_header *p)
1563 {
1564 	/*
1565 	 * if p->used is 0, nobody will ever touch that entry again;
1566 	 * we'll eliminate all paths to it before dropping sysctl_lock
1567 	 */
1568 	if (unlikely(p->used)) {
1569 		struct completion wait;
1570 		init_completion(&wait);
1571 		p->unregistering = &wait;
1572 		spin_unlock(&sysctl_lock);
1573 		wait_for_completion(&wait);
1574 		spin_lock(&sysctl_lock);
1575 	} else {
1576 		/* anything non-NULL; we'll never dereference it */
1577 		p->unregistering = ERR_PTR(-EINVAL);
1578 	}
1579 	/*
1580 	 * do not remove from the list until nobody holds it; walking the
1581 	 * list in do_sysctl() relies on that.
1582 	 */
1583 	list_del_init(&p->ctl_entry);
1584 }
1585 
1586 void sysctl_head_get(struct ctl_table_header *head)
1587 {
1588 	spin_lock(&sysctl_lock);
1589 	head->count++;
1590 	spin_unlock(&sysctl_lock);
1591 }
1592 
1593 static void free_head(struct rcu_head *rcu)
1594 {
1595 	kfree(container_of(rcu, struct ctl_table_header, rcu));
1596 }
1597 
1598 void sysctl_head_put(struct ctl_table_header *head)
1599 {
1600 	spin_lock(&sysctl_lock);
1601 	if (!--head->count)
1602 		call_rcu(&head->rcu, free_head);
1603 	spin_unlock(&sysctl_lock);
1604 }
1605 
1606 struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
1607 {
1608 	if (!head)
1609 		BUG();
1610 	spin_lock(&sysctl_lock);
1611 	if (!use_table(head))
1612 		head = ERR_PTR(-ENOENT);
1613 	spin_unlock(&sysctl_lock);
1614 	return head;
1615 }
1616 
1617 void sysctl_head_finish(struct ctl_table_header *head)
1618 {
1619 	if (!head)
1620 		return;
1621 	spin_lock(&sysctl_lock);
1622 	unuse_table(head);
1623 	spin_unlock(&sysctl_lock);
1624 }
1625 
1626 static struct ctl_table_set *
1627 lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
1628 {
1629 	struct ctl_table_set *set = &root->default_set;
1630 	if (root->lookup)
1631 		set = root->lookup(root, namespaces);
1632 	return set;
1633 }
1634 
1635 static struct list_head *
1636 lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces)
1637 {
1638 	struct ctl_table_set *set = lookup_header_set(root, namespaces);
1639 	return &set->list;
1640 }
1641 
1642 struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces,
1643 					    struct ctl_table_header *prev)
1644 {
1645 	struct ctl_table_root *root;
1646 	struct list_head *header_list;
1647 	struct ctl_table_header *head;
1648 	struct list_head *tmp;
1649 
1650 	spin_lock(&sysctl_lock);
1651 	if (prev) {
1652 		head = prev;
1653 		tmp = &prev->ctl_entry;
1654 		unuse_table(prev);
1655 		goto next;
1656 	}
1657 	tmp = &root_table_header.ctl_entry;
1658 	for (;;) {
1659 		head = list_entry(tmp, struct ctl_table_header, ctl_entry);
1660 
1661 		if (!use_table(head))
1662 			goto next;
1663 		spin_unlock(&sysctl_lock);
1664 		return head;
1665 	next:
1666 		root = head->root;
1667 		tmp = tmp->next;
1668 		header_list = lookup_header_list(root, namespaces);
1669 		if (tmp != header_list)
1670 			continue;
1671 
1672 		do {
1673 			root = list_entry(root->root_list.next,
1674 					struct ctl_table_root, root_list);
1675 			if (root == &sysctl_table_root)
1676 				goto out;
1677 			header_list = lookup_header_list(root, namespaces);
1678 		} while (list_empty(header_list));
1679 		tmp = header_list->next;
1680 	}
1681 out:
1682 	spin_unlock(&sysctl_lock);
1683 	return NULL;
1684 }
1685 
1686 struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
1687 {
1688 	return __sysctl_head_next(current->nsproxy, prev);
1689 }
1690 
1691 void register_sysctl_root(struct ctl_table_root *root)
1692 {
1693 	spin_lock(&sysctl_lock);
1694 	list_add_tail(&root->root_list, &sysctl_table_root.root_list);
1695 	spin_unlock(&sysctl_lock);
1696 }
1697 
1698 /*
1699  * sysctl_perm does NOT grant the superuser all rights automatically, because
1700  * some sysctl variables are readonly even to root.
1701  */
1702 
1703 static int test_perm(int mode, int op)
1704 {
1705 	if (!current_euid())
1706 		mode >>= 6;
1707 	else if (in_egroup_p(0))
1708 		mode >>= 3;
1709 	if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0)
1710 		return 0;
1711 	return -EACCES;
1712 }
1713 
1714 int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
1715 {
1716 	int mode;
1717 
1718 	if (root->permissions)
1719 		mode = root->permissions(root, current->nsproxy, table);
1720 	else
1721 		mode = table->mode;
1722 
1723 	return test_perm(mode, op);
1724 }
1725 
1726 static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1727 {
1728 	for (; table->procname; table++) {
1729 		table->parent = parent;
1730 		if (table->child)
1731 			sysctl_set_parent(table, table->child);
1732 	}
1733 }
1734 
1735 static __init int sysctl_init(void)
1736 {
1737 	sysctl_set_parent(NULL, root_table);
1738 #ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1739 	sysctl_check_table(current->nsproxy, root_table);
1740 #endif
1741 	return 0;
1742 }
1743 
1744 core_initcall(sysctl_init);
1745 
1746 static struct ctl_table *is_branch_in(struct ctl_table *branch,
1747 				      struct ctl_table *table)
1748 {
1749 	struct ctl_table *p;
1750 	const char *s = branch->procname;
1751 
1752 	/* branch should have named subdirectory as its first element */
1753 	if (!s || !branch->child)
1754 		return NULL;
1755 
1756 	/* ... and nothing else */
1757 	if (branch[1].procname)
1758 		return NULL;
1759 
1760 	/* table should contain subdirectory with the same name */
1761 	for (p = table; p->procname; p++) {
1762 		if (!p->child)
1763 			continue;
1764 		if (p->procname && strcmp(p->procname, s) == 0)
1765 			return p;
1766 	}
1767 	return NULL;
1768 }
1769 
1770 /* see if attaching q to p would be an improvement */
1771 static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
1772 {
1773 	struct ctl_table *to = p->ctl_table, *by = q->ctl_table;
1774 	struct ctl_table *next;
1775 	int is_better = 0;
1776 	int not_in_parent = !p->attached_by;
1777 
1778 	while ((next = is_branch_in(by, to)) != NULL) {
1779 		if (by == q->attached_by)
1780 			is_better = 1;
1781 		if (to == p->attached_by)
1782 			not_in_parent = 1;
1783 		by = by->child;
1784 		to = next->child;
1785 	}
1786 
1787 	if (is_better && not_in_parent) {
1788 		q->attached_by = by;
1789 		q->attached_to = to;
1790 		q->parent = p;
1791 	}
1792 }
1793 
1794 /**
1795  * __register_sysctl_paths - register a sysctl hierarchy
1796  * @root: List of sysctl headers to register on
1797  * @namespaces: Data to compute which lists of sysctl entries are visible
1798  * @path: The path to the directory the sysctl table is in.
1799  * @table: the top-level table structure
1800  *
1801  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1802  * array. A completely 0 filled entry terminates the table.
1803  *
1804  * The members of the &struct ctl_table structure are used as follows:
1805  *
1806  * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1807  *            enter a sysctl file
1808  *
1809  * data - a pointer to data for use by proc_handler
1810  *
1811  * maxlen - the maximum size in bytes of the data
1812  *
1813  * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1814  *
1815  * child - a pointer to the child sysctl table if this entry is a directory, or
1816  *         %NULL.
1817  *
1818  * proc_handler - the text handler routine (described below)
1819  *
1820  * de - for internal use by the sysctl routines
1821  *
1822  * extra1, extra2 - extra pointers usable by the proc handler routines
1823  *
1824  * Leaf nodes in the sysctl tree will be represented by a single file
1825  * under /proc; non-leaf nodes will be represented by directories.
1826  *
1827  * sysctl(2) can automatically manage read and write requests through
1828  * the sysctl table.  The data and maxlen fields of the ctl_table
1829  * struct enable minimal validation of the values being written to be
1830  * performed, and the mode field allows minimal authentication.
1831  *
1832  * There must be a proc_handler routine for any terminal nodes
1833  * mirrored under /proc/sys (non-terminals are handled by a built-in
1834  * directory handler).  Several default handlers are available to
1835  * cover common cases -
1836  *
1837  * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1838  * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1839  * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1840  *
1841  * It is the handler's job to read the input buffer from user memory
1842  * and process it. The handler should return 0 on success.
1843  *
1844  * This routine returns %NULL on a failure to register, and a pointer
1845  * to the table header on success.
1846  */
1847 struct ctl_table_header *__register_sysctl_paths(
1848 	struct ctl_table_root *root,
1849 	struct nsproxy *namespaces,
1850 	const struct ctl_path *path, struct ctl_table *table)
1851 {
1852 	struct ctl_table_header *header;
1853 	struct ctl_table *new, **prevp;
1854 	unsigned int n, npath;
1855 	struct ctl_table_set *set;
1856 
1857 	/* Count the path components */
1858 	for (npath = 0; path[npath].procname; ++npath)
1859 		;
1860 
1861 	/*
1862 	 * For each path component, allocate a 2-element ctl_table array.
1863 	 * The first array element will be filled with the sysctl entry
1864 	 * for this, the second will be the sentinel (procname == 0).
1865 	 *
1866 	 * We allocate everything in one go so that we don't have to
1867 	 * worry about freeing additional memory in unregister_sysctl_table.
1868 	 */
1869 	header = kzalloc(sizeof(struct ctl_table_header) +
1870 			 (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL);
1871 	if (!header)
1872 		return NULL;
1873 
1874 	new = (struct ctl_table *) (header + 1);
1875 
1876 	/* Now connect the dots */
1877 	prevp = &header->ctl_table;
1878 	for (n = 0; n < npath; ++n, ++path) {
1879 		/* Copy the procname */
1880 		new->procname = path->procname;
1881 		new->mode     = 0555;
1882 
1883 		*prevp = new;
1884 		prevp = &new->child;
1885 
1886 		new += 2;
1887 	}
1888 	*prevp = table;
1889 	header->ctl_table_arg = table;
1890 
1891 	INIT_LIST_HEAD(&header->ctl_entry);
1892 	header->used = 0;
1893 	header->unregistering = NULL;
1894 	header->root = root;
1895 	sysctl_set_parent(NULL, header->ctl_table);
1896 	header->count = 1;
1897 #ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1898 	if (sysctl_check_table(namespaces, header->ctl_table)) {
1899 		kfree(header);
1900 		return NULL;
1901 	}
1902 #endif
1903 	spin_lock(&sysctl_lock);
1904 	header->set = lookup_header_set(root, namespaces);
1905 	header->attached_by = header->ctl_table;
1906 	header->attached_to = root_table;
1907 	header->parent = &root_table_header;
1908 	for (set = header->set; set; set = set->parent) {
1909 		struct ctl_table_header *p;
1910 		list_for_each_entry(p, &set->list, ctl_entry) {
1911 			if (p->unregistering)
1912 				continue;
1913 			try_attach(p, header);
1914 		}
1915 	}
1916 	header->parent->count++;
1917 	list_add_tail(&header->ctl_entry, &header->set->list);
1918 	spin_unlock(&sysctl_lock);
1919 
1920 	return header;
1921 }
1922 
1923 /**
1924  * register_sysctl_table_path - register a sysctl table hierarchy
1925  * @path: The path to the directory the sysctl table is in.
1926  * @table: the top-level table structure
1927  *
1928  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1929  * array. A completely 0 filled entry terminates the table.
1930  *
1931  * See __register_sysctl_paths for more details.
1932  */
1933 struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
1934 						struct ctl_table *table)
1935 {
1936 	return __register_sysctl_paths(&sysctl_table_root, current->nsproxy,
1937 					path, table);
1938 }
1939 
1940 /**
1941  * register_sysctl_table - register a sysctl table hierarchy
1942  * @table: the top-level table structure
1943  *
1944  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1945  * array. A completely 0 filled entry terminates the table.
1946  *
1947  * See register_sysctl_paths for more details.
1948  */
1949 struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
1950 {
1951 	static const struct ctl_path null_path[] = { {} };
1952 
1953 	return register_sysctl_paths(null_path, table);
1954 }
1955 
1956 /**
1957  * unregister_sysctl_table - unregister a sysctl table hierarchy
1958  * @header: the header returned from register_sysctl_table
1959  *
1960  * Unregisters the sysctl table and all children. proc entries may not
1961  * actually be removed until they are no longer used by anyone.
1962  */
1963 void unregister_sysctl_table(struct ctl_table_header * header)
1964 {
1965 	might_sleep();
1966 
1967 	if (header == NULL)
1968 		return;
1969 
1970 	spin_lock(&sysctl_lock);
1971 	start_unregistering(header);
1972 	if (!--header->parent->count) {
1973 		WARN_ON(1);
1974 		call_rcu(&header->parent->rcu, free_head);
1975 	}
1976 	if (!--header->count)
1977 		call_rcu(&header->rcu, free_head);
1978 	spin_unlock(&sysctl_lock);
1979 }
1980 
1981 int sysctl_is_seen(struct ctl_table_header *p)
1982 {
1983 	struct ctl_table_set *set = p->set;
1984 	int res;
1985 	spin_lock(&sysctl_lock);
1986 	if (p->unregistering)
1987 		res = 0;
1988 	else if (!set->is_seen)
1989 		res = 1;
1990 	else
1991 		res = set->is_seen(set);
1992 	spin_unlock(&sysctl_lock);
1993 	return res;
1994 }
1995 
1996 void setup_sysctl_set(struct ctl_table_set *p,
1997 	struct ctl_table_set *parent,
1998 	int (*is_seen)(struct ctl_table_set *))
1999 {
2000 	INIT_LIST_HEAD(&p->list);
2001 	p->parent = parent ? parent : &sysctl_table_root.default_set;
2002 	p->is_seen = is_seen;
2003 }
2004 
2005 #else /* !CONFIG_SYSCTL */
2006 struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
2007 {
2008 	return NULL;
2009 }
2010 
2011 struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
2012 						    struct ctl_table *table)
2013 {
2014 	return NULL;
2015 }
2016 
2017 void unregister_sysctl_table(struct ctl_table_header * table)
2018 {
2019 }
2020 
2021 void setup_sysctl_set(struct ctl_table_set *p,
2022 	struct ctl_table_set *parent,
2023 	int (*is_seen)(struct ctl_table_set *))
2024 {
2025 }
2026 
2027 void sysctl_head_put(struct ctl_table_header *head)
2028 {
2029 }
2030 
2031 #endif /* CONFIG_SYSCTL */
2032 
2033 /*
2034  * /proc/sys support
2035  */
2036 
2037 #ifdef CONFIG_PROC_SYSCTL
2038 
2039 static int _proc_do_string(void* data, int maxlen, int write,
2040 			   void __user *buffer,
2041 			   size_t *lenp, loff_t *ppos)
2042 {
2043 	size_t len;
2044 	char __user *p;
2045 	char c;
2046 
2047 	if (!data || !maxlen || !*lenp) {
2048 		*lenp = 0;
2049 		return 0;
2050 	}
2051 
2052 	if (write) {
2053 		len = 0;
2054 		p = buffer;
2055 		while (len < *lenp) {
2056 			if (get_user(c, p++))
2057 				return -EFAULT;
2058 			if (c == 0 || c == '\n')
2059 				break;
2060 			len++;
2061 		}
2062 		if (len >= maxlen)
2063 			len = maxlen-1;
2064 		if(copy_from_user(data, buffer, len))
2065 			return -EFAULT;
2066 		((char *) data)[len] = 0;
2067 		*ppos += *lenp;
2068 	} else {
2069 		len = strlen(data);
2070 		if (len > maxlen)
2071 			len = maxlen;
2072 
2073 		if (*ppos > len) {
2074 			*lenp = 0;
2075 			return 0;
2076 		}
2077 
2078 		data += *ppos;
2079 		len  -= *ppos;
2080 
2081 		if (len > *lenp)
2082 			len = *lenp;
2083 		if (len)
2084 			if(copy_to_user(buffer, data, len))
2085 				return -EFAULT;
2086 		if (len < *lenp) {
2087 			if(put_user('\n', ((char __user *) buffer) + len))
2088 				return -EFAULT;
2089 			len++;
2090 		}
2091 		*lenp = len;
2092 		*ppos += len;
2093 	}
2094 	return 0;
2095 }
2096 
2097 /**
2098  * proc_dostring - read a string sysctl
2099  * @table: the sysctl table
2100  * @write: %TRUE if this is a write to the sysctl file
2101  * @buffer: the user buffer
2102  * @lenp: the size of the user buffer
2103  * @ppos: file position
2104  *
2105  * Reads/writes a string from/to the user buffer. If the kernel
2106  * buffer provided is not large enough to hold the string, the
2107  * string is truncated. The copied string is %NULL-terminated.
2108  * If the string is being read by the user process, it is copied
2109  * and a newline '\n' is added. It is truncated if the buffer is
2110  * not large enough.
2111  *
2112  * Returns 0 on success.
2113  */
2114 int proc_dostring(struct ctl_table *table, int write,
2115 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2116 {
2117 	return _proc_do_string(table->data, table->maxlen, write,
2118 			       buffer, lenp, ppos);
2119 }
2120 
/*
 * Advance *@buf to the position returned by skip_spaces() and return
 * the number of characters skipped.
 */
static size_t proc_skip_spaces(char **buf)
{
	char *start = *buf;

	*buf = skip_spaces(start);
	return *buf - start;
}
2129 
/*
 * Skip over a run of the character @v at the start of *@buf, looking at
 * no more than *@size characters.  *@buf is advanced and *@size reduced
 * by the number of characters skipped.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	while (*size && **buf == v) {
		(*buf)++;
		(*size)--;
	}
}
2139 
#define TMPBUFLEN 22
/**
 * proc_get_long - reads an ASCII formatted integer from a user buffer
 *
 * @buf: a kernel buffer
 * @size: size of the kernel buffer
 * @val: this is where the number will be stored
 * @neg: set to %TRUE if number is negative
 * @perm_tr: a vector which contains the allowed trailers
 * @perm_tr_len: size of the perm_tr vector
 * @tr: pointer to store the trailer character
 *
 * In case of success %0 is returned and @buf and @size are updated with
 * the amount of bytes read. If @tr is non-NULL and a trailing
 * character exists (size is non-zero after returning from this
 * function), @tr is updated with the trailing character.
 *
 * -EINVAL is returned when the buffer is empty, does not start with a
 * digit (after an optional '-'), holds a number that fills the whole
 * temporary buffer, or is followed by a character not in @perm_tr.
 */
static int proc_get_long(char **buf, size_t *size,
			  unsigned long *val, bool *neg,
			  const char *perm_tr, unsigned perm_tr_len, char *tr)
{
	char tmp[TMPBUFLEN];
	char *p = tmp;
	int len;

	if (!*size)
		return -EINVAL;

	/* parse from a bounded, NUL-terminated copy of the input */
	len = *size;
	if (len > TMPBUFLEN - 1)
		len = TMPBUFLEN - 1;

	memcpy(tmp, *buf, len);
	tmp[len] = 0;

	/* a lone '-' is not treated as a sign */
	*neg = (*p == '-' && *size > 1);
	if (*neg)
		p++;
	if (!isdigit(*p))
		return -EINVAL;

	*val = simple_strtoul(p, &p, 0);

	len = p - tmp;

	/* We don't know if the next char is whitespace thus we may accept
	 * invalid integers (e.g. 1234...a) or two integers instead of one
	 * (e.g. 123...1). So lets not allow such large numbers. */
	if (len == TMPBUFLEN - 1)
		return -EINVAL;

	if (len < *size) {
		/* something follows the number: it must be a permitted trailer */
		if (perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
			return -EINVAL;
		if (tr)
			*tr = *p;
	}

	*buf += len;
	*size -= len;

	return 0;
}
2204 
2205 /**
2206  * proc_put_long - converts an integer to a decimal ASCII formatted string
2207  *
2208  * @buf: the user buffer
2209  * @size: the size of the user buffer
2210  * @val: the integer to be converted
2211  * @neg: sign of the number, %TRUE for negative
2212  *
2213  * In case of success %0 is returned and @buf and @size are updated with
2214  * the amount of bytes written.
2215  */
2216 static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
2217 			  bool neg)
2218 {
2219 	int len;
2220 	char tmp[TMPBUFLEN], *p = tmp;
2221 
2222 	sprintf(p, "%s%lu", neg ? "-" : "", val);
2223 	len = strlen(tmp);
2224 	if (len > *size)
2225 		len = *size;
2226 	if (copy_to_user(*buf, tmp, len))
2227 		return -EFAULT;
2228 	*size -= len;
2229 	*buf += len;
2230 	return 0;
2231 }
2232 #undef TMPBUFLEN
2233 
2234 static int proc_put_char(void __user **buf, size_t *size, char c)
2235 {
2236 	if (*size) {
2237 		char __user **buffer = (char __user **)buf;
2238 		if (put_user(c, *buffer))
2239 			return -EFAULT;
2240 		(*size)--, (*buffer)++;
2241 		*buf = *buffer;
2242 	}
2243 	return 0;
2244 }
2245 
/*
 * Default conversion callback for __do_proc_dointvec(): translate between
 * the sign/magnitude pair (@negp, @lvalp) and the int at @valp.
 *
 * @write != 0: store the value described by *@negp / *@lvalp in *@valp.
 *              A magnitude that does not fit in an int is rejected and
 *              *@valp is left untouched.
 * @write == 0: split *@valp into sign (*@negp) and magnitude (*@lvalp).
 *
 * @data is unused.
 *
 * Returns 0 on success, -EINVAL when a written value is out of int range.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		if (*negp) {
			/* the magnitude of INT_MIN is INT_MAX + 1 */
			if (*lvalp > (unsigned long) INT_MAX + 1)
				return -EINVAL;
			*valp = -*lvalp;
		} else {
			if (*lvalp > (unsigned long) INT_MAX)
				return -EINVAL;
			*valp = *lvalp;
		}
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = true;
			/*
			 * Negate in unsigned arithmetic: -val would overflow
			 * for val == INT_MIN.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = false;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2264 
2265 static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
2266 
2267 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2268 		  int write, void __user *buffer,
2269 		  size_t *lenp, loff_t *ppos,
2270 		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2271 			      int write, void *data),
2272 		  void *data)
2273 {
2274 	int *i, vleft, first = 1, err = 0;
2275 	unsigned long page = 0;
2276 	size_t left;
2277 	char *kbuf;
2278 
2279 	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2280 		*lenp = 0;
2281 		return 0;
2282 	}
2283 
2284 	i = (int *) tbl_data;
2285 	vleft = table->maxlen / sizeof(*i);
2286 	left = *lenp;
2287 
2288 	if (!conv)
2289 		conv = do_proc_dointvec_conv;
2290 
2291 	if (write) {
2292 		if (left > PAGE_SIZE - 1)
2293 			left = PAGE_SIZE - 1;
2294 		page = __get_free_page(GFP_TEMPORARY);
2295 		kbuf = (char *) page;
2296 		if (!kbuf)
2297 			return -ENOMEM;
2298 		if (copy_from_user(kbuf, buffer, left)) {
2299 			err = -EFAULT;
2300 			goto free;
2301 		}
2302 		kbuf[left] = 0;
2303 	}
2304 
2305 	for (; left && vleft--; i++, first=0) {
2306 		unsigned long lval;
2307 		bool neg;
2308 
2309 		if (write) {
2310 			left -= proc_skip_spaces(&kbuf);
2311 
2312 			if (!left)
2313 				break;
2314 			err = proc_get_long(&kbuf, &left, &lval, &neg,
2315 					     proc_wspace_sep,
2316 					     sizeof(proc_wspace_sep), NULL);
2317 			if (err)
2318 				break;
2319 			if (conv(&neg, &lval, i, 1, data)) {
2320 				err = -EINVAL;
2321 				break;
2322 			}
2323 		} else {
2324 			if (conv(&neg, &lval, i, 0, data)) {
2325 				err = -EINVAL;
2326 				break;
2327 			}
2328 			if (!first)
2329 				err = proc_put_char(&buffer, &left, '\t');
2330 			if (err)
2331 				break;
2332 			err = proc_put_long(&buffer, &left, lval, neg);
2333 			if (err)
2334 				break;
2335 		}
2336 	}
2337 
2338 	if (!write && !first && left && !err)
2339 		err = proc_put_char(&buffer, &left, '\n');
2340 	if (write && !err && left)
2341 		left -= proc_skip_spaces(&kbuf);
2342 free:
2343 	if (write) {
2344 		free_page(page);
2345 		if (first)
2346 			return err ? : -EINVAL;
2347 	}
2348 	*lenp -= left;
2349 	*ppos += *lenp;
2350 	return err;
2351 }
2352 
2353 static int do_proc_dointvec(struct ctl_table *table, int write,
2354 		  void __user *buffer, size_t *lenp, loff_t *ppos,
2355 		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2356 			      int write, void *data),
2357 		  void *data)
2358 {
2359 	return __do_proc_dointvec(table->data, table, write,
2360 			buffer, lenp, ppos, conv, data);
2361 }
2362 
2363 /**
2364  * proc_dointvec - read a vector of integers
2365  * @table: the sysctl table
2366  * @write: %TRUE if this is a write to the sysctl file
2367  * @buffer: the user buffer
2368  * @lenp: the size of the user buffer
2369  * @ppos: file position
2370  *
2371  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2372  * values from/to the user buffer, treated as an ASCII string.
2373  *
2374  * Returns 0 on success.
2375  */
2376 int proc_dointvec(struct ctl_table *table, int write,
2377 		     void __user *buffer, size_t *lenp, loff_t *ppos)
2378 {
2379     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2380 		    	    NULL,NULL);
2381 }
2382 
2383 /*
2384  * Taint values can only be increased
2385  * This means we can safely use a temporary.
2386  */
2387 static int proc_taint(struct ctl_table *table, int write,
2388 			       void __user *buffer, size_t *lenp, loff_t *ppos)
2389 {
2390 	struct ctl_table t;
2391 	unsigned long tmptaint = get_taint();
2392 	int err;
2393 
2394 	if (write && !capable(CAP_SYS_ADMIN))
2395 		return -EPERM;
2396 
2397 	t = *table;
2398 	t.data = &tmptaint;
2399 	err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2400 	if (err < 0)
2401 		return err;
2402 
2403 	if (write) {
2404 		/*
2405 		 * Poor man's atomic or. Not worth adding a primitive
2406 		 * to everyone's atomic.h for this
2407 		 */
2408 		int i;
2409 		for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2410 			if ((tmptaint >> i) & 1)
2411 				add_taint(i);
2412 		}
2413 	}
2414 
2415 	return err;
2416 }
2417 
2418 #ifdef CONFIG_PRINTK
2419 static int proc_dmesg_restrict(struct ctl_table *table, int write,
2420 				void __user *buffer, size_t *lenp, loff_t *ppos)
2421 {
2422 	if (write && !capable(CAP_SYS_ADMIN))
2423 		return -EPERM;
2424 
2425 	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2426 }
2427 #endif
2428 
/*
 * Bounds handed to do_proc_dointvec_minmax_conv() through its @data
 * argument.  A NULL pointer means "no bound on that side".
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;	/* lowest accepted value, or NULL */
	int *max;	/* highest accepted value, or NULL */
};

/*
 * do_proc_dointvec_minmax_conv - int converter with range checking
 * @negp: sign flag (input on write, output on read)
 * @lvalp: magnitude (input on write, output on read)
 * @valp: the int being written to / read from
 * @write: non-zero when converting user input into *valp
 * @data: pointer to a struct do_proc_dointvec_minmax_conv_param
 *
 * On write the value must first fit in an int; it used to be truncated
 * before the range check, so an out-of-range input could wrap into range.
 * It is then checked against the optional min/max bounds, and *valp is
 * only modified if both checks pass.  Reads are never range checked.
 *
 * Returns 0 on success, -EINVAL if the written value is rejected.
 */
static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		int val;

		if (*negp) {
			/* -(INT_MAX + 1) == INT_MIN is still representable */
			if (*lvalp > (unsigned long)INT_MAX + 1)
				return -EINVAL;
			val = -*lvalp;
		} else {
			if (*lvalp > (unsigned long)INT_MAX)
				return -EINVAL;
			val = *lvalp;
		}
		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = true;
			/* negate after widening: -val would overflow for INT_MIN */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = false;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2457 
2458 /**
2459  * proc_dointvec_minmax - read a vector of integers with min/max values
2460  * @table: the sysctl table
2461  * @write: %TRUE if this is a write to the sysctl file
2462  * @buffer: the user buffer
2463  * @lenp: the size of the user buffer
2464  * @ppos: file position
2465  *
2466  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2467  * values from/to the user buffer, treated as an ASCII string.
2468  *
2469  * This routine will ensure the values are within the range specified by
2470  * table->extra1 (min) and table->extra2 (max).
2471  *
2472  * Returns 0 on success.
2473  */
2474 int proc_dointvec_minmax(struct ctl_table *table, int write,
2475 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2476 {
2477 	struct do_proc_dointvec_minmax_conv_param param = {
2478 		.min = (int *) table->extra1,
2479 		.max = (int *) table->extra2,
2480 	};
2481 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
2482 				do_proc_dointvec_minmax_conv, &param);
2483 }
2484 
2485 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2486 				     void __user *buffer,
2487 				     size_t *lenp, loff_t *ppos,
2488 				     unsigned long convmul,
2489 				     unsigned long convdiv)
2490 {
2491 	unsigned long *i, *min, *max;
2492 	int vleft, first = 1, err = 0;
2493 	unsigned long page = 0;
2494 	size_t left;
2495 	char *kbuf;
2496 
2497 	if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
2498 		*lenp = 0;
2499 		return 0;
2500 	}
2501 
2502 	i = (unsigned long *) data;
2503 	min = (unsigned long *) table->extra1;
2504 	max = (unsigned long *) table->extra2;
2505 	vleft = table->maxlen / sizeof(unsigned long);
2506 	left = *lenp;
2507 
2508 	if (write) {
2509 		if (left > PAGE_SIZE - 1)
2510 			left = PAGE_SIZE - 1;
2511 		page = __get_free_page(GFP_TEMPORARY);
2512 		kbuf = (char *) page;
2513 		if (!kbuf)
2514 			return -ENOMEM;
2515 		if (copy_from_user(kbuf, buffer, left)) {
2516 			err = -EFAULT;
2517 			goto free;
2518 		}
2519 		kbuf[left] = 0;
2520 	}
2521 
2522 	for (; left && vleft--; i++, first = 0) {
2523 		unsigned long val;
2524 
2525 		if (write) {
2526 			bool neg;
2527 
2528 			left -= proc_skip_spaces(&kbuf);
2529 
2530 			err = proc_get_long(&kbuf, &left, &val, &neg,
2531 					     proc_wspace_sep,
2532 					     sizeof(proc_wspace_sep), NULL);
2533 			if (err)
2534 				break;
2535 			if (neg)
2536 				continue;
2537 			if ((min && val < *min) || (max && val > *max))
2538 				continue;
2539 			*i = val;
2540 		} else {
2541 			val = convdiv * (*i) / convmul;
2542 			if (!first)
2543 				err = proc_put_char(&buffer, &left, '\t');
2544 			err = proc_put_long(&buffer, &left, val, false);
2545 			if (err)
2546 				break;
2547 		}
2548 	}
2549 
2550 	if (!write && !first && left && !err)
2551 		err = proc_put_char(&buffer, &left, '\n');
2552 	if (write && !err)
2553 		left -= proc_skip_spaces(&kbuf);
2554 free:
2555 	if (write) {
2556 		free_page(page);
2557 		if (first)
2558 			return err ? : -EINVAL;
2559 	}
2560 	*lenp -= left;
2561 	*ppos += *lenp;
2562 	return err;
2563 }
2564 
2565 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2566 				     void __user *buffer,
2567 				     size_t *lenp, loff_t *ppos,
2568 				     unsigned long convmul,
2569 				     unsigned long convdiv)
2570 {
2571 	return __do_proc_doulongvec_minmax(table->data, table, write,
2572 			buffer, lenp, ppos, convmul, convdiv);
2573 }
2574 
2575 /**
2576  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2577  * @table: the sysctl table
2578  * @write: %TRUE if this is a write to the sysctl file
2579  * @buffer: the user buffer
2580  * @lenp: the size of the user buffer
2581  * @ppos: file position
2582  *
2583  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2584  * values from/to the user buffer, treated as an ASCII string.
2585  *
2586  * This routine will ensure the values are within the range specified by
2587  * table->extra1 (min) and table->extra2 (max).
2588  *
2589  * Returns 0 on success.
2590  */
2591 int proc_doulongvec_minmax(struct ctl_table *table, int write,
2592 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2593 {
2594     return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2595 }
2596 
2597 /**
2598  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2599  * @table: the sysctl table
2600  * @write: %TRUE if this is a write to the sysctl file
2601  * @buffer: the user buffer
2602  * @lenp: the size of the user buffer
2603  * @ppos: file position
2604  *
2605  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2606  * values from/to the user buffer, treated as an ASCII string. The values
2607  * are treated as milliseconds, and converted to jiffies when they are stored.
2608  *
2609  * This routine will ensure the values are within the range specified by
2610  * table->extra1 (min) and table->extra2 (max).
2611  *
2612  * Returns 0 on success.
2613  */
2614 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2615 				      void __user *buffer,
2616 				      size_t *lenp, loff_t *ppos)
2617 {
2618     return do_proc_doulongvec_minmax(table, write, buffer,
2619 				     lenp, ppos, HZ, 1000l);
2620 }
2621 
2622 
2623 static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
2624 					 int *valp,
2625 					 int write, void *data)
2626 {
2627 	if (write) {
2628 		if (*lvalp > LONG_MAX / HZ)
2629 			return 1;
2630 		*valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2631 	} else {
2632 		int val = *valp;
2633 		unsigned long lval;
2634 		if (val < 0) {
2635 			*negp = true;
2636 			lval = (unsigned long)-val;
2637 		} else {
2638 			*negp = false;
2639 			lval = (unsigned long)val;
2640 		}
2641 		*lvalp = lval / HZ;
2642 	}
2643 	return 0;
2644 }
2645 
2646 static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
2647 						int *valp,
2648 						int write, void *data)
2649 {
2650 	if (write) {
2651 		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2652 			return 1;
2653 		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2654 	} else {
2655 		int val = *valp;
2656 		unsigned long lval;
2657 		if (val < 0) {
2658 			*negp = true;
2659 			lval = (unsigned long)-val;
2660 		} else {
2661 			*negp = false;
2662 			lval = (unsigned long)val;
2663 		}
2664 		*lvalp = jiffies_to_clock_t(lval);
2665 	}
2666 	return 0;
2667 }
2668 
2669 static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
2670 					    int *valp,
2671 					    int write, void *data)
2672 {
2673 	if (write) {
2674 		*valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
2675 	} else {
2676 		int val = *valp;
2677 		unsigned long lval;
2678 		if (val < 0) {
2679 			*negp = true;
2680 			lval = (unsigned long)-val;
2681 		} else {
2682 			*negp = false;
2683 			lval = (unsigned long)val;
2684 		}
2685 		*lvalp = jiffies_to_msecs(lval);
2686 	}
2687 	return 0;
2688 }
2689 
2690 /**
2691  * proc_dointvec_jiffies - read a vector of integers as seconds
2692  * @table: the sysctl table
2693  * @write: %TRUE if this is a write to the sysctl file
2694  * @buffer: the user buffer
2695  * @lenp: the size of the user buffer
2696  * @ppos: file position
2697  *
2698  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2699  * values from/to the user buffer, treated as an ASCII string.
2700  * The values read are assumed to be in seconds, and are converted into
2701  * jiffies.
2702  *
2703  * Returns 0 on success.
2704  */
2705 int proc_dointvec_jiffies(struct ctl_table *table, int write,
2706 			  void __user *buffer, size_t *lenp, loff_t *ppos)
2707 {
2708     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2709 		    	    do_proc_dointvec_jiffies_conv,NULL);
2710 }
2711 
2712 /**
2713  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2714  * @table: the sysctl table
2715  * @write: %TRUE if this is a write to the sysctl file
2716  * @buffer: the user buffer
2717  * @lenp: the size of the user buffer
2718  * @ppos: pointer to the file position
2719  *
2720  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2721  * values from/to the user buffer, treated as an ASCII string.
2722  * The values read are assumed to be in 1/USER_HZ seconds, and
2723  * are converted into jiffies.
2724  *
2725  * Returns 0 on success.
2726  */
2727 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2728 				 void __user *buffer, size_t *lenp, loff_t *ppos)
2729 {
2730     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2731 		    	    do_proc_dointvec_userhz_jiffies_conv,NULL);
2732 }
2733 
2734 /**
2735  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2736  * @table: the sysctl table
2737  * @write: %TRUE if this is a write to the sysctl file
2738  * @buffer: the user buffer
2739  * @lenp: the size of the user buffer
2740  * @ppos: file position
2741  * @ppos: the current position in the file
2742  *
2743  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2744  * values from/to the user buffer, treated as an ASCII string.
2745  * The values read are assumed to be in 1/1000 seconds, and
2746  * are converted into jiffies.
2747  *
2748  * Returns 0 on success.
2749  */
2750 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
2751 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2752 {
2753 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
2754 				do_proc_dointvec_ms_jiffies_conv, NULL);
2755 }
2756 
2757 static int proc_do_cad_pid(struct ctl_table *table, int write,
2758 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2759 {
2760 	struct pid *new_pid;
2761 	pid_t tmp;
2762 	int r;
2763 
2764 	tmp = pid_vnr(cad_pid);
2765 
2766 	r = __do_proc_dointvec(&tmp, table, write, buffer,
2767 			       lenp, ppos, NULL, NULL);
2768 	if (r || !write)
2769 		return r;
2770 
2771 	new_pid = find_get_pid(tmp);
2772 	if (!new_pid)
2773 		return -ESRCH;
2774 
2775 	put_pid(xchg(&cad_pid, new_pid));
2776 	return 0;
2777 }
2778 
2779 /**
2780  * proc_do_large_bitmap - read/write from/to a large bitmap
2781  * @table: the sysctl table
2782  * @write: %TRUE if this is a write to the sysctl file
2783  * @buffer: the user buffer
2784  * @lenp: the size of the user buffer
2785  * @ppos: file position
2786  *
2787  * The bitmap is stored at table->data and the bitmap length (in bits)
2788  * in table->maxlen.
2789  *
2790  * We use a range comma separated format (e.g. 1,3-4,10-10) so that
2791  * large bitmaps may be represented in a compact manner. Writing into
2792  * the file will clear the bitmap then update it with the given input.
2793  *
2794  * Returns 0 on success.
2795  */
2796 int proc_do_large_bitmap(struct ctl_table *table, int write,
2797 			 void __user *buffer, size_t *lenp, loff_t *ppos)
2798 {
2799 	int err = 0;
2800 	bool first = 1;
2801 	size_t left = *lenp;
2802 	unsigned long bitmap_len = table->maxlen;
2803 	unsigned long *bitmap = (unsigned long *) table->data;
2804 	unsigned long *tmp_bitmap = NULL;
2805 	char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
2806 
2807 	if (!bitmap_len || !left || (*ppos && !write)) {
2808 		*lenp = 0;
2809 		return 0;
2810 	}
2811 
2812 	if (write) {
2813 		unsigned long page = 0;
2814 		char *kbuf;
2815 
2816 		if (left > PAGE_SIZE - 1)
2817 			left = PAGE_SIZE - 1;
2818 
2819 		page = __get_free_page(GFP_TEMPORARY);
2820 		kbuf = (char *) page;
2821 		if (!kbuf)
2822 			return -ENOMEM;
2823 		if (copy_from_user(kbuf, buffer, left)) {
2824 			free_page(page);
2825 			return -EFAULT;
2826                 }
2827 		kbuf[left] = 0;
2828 
2829 		tmp_bitmap = kzalloc(BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long),
2830 				     GFP_KERNEL);
2831 		if (!tmp_bitmap) {
2832 			free_page(page);
2833 			return -ENOMEM;
2834 		}
2835 		proc_skip_char(&kbuf, &left, '\n');
2836 		while (!err && left) {
2837 			unsigned long val_a, val_b;
2838 			bool neg;
2839 
2840 			err = proc_get_long(&kbuf, &left, &val_a, &neg, tr_a,
2841 					     sizeof(tr_a), &c);
2842 			if (err)
2843 				break;
2844 			if (val_a >= bitmap_len || neg) {
2845 				err = -EINVAL;
2846 				break;
2847 			}
2848 
2849 			val_b = val_a;
2850 			if (left) {
2851 				kbuf++;
2852 				left--;
2853 			}
2854 
2855 			if (c == '-') {
2856 				err = proc_get_long(&kbuf, &left, &val_b,
2857 						     &neg, tr_b, sizeof(tr_b),
2858 						     &c);
2859 				if (err)
2860 					break;
2861 				if (val_b >= bitmap_len || neg ||
2862 				    val_a > val_b) {
2863 					err = -EINVAL;
2864 					break;
2865 				}
2866 				if (left) {
2867 					kbuf++;
2868 					left--;
2869 				}
2870 			}
2871 
2872 			while (val_a <= val_b)
2873 				set_bit(val_a++, tmp_bitmap);
2874 
2875 			first = 0;
2876 			proc_skip_char(&kbuf, &left, '\n');
2877 		}
2878 		free_page(page);
2879 	} else {
2880 		unsigned long bit_a, bit_b = 0;
2881 
2882 		while (left) {
2883 			bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
2884 			if (bit_a >= bitmap_len)
2885 				break;
2886 			bit_b = find_next_zero_bit(bitmap, bitmap_len,
2887 						   bit_a + 1) - 1;
2888 
2889 			if (!first) {
2890 				err = proc_put_char(&buffer, &left, ',');
2891 				if (err)
2892 					break;
2893 			}
2894 			err = proc_put_long(&buffer, &left, bit_a, false);
2895 			if (err)
2896 				break;
2897 			if (bit_a != bit_b) {
2898 				err = proc_put_char(&buffer, &left, '-');
2899 				if (err)
2900 					break;
2901 				err = proc_put_long(&buffer, &left, bit_b, false);
2902 				if (err)
2903 					break;
2904 			}
2905 
2906 			first = 0; bit_b++;
2907 		}
2908 		if (!err)
2909 			err = proc_put_char(&buffer, &left, '\n');
2910 	}
2911 
2912 	if (!err) {
2913 		if (write) {
2914 			if (*ppos)
2915 				bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
2916 			else
2917 				memcpy(bitmap, tmp_bitmap,
2918 					BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long));
2919 		}
2920 		kfree(tmp_bitmap);
2921 		*lenp -= left;
2922 		*ppos += *lenp;
2923 		return 0;
2924 	} else {
2925 		kfree(tmp_bitmap);
2926 		return err;
2927 	}
2928 }
2929 
2930 #else /* CONFIG_PROC_SYSCTL */
2931 
2932 int proc_dostring(struct ctl_table *table, int write,
2933 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2934 {
2935 	return -ENOSYS;
2936 }
2937 
2938 int proc_dointvec(struct ctl_table *table, int write,
2939 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2940 {
2941 	return -ENOSYS;
2942 }
2943 
2944 int proc_dointvec_minmax(struct ctl_table *table, int write,
2945 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2946 {
2947 	return -ENOSYS;
2948 }
2949 
2950 int proc_dointvec_jiffies(struct ctl_table *table, int write,
2951 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2952 {
2953 	return -ENOSYS;
2954 }
2955 
2956 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2957 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2958 {
2959 	return -ENOSYS;
2960 }
2961 
2962 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
2963 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2964 {
2965 	return -ENOSYS;
2966 }
2967 
2968 int proc_doulongvec_minmax(struct ctl_table *table, int write,
2969 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2970 {
2971 	return -ENOSYS;
2972 }
2973 
2974 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2975 				      void __user *buffer,
2976 				      size_t *lenp, loff_t *ppos)
2977 {
2978     return -ENOSYS;
2979 }
2980 
2981 
2982 #endif /* CONFIG_PROC_SYSCTL */
2983 
/*
 * The proc handlers are defined twice (once per CONFIG_PROC_SYSCTL
 * branch), so their exports are collected here in one place instead of
 * following each definition.
 */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(register_sysctl_paths);
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(unregister_sysctl_table);
2999