xref: /openbmc/linux/kernel/sysctl.c (revision 5b4cb650)
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20 
21 #include <linux/module.h>
22 #include <linux/aio.h>
23 #include <linux/mm.h>
24 #include <linux/swap.h>
25 #include <linux/slab.h>
26 #include <linux/sysctl.h>
27 #include <linux/bitmap.h>
28 #include <linux/signal.h>
29 #include <linux/printk.h>
30 #include <linux/proc_fs.h>
31 #include <linux/security.h>
32 #include <linux/ctype.h>
33 #include <linux/kmemleak.h>
34 #include <linux/fs.h>
35 #include <linux/init.h>
36 #include <linux/kernel.h>
37 #include <linux/kobject.h>
38 #include <linux/net.h>
39 #include <linux/sysrq.h>
40 #include <linux/highuid.h>
41 #include <linux/writeback.h>
42 #include <linux/ratelimit.h>
43 #include <linux/compaction.h>
44 #include <linux/hugetlb.h>
45 #include <linux/initrd.h>
46 #include <linux/key.h>
47 #include <linux/times.h>
48 #include <linux/limits.h>
49 #include <linux/dcache.h>
50 #include <linux/dnotify.h>
51 #include <linux/syscalls.h>
52 #include <linux/vmstat.h>
53 #include <linux/nfs_fs.h>
54 #include <linux/acpi.h>
55 #include <linux/reboot.h>
56 #include <linux/ftrace.h>
57 #include <linux/perf_event.h>
58 #include <linux/kprobes.h>
59 #include <linux/pipe_fs_i.h>
60 #include <linux/oom.h>
61 #include <linux/kmod.h>
62 #include <linux/capability.h>
63 #include <linux/binfmts.h>
64 #include <linux/sched/sysctl.h>
65 #include <linux/sched/coredump.h>
66 #include <linux/kexec.h>
67 #include <linux/bpf.h>
68 #include <linux/mount.h>
69 
70 #include <linux/uaccess.h>
71 #include <asm/processor.h>
72 
73 #ifdef CONFIG_X86
74 #include <asm/nmi.h>
75 #include <asm/stacktrace.h>
76 #include <asm/io.h>
77 #endif
78 #ifdef CONFIG_SPARC
79 #include <asm/setup.h>
80 #endif
81 #ifdef CONFIG_BSD_PROCESS_ACCT
82 #include <linux/acct.h>
83 #endif
84 #ifdef CONFIG_RT_MUTEXES
85 #include <linux/rtmutex.h>
86 #endif
87 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
88 #include <linux/lockdep.h>
89 #endif
90 #ifdef CONFIG_CHR_DEV_SG
91 #include <scsi/sg.h>
92 #endif
93 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
94 #include <linux/stackleak.h>
95 #endif
96 #ifdef CONFIG_LOCKUP_DETECTOR
97 #include <linux/nmi.h>
98 #endif
99 
100 #if defined(CONFIG_SYSCTL)
101 
102 /* External variables not in a header file. */
103 extern int suid_dumpable;		/* presumably fs.suid_dumpable - TODO confirm */
104 #ifdef CONFIG_COREDUMP
105 extern int core_uses_pid;		/* .data of "core_uses_pid" */
106 extern char core_pattern[];		/* .data of "core_pattern" (maxlen CORENAME_MAX_SIZE) */
107 extern unsigned int core_pipe_limit;	/* .data of "core_pipe_limit" */
108 #endif
109 extern int pid_max;			/* .data of "pid_max" */
110 extern int pid_max_min, pid_max_max;	/* .extra1/.extra2 range of "pid_max" */
111 extern int percpu_pagelist_fraction;
112 extern int latencytop_enabled;		/* .data of "latencytop" (CONFIG_LATENCYTOP only) */
113 extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max;
114 #ifndef CONFIG_MMU
115 extern int sysctl_nr_trim_pages;
116 #endif
117 
118 /* Constants used as .extra1 (minimum) and .extra2 (maximum) bounds in the tables */
119 #ifdef CONFIG_LOCKUP_DETECTOR
120 static int sixty = 60;			/* upper bound of "watchdog_thresh" */
121 #endif
122 
123 static int __maybe_unused neg_one = -1;	/* e.g. lower bound of "sysctl_writes_strict" */
124 
125 static int zero;			/* 0 through static zero-initialization */
126 static int __maybe_unused one = 1;	/* with zero: range of the 0/1 switches */
127 static int __maybe_unused two = 2;	/* e.g. upper bound of "kptr_restrict" */
128 static int __maybe_unused four = 4;
129 static unsigned long one_ul = 1;
130 static int one_hundred = 100;		/* upper bound of "perf_cpu_time_max_percent" */
131 static int one_thousand = 1000;	/* upper bound of "perf_event_max_contexts_per_stack" */
132 #ifdef CONFIG_PRINTK
133 static int ten_thousand = 10000;	/* upper bound of "printk_delay" */
134 #endif
135 #ifdef CONFIG_PERF_EVENTS
136 static int six_hundred_forty_kb = 640 * 1024;	/* upper bound of "perf_event_max_stack" */
137 #endif
138 
139 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
140 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
141 
142 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
143 static int maxolduid = 65535;		/* .extra2 of "overflowuid" and "overflowgid" */
144 static int minolduid;			/* .extra1 of the same entries; 0 by default */
145 
146 static int ngroups_max = NGROUPS_MAX;	/* .data of the 0444 "ngroups_max" entry */
147 static const int cap_last_cap = CAP_LAST_CAP;	/* 0444 "cap_last_cap"; the table casts away const */
148 
149 /*
150  * Upper bound (.extra2) of the hung_task_timeout_secs and
151  * hung_task_check_interval_secs entries (proc_doulongvec_minmax range check).
152  */
153 #ifdef CONFIG_DETECT_HUNG_TASK
154 static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
155 #endif
156 
157 #ifdef CONFIG_INOTIFY_USER
158 #include <linux/inotify.h>
159 #endif
160 #ifdef CONFIG_SPARC
161 #endif
162 
163 #ifdef __hppa__
164 extern int pwrsw_enabled;		/* .data of "soft-power" */
165 #endif
166 
167 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
168 extern int unaligned_enabled;		/* .data of "unaligned-trap" */
169 #endif
170 
171 #ifdef CONFIG_IA64
172 extern int unaligned_dump_stack;	/* .data of "unaligned-dump-stack" */
173 #endif
174 
175 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
176 extern int no_unaligned_warning;	/* .data of "ignore-unaligned-usertrap" */
177 #endif
178 
179 #ifdef CONFIG_PROC_SYSCTL
180 
181 /**
182  * enum sysctl_writes_mode - supported sysctl write modes
183  *
184  * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value
185  * 	to be written, and multiple writes on the same sysctl file descriptor
186  * 	will rewrite the sysctl value, regardless of file position. No warning
187  * 	is issued when the initial position is not 0.
188  * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is
189  * 	not 0.
190  * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
191  * 	file position 0 and the value must be fully contained in the buffer
192  * 	sent to the write syscall. When dealing with strings, respect the file
193  * 	position, but restrict this to the max length of the buffer; anything
194  * 	past the max length will be ignored. Multiple writes will append
195  * 	to the buffer.
196  *
197  * These write modes control how current file position affects the behavior of
198  * updating sysctl values through the proc interface on each write.
199  */
200 enum sysctl_writes_mode {
201 	SYSCTL_WRITES_LEGACY		= -1,	/* matches the neg_one lower bound of the sysctl */
202 	SYSCTL_WRITES_WARN		= 0,
203 	SYSCTL_WRITES_STRICT		= 1,	/* matches the one upper bound of the sysctl */
204 };
205 
206 static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;	/* strict by default */
207 /* Forward declarations of static handlers used as .proc_handler in the tables. */
208 static int proc_do_cad_pid(struct ctl_table *table, int write,
209 		  void __user *buffer, size_t *lenp, loff_t *ppos);	/* "cad_pid" */
210 static int proc_taint(struct ctl_table *table, int write,
211 			       void __user *buffer, size_t *lenp, loff_t *ppos);	/* "tainted" */
212 #endif
213 
214 #ifdef CONFIG_PRINTK
215 static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
216 				void __user *buffer, size_t *lenp, loff_t *ppos);	/* "dmesg_restrict", "kptr_restrict" */
217 #endif
218 
219 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
220 		void __user *buffer, size_t *lenp, loff_t *ppos);
221 #ifdef CONFIG_COREDUMP
222 static int proc_dostring_coredump(struct ctl_table *table, int write,
223 		void __user *buffer, size_t *lenp, loff_t *ppos);	/* "core_pattern" */
224 #endif
225 static int proc_dopipe_max_size(struct ctl_table *table, int write,
226 		void __user *buffer, size_t *lenp, loff_t *ppos);
227 
228 #ifdef CONFIG_MAGIC_SYSRQ
229 /* Value behind the "sysrq" sysctl entry; the sysrq code uses its own private copy. */
230 static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;
231 
232 static int sysrq_sysctl_handler(struct ctl_table *table, int write,
233 				void __user *buffer, size_t *lenp,
234 				loff_t *ppos)
235 {
236 	/* The generic integer handler performs the actual read or write. */
237 	int ret = proc_dointvec(table, write, buffer, lenp, ppos);
238 
239 	/*
240 	 * Only after a successful write is the new value pushed to the
241 	 * sysrq code; reads and failed accesses leave it untouched.
242 	 */
243 	if (!ret && write)
244 		sysrq_toggle_support(__sysrq_enabled);
245 
246 	return ret;
247 }
248 #endif
249 
250 static struct ctl_table kern_table[];	/* child of "kernel" in sysctl_base_table */
251 static struct ctl_table vm_table[];	/* child of "vm" */
252 static struct ctl_table fs_table[];	/* child of "fs" */
253 static struct ctl_table debug_table[];	/* child of "debug" */
254 static struct ctl_table dev_table[];	/* child of "dev" */
255 extern struct ctl_table random_table[];	/* child of "random" inside kern_table */
256 #ifdef CONFIG_EPOLL
257 extern struct ctl_table epoll_table[];
258 #endif
259 
260 #ifdef CONFIG_FW_LOADER_USER_HELPER
261 extern struct ctl_table firmware_config_table[];	/* child of "firmware_config" inside kern_table */
262 #endif
263 
264 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
265 int sysctl_legacy_va_layout;		/* defined here (not extern, not static) */
266 #endif
267 
268 /* The default sysctl tables: */
269 
270 /*
271  * Top-level directories provided by this file.  Every entry has mode 0555
272  * and only forwards to its child table; the empty entry closes the array.
273  */
274 static struct ctl_table sysctl_base_table[] = {
275 	{ .procname = "kernel",	.mode = 0555,	.child = kern_table },
276 	{ .procname = "vm",	.mode = 0555,	.child = vm_table },
277 	{ .procname = "fs",	.mode = 0555,	.child = fs_table },
278 	{ .procname = "debug",	.mode = 0555,	.child = debug_table },
279 	{ .procname = "dev",	.mode = 0555,	.child = dev_table },
280 	{ }
281 };
298 
299 #ifdef CONFIG_SCHED_DEBUG
300 static int min_sched_granularity_ns = 100000;		/* 100 usecs; .extra1 of sched_min_granularity_ns and sched_latency_ns */
301 static int max_sched_granularity_ns = NSEC_PER_SEC;	/* 1 second; .extra2 of the same two entries */
302 static int min_wakeup_granularity_ns;			/* 0 usecs; .extra1 of sched_wakeup_granularity_ns */
303 static int max_wakeup_granularity_ns = NSEC_PER_SEC;	/* 1 second; .extra2 of sched_wakeup_granularity_ns */
304 #ifdef CONFIG_SMP
305 static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;	/* .extra1 of sched_tunable_scaling */
306 static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;	/* .extra2: last valid enum value */
307 #endif /* CONFIG_SMP */
308 #endif /* CONFIG_SCHED_DEBUG */
309 
310 #ifdef CONFIG_COMPACTION
311 static int min_extfrag_threshold;		/* 0; presumably bounds vm.extfrag_threshold - TODO confirm */
312 static int max_extfrag_threshold = 1000;
313 #endif
314 
315 static struct ctl_table kern_table[] = {
316 	{
317 		.procname	= "sched_child_runs_first",
318 		.data		= &sysctl_sched_child_runs_first,
319 		.maxlen		= sizeof(unsigned int),
320 		.mode		= 0644,
321 		.proc_handler	= proc_dointvec,
322 	},
323 #ifdef CONFIG_SCHED_DEBUG
324 	{
325 		.procname	= "sched_min_granularity_ns",
326 		.data		= &sysctl_sched_min_granularity,
327 		.maxlen		= sizeof(unsigned int),
328 		.mode		= 0644,
329 		.proc_handler	= sched_proc_update_handler,
330 		.extra1		= &min_sched_granularity_ns,
331 		.extra2		= &max_sched_granularity_ns,
332 	},
333 	{
334 		.procname	= "sched_latency_ns",
335 		.data		= &sysctl_sched_latency,
336 		.maxlen		= sizeof(unsigned int),
337 		.mode		= 0644,
338 		.proc_handler	= sched_proc_update_handler,
339 		.extra1		= &min_sched_granularity_ns,
340 		.extra2		= &max_sched_granularity_ns,
341 	},
342 	{
343 		.procname	= "sched_wakeup_granularity_ns",
344 		.data		= &sysctl_sched_wakeup_granularity,
345 		.maxlen		= sizeof(unsigned int),
346 		.mode		= 0644,
347 		.proc_handler	= sched_proc_update_handler,
348 		.extra1		= &min_wakeup_granularity_ns,
349 		.extra2		= &max_wakeup_granularity_ns,
350 	},
351 #ifdef CONFIG_SMP
352 	{
353 		.procname	= "sched_tunable_scaling",
354 		.data		= &sysctl_sched_tunable_scaling,
355 		.maxlen		= sizeof(enum sched_tunable_scaling),
356 		.mode		= 0644,
357 		.proc_handler	= sched_proc_update_handler,
358 		.extra1		= &min_sched_tunable_scaling,
359 		.extra2		= &max_sched_tunable_scaling,
360 	},
361 	{
362 		.procname	= "sched_migration_cost_ns",
363 		.data		= &sysctl_sched_migration_cost,
364 		.maxlen		= sizeof(unsigned int),
365 		.mode		= 0644,
366 		.proc_handler	= proc_dointvec,
367 	},
368 	{
369 		.procname	= "sched_nr_migrate",
370 		.data		= &sysctl_sched_nr_migrate,
371 		.maxlen		= sizeof(unsigned int),
372 		.mode		= 0644,
373 		.proc_handler	= proc_dointvec,
374 	},
375 #ifdef CONFIG_SCHEDSTATS
376 	{
377 		.procname	= "sched_schedstats",
378 		.data		= NULL,
379 		.maxlen		= sizeof(unsigned int),
380 		.mode		= 0644,
381 		.proc_handler	= sysctl_schedstats,
382 		.extra1		= &zero,
383 		.extra2		= &one,
384 	},
385 #endif /* CONFIG_SCHEDSTATS */
386 #endif /* CONFIG_SMP */
387 #ifdef CONFIG_NUMA_BALANCING
388 	{
389 		.procname	= "numa_balancing_scan_delay_ms",
390 		.data		= &sysctl_numa_balancing_scan_delay,
391 		.maxlen		= sizeof(unsigned int),
392 		.mode		= 0644,
393 		.proc_handler	= proc_dointvec,
394 	},
395 	{
396 		.procname	= "numa_balancing_scan_period_min_ms",
397 		.data		= &sysctl_numa_balancing_scan_period_min,
398 		.maxlen		= sizeof(unsigned int),
399 		.mode		= 0644,
400 		.proc_handler	= proc_dointvec,
401 	},
402 	{
403 		.procname	= "numa_balancing_scan_period_max_ms",
404 		.data		= &sysctl_numa_balancing_scan_period_max,
405 		.maxlen		= sizeof(unsigned int),
406 		.mode		= 0644,
407 		.proc_handler	= proc_dointvec,
408 	},
409 	{
410 		.procname	= "numa_balancing_scan_size_mb",
411 		.data		= &sysctl_numa_balancing_scan_size,
412 		.maxlen		= sizeof(unsigned int),
413 		.mode		= 0644,
414 		.proc_handler	= proc_dointvec_minmax,
415 		.extra1		= &one,
416 	},
417 	{
418 		.procname	= "numa_balancing",
419 		.data		= NULL, /* filled in by handler */
420 		.maxlen		= sizeof(unsigned int),
421 		.mode		= 0644,
422 		.proc_handler	= sysctl_numa_balancing,
423 		.extra1		= &zero,
424 		.extra2		= &one,
425 	},
426 #endif /* CONFIG_NUMA_BALANCING */
427 #endif /* CONFIG_SCHED_DEBUG */
428 	{
429 		.procname	= "sched_rt_period_us",
430 		.data		= &sysctl_sched_rt_period,
431 		.maxlen		= sizeof(unsigned int),
432 		.mode		= 0644,
433 		.proc_handler	= sched_rt_handler,
434 	},
435 	{
436 		.procname	= "sched_rt_runtime_us",
437 		.data		= &sysctl_sched_rt_runtime,
438 		.maxlen		= sizeof(int),
439 		.mode		= 0644,
440 		.proc_handler	= sched_rt_handler,
441 	},
442 	{
443 		.procname	= "sched_rr_timeslice_ms",
444 		.data		= &sysctl_sched_rr_timeslice,
445 		.maxlen		= sizeof(int),
446 		.mode		= 0644,
447 		.proc_handler	= sched_rr_handler,
448 	},
449 #ifdef CONFIG_SCHED_AUTOGROUP
450 	{
451 		.procname	= "sched_autogroup_enabled",
452 		.data		= &sysctl_sched_autogroup_enabled,
453 		.maxlen		= sizeof(unsigned int),
454 		.mode		= 0644,
455 		.proc_handler	= proc_dointvec_minmax,
456 		.extra1		= &zero,
457 		.extra2		= &one,
458 	},
459 #endif
460 #ifdef CONFIG_CFS_BANDWIDTH
461 	{
462 		.procname	= "sched_cfs_bandwidth_slice_us",
463 		.data		= &sysctl_sched_cfs_bandwidth_slice,
464 		.maxlen		= sizeof(unsigned int),
465 		.mode		= 0644,
466 		.proc_handler	= proc_dointvec_minmax,
467 		.extra1		= &one,
468 	},
469 #endif
470 #ifdef CONFIG_PROVE_LOCKING
471 	{
472 		.procname	= "prove_locking",
473 		.data		= &prove_locking,
474 		.maxlen		= sizeof(int),
475 		.mode		= 0644,
476 		.proc_handler	= proc_dointvec,
477 	},
478 #endif
479 #ifdef CONFIG_LOCK_STAT
480 	{
481 		.procname	= "lock_stat",
482 		.data		= &lock_stat,
483 		.maxlen		= sizeof(int),
484 		.mode		= 0644,
485 		.proc_handler	= proc_dointvec,
486 	},
487 #endif
488 	{
489 		.procname	= "panic",
490 		.data		= &panic_timeout,
491 		.maxlen		= sizeof(int),
492 		.mode		= 0644,
493 		.proc_handler	= proc_dointvec,
494 	},
495 #ifdef CONFIG_COREDUMP
496 	{
497 		.procname	= "core_uses_pid",
498 		.data		= &core_uses_pid,
499 		.maxlen		= sizeof(int),
500 		.mode		= 0644,
501 		.proc_handler	= proc_dointvec,
502 	},
503 	{
504 		.procname	= "core_pattern",
505 		.data		= core_pattern,
506 		.maxlen		= CORENAME_MAX_SIZE,
507 		.mode		= 0644,
508 		.proc_handler	= proc_dostring_coredump,
509 	},
510 	{
511 		.procname	= "core_pipe_limit",
512 		.data		= &core_pipe_limit,
513 		.maxlen		= sizeof(unsigned int),
514 		.mode		= 0644,
515 		.proc_handler	= proc_dointvec,
516 	},
517 #endif
518 #ifdef CONFIG_PROC_SYSCTL
519 	{
520 		.procname	= "tainted",
521 		.maxlen 	= sizeof(long),
522 		.mode		= 0644,
523 		.proc_handler	= proc_taint,
524 	},
525 	{
526 		.procname	= "sysctl_writes_strict",
527 		.data		= &sysctl_writes_strict,
528 		.maxlen		= sizeof(int),
529 		.mode		= 0644,
530 		.proc_handler	= proc_dointvec_minmax,
531 		.extra1		= &neg_one,
532 		.extra2		= &one,
533 	},
534 #endif
535 #ifdef CONFIG_LATENCYTOP
536 	{
537 		.procname	= "latencytop",
538 		.data		= &latencytop_enabled,
539 		.maxlen		= sizeof(int),
540 		.mode		= 0644,
541 		.proc_handler	= sysctl_latencytop,
542 	},
543 #endif
544 #ifdef CONFIG_BLK_DEV_INITRD
545 	{
546 		.procname	= "real-root-dev",
547 		.data		= &real_root_dev,
548 		.maxlen		= sizeof(int),
549 		.mode		= 0644,
550 		.proc_handler	= proc_dointvec,
551 	},
552 #endif
553 	{
554 		.procname	= "print-fatal-signals",
555 		.data		= &print_fatal_signals,
556 		.maxlen		= sizeof(int),
557 		.mode		= 0644,
558 		.proc_handler	= proc_dointvec,
559 	},
560 #ifdef CONFIG_SPARC
561 	{
562 		.procname	= "reboot-cmd",
563 		.data		= reboot_command,
564 		.maxlen		= 256,
565 		.mode		= 0644,
566 		.proc_handler	= proc_dostring,
567 	},
568 	{
569 		.procname	= "stop-a",
570 		.data		= &stop_a_enabled,
571 		.maxlen		= sizeof (int),
572 		.mode		= 0644,
573 		.proc_handler	= proc_dointvec,
574 	},
575 	{
576 		.procname	= "scons-poweroff",
577 		.data		= &scons_pwroff,
578 		.maxlen		= sizeof (int),
579 		.mode		= 0644,
580 		.proc_handler	= proc_dointvec,
581 	},
582 #endif
583 #ifdef CONFIG_SPARC64
584 	{
585 		.procname	= "tsb-ratio",
586 		.data		= &sysctl_tsb_ratio,
587 		.maxlen		= sizeof (int),
588 		.mode		= 0644,
589 		.proc_handler	= proc_dointvec,
590 	},
591 #endif
592 #ifdef __hppa__
593 	{
594 		.procname	= "soft-power",
595 		.data		= &pwrsw_enabled,
596 		.maxlen		= sizeof (int),
597 	 	.mode		= 0644,
598 		.proc_handler	= proc_dointvec,
599 	},
600 #endif
601 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
602 	{
603 		.procname	= "unaligned-trap",
604 		.data		= &unaligned_enabled,
605 		.maxlen		= sizeof (int),
606 		.mode		= 0644,
607 		.proc_handler	= proc_dointvec,
608 	},
609 #endif
610 	{
611 		.procname	= "ctrl-alt-del",
612 		.data		= &C_A_D,
613 		.maxlen		= sizeof(int),
614 		.mode		= 0644,
615 		.proc_handler	= proc_dointvec,
616 	},
617 #ifdef CONFIG_FUNCTION_TRACER
618 	{
619 		.procname	= "ftrace_enabled",
620 		.data		= &ftrace_enabled,
621 		.maxlen		= sizeof(int),
622 		.mode		= 0644,
623 		.proc_handler	= ftrace_enable_sysctl,
624 	},
625 #endif
626 #ifdef CONFIG_STACK_TRACER
627 	{
628 		.procname	= "stack_tracer_enabled",
629 		.data		= &stack_tracer_enabled,
630 		.maxlen		= sizeof(int),
631 		.mode		= 0644,
632 		.proc_handler	= stack_trace_sysctl,
633 	},
634 #endif
635 #ifdef CONFIG_TRACING
636 	{
637 		.procname	= "ftrace_dump_on_oops",
638 		.data		= &ftrace_dump_on_oops,
639 		.maxlen		= sizeof(int),
640 		.mode		= 0644,
641 		.proc_handler	= proc_dointvec,
642 	},
643 	{
644 		.procname	= "traceoff_on_warning",
645 		.data		= &__disable_trace_on_warning,
646 		.maxlen		= sizeof(__disable_trace_on_warning),
647 		.mode		= 0644,
648 		.proc_handler	= proc_dointvec,
649 	},
650 	{
651 		.procname	= "tracepoint_printk",
652 		.data		= &tracepoint_printk,
653 		.maxlen		= sizeof(tracepoint_printk),
654 		.mode		= 0644,
655 		.proc_handler	= tracepoint_printk_sysctl,
656 	},
657 #endif
658 #ifdef CONFIG_KEXEC_CORE
659 	{
660 		.procname	= "kexec_load_disabled",
661 		.data		= &kexec_load_disabled,
662 		.maxlen		= sizeof(int),
663 		.mode		= 0644,
664 		/* only handle a transition from default "0" to "1" */
665 		.proc_handler	= proc_dointvec_minmax,
666 		.extra1		= &one,
667 		.extra2		= &one,
668 	},
669 #endif
670 #ifdef CONFIG_MODULES
671 	{
672 		.procname	= "modprobe",
673 		.data		= &modprobe_path,
674 		.maxlen		= KMOD_PATH_LEN,
675 		.mode		= 0644,
676 		.proc_handler	= proc_dostring,
677 	},
678 	{
679 		.procname	= "modules_disabled",
680 		.data		= &modules_disabled,
681 		.maxlen		= sizeof(int),
682 		.mode		= 0644,
683 		/* only handle a transition from default "0" to "1" */
684 		.proc_handler	= proc_dointvec_minmax,
685 		.extra1		= &one,
686 		.extra2		= &one,
687 	},
688 #endif
689 #ifdef CONFIG_UEVENT_HELPER
690 	{
691 		.procname	= "hotplug",
692 		.data		= &uevent_helper,
693 		.maxlen		= UEVENT_HELPER_PATH_LEN,
694 		.mode		= 0644,
695 		.proc_handler	= proc_dostring,
696 	},
697 #endif
698 #ifdef CONFIG_CHR_DEV_SG
699 	{
700 		.procname	= "sg-big-buff",
701 		.data		= &sg_big_buff,
702 		.maxlen		= sizeof (int),
703 		.mode		= 0444,
704 		.proc_handler	= proc_dointvec,
705 	},
706 #endif
707 #ifdef CONFIG_BSD_PROCESS_ACCT
708 	{
709 		.procname	= "acct",
710 		.data		= &acct_parm,
711 		.maxlen		= 3*sizeof(int),
712 		.mode		= 0644,
713 		.proc_handler	= proc_dointvec,
714 	},
715 #endif
716 #ifdef CONFIG_MAGIC_SYSRQ
717 	{
718 		.procname	= "sysrq",
719 		.data		= &__sysrq_enabled,
720 		.maxlen		= sizeof (int),
721 		.mode		= 0644,
722 		.proc_handler	= sysrq_sysctl_handler,
723 	},
724 #endif
725 #ifdef CONFIG_PROC_SYSCTL
726 	{
727 		.procname	= "cad_pid",
728 		.data		= NULL,
729 		.maxlen		= sizeof (int),
730 		.mode		= 0600,
731 		.proc_handler	= proc_do_cad_pid,
732 	},
733 #endif
734 	{
735 		.procname	= "threads-max",
736 		.data		= NULL,
737 		.maxlen		= sizeof(int),
738 		.mode		= 0644,
739 		.proc_handler	= sysctl_max_threads,
740 	},
741 	{
742 		.procname	= "random",
743 		.mode		= 0555,
744 		.child		= random_table,
745 	},
746 	{
747 		.procname	= "usermodehelper",
748 		.mode		= 0555,
749 		.child		= usermodehelper_table,
750 	},
751 #ifdef CONFIG_FW_LOADER_USER_HELPER
752 	{
753 		.procname	= "firmware_config",
754 		.mode		= 0555,
755 		.child		= firmware_config_table,
756 	},
757 #endif
758 	{
759 		.procname	= "overflowuid",
760 		.data		= &overflowuid,
761 		.maxlen		= sizeof(int),
762 		.mode		= 0644,
763 		.proc_handler	= proc_dointvec_minmax,
764 		.extra1		= &minolduid,
765 		.extra2		= &maxolduid,
766 	},
767 	{
768 		.procname	= "overflowgid",
769 		.data		= &overflowgid,
770 		.maxlen		= sizeof(int),
771 		.mode		= 0644,
772 		.proc_handler	= proc_dointvec_minmax,
773 		.extra1		= &minolduid,
774 		.extra2		= &maxolduid,
775 	},
776 #ifdef CONFIG_S390
777 #ifdef CONFIG_MATHEMU
778 	{
779 		.procname	= "ieee_emulation_warnings",
780 		.data		= &sysctl_ieee_emulation_warnings,
781 		.maxlen		= sizeof(int),
782 		.mode		= 0644,
783 		.proc_handler	= proc_dointvec,
784 	},
785 #endif
786 	{
787 		.procname	= "userprocess_debug",
788 		.data		= &show_unhandled_signals,
789 		.maxlen		= sizeof(int),
790 		.mode		= 0644,
791 		.proc_handler	= proc_dointvec,
792 	},
793 #endif
794 	{
795 		.procname	= "pid_max",
796 		.data		= &pid_max,
797 		.maxlen		= sizeof (int),
798 		.mode		= 0644,
799 		.proc_handler	= proc_dointvec_minmax,
800 		.extra1		= &pid_max_min,
801 		.extra2		= &pid_max_max,
802 	},
803 	{
804 		.procname	= "panic_on_oops",
805 		.data		= &panic_on_oops,
806 		.maxlen		= sizeof(int),
807 		.mode		= 0644,
808 		.proc_handler	= proc_dointvec,
809 	},
810 #if defined CONFIG_PRINTK
811 	{
812 		.procname	= "printk",
813 		.data		= &console_loglevel,
814 		.maxlen		= 4*sizeof(int),
815 		.mode		= 0644,
816 		.proc_handler	= proc_dointvec,
817 	},
818 	{
819 		.procname	= "printk_ratelimit",
820 		.data		= &printk_ratelimit_state.interval,
821 		.maxlen		= sizeof(int),
822 		.mode		= 0644,
823 		.proc_handler	= proc_dointvec_jiffies,
824 	},
825 	{
826 		.procname	= "printk_ratelimit_burst",
827 		.data		= &printk_ratelimit_state.burst,
828 		.maxlen		= sizeof(int),
829 		.mode		= 0644,
830 		.proc_handler	= proc_dointvec,
831 	},
832 	{
833 		.procname	= "printk_delay",
834 		.data		= &printk_delay_msec,
835 		.maxlen		= sizeof(int),
836 		.mode		= 0644,
837 		.proc_handler	= proc_dointvec_minmax,
838 		.extra1		= &zero,
839 		.extra2		= &ten_thousand,
840 	},
841 	{
842 		.procname	= "printk_devkmsg",
843 		.data		= devkmsg_log_str,
844 		.maxlen		= DEVKMSG_STR_MAX_SIZE,
845 		.mode		= 0644,
846 		.proc_handler	= devkmsg_sysctl_set_loglvl,
847 	},
848 	{
849 		.procname	= "dmesg_restrict",
850 		.data		= &dmesg_restrict,
851 		.maxlen		= sizeof(int),
852 		.mode		= 0644,
853 		.proc_handler	= proc_dointvec_minmax_sysadmin,
854 		.extra1		= &zero,
855 		.extra2		= &one,
856 	},
857 	{
858 		.procname	= "kptr_restrict",
859 		.data		= &kptr_restrict,
860 		.maxlen		= sizeof(int),
861 		.mode		= 0644,
862 		.proc_handler	= proc_dointvec_minmax_sysadmin,
863 		.extra1		= &zero,
864 		.extra2		= &two,
865 	},
866 #endif
867 	{
868 		.procname	= "ngroups_max",
869 		.data		= &ngroups_max,
870 		.maxlen		= sizeof (int),
871 		.mode		= 0444,
872 		.proc_handler	= proc_dointvec,
873 	},
874 	{
875 		.procname	= "cap_last_cap",
876 		.data		= (void *)&cap_last_cap,
877 		.maxlen		= sizeof(int),
878 		.mode		= 0444,
879 		.proc_handler	= proc_dointvec,
880 	},
881 #if defined(CONFIG_LOCKUP_DETECTOR)
882 	{
883 		.procname       = "watchdog",
884 		.data		= &watchdog_user_enabled,
885 		.maxlen		= sizeof(int),
886 		.mode		= 0644,
887 		.proc_handler   = proc_watchdog,
888 		.extra1		= &zero,
889 		.extra2		= &one,
890 	},
891 	{
892 		.procname	= "watchdog_thresh",
893 		.data		= &watchdog_thresh,
894 		.maxlen		= sizeof(int),
895 		.mode		= 0644,
896 		.proc_handler	= proc_watchdog_thresh,
897 		.extra1		= &zero,
898 		.extra2		= &sixty,
899 	},
900 	{
901 		.procname       = "nmi_watchdog",
902 		.data		= &nmi_watchdog_user_enabled,
903 		.maxlen		= sizeof(int),
904 		.mode		= NMI_WATCHDOG_SYSCTL_PERM,
905 		.proc_handler   = proc_nmi_watchdog,
906 		.extra1		= &zero,
907 		.extra2		= &one,
908 	},
909 	{
910 		.procname	= "watchdog_cpumask",
911 		.data		= &watchdog_cpumask_bits,
912 		.maxlen		= NR_CPUS,
913 		.mode		= 0644,
914 		.proc_handler	= proc_watchdog_cpumask,
915 	},
916 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
917 	{
918 		.procname       = "soft_watchdog",
919 		.data		= &soft_watchdog_user_enabled,
920 		.maxlen		= sizeof(int),
921 		.mode		= 0644,
922 		.proc_handler   = proc_soft_watchdog,
923 		.extra1		= &zero,
924 		.extra2		= &one,
925 	},
926 	{
927 		.procname	= "softlockup_panic",
928 		.data		= &softlockup_panic,
929 		.maxlen		= sizeof(int),
930 		.mode		= 0644,
931 		.proc_handler	= proc_dointvec_minmax,
932 		.extra1		= &zero,
933 		.extra2		= &one,
934 	},
935 #ifdef CONFIG_SMP
936 	{
937 		.procname	= "softlockup_all_cpu_backtrace",
938 		.data		= &sysctl_softlockup_all_cpu_backtrace,
939 		.maxlen		= sizeof(int),
940 		.mode		= 0644,
941 		.proc_handler	= proc_dointvec_minmax,
942 		.extra1		= &zero,
943 		.extra2		= &one,
944 	},
945 #endif /* CONFIG_SMP */
946 #endif
947 #ifdef CONFIG_HARDLOCKUP_DETECTOR
948 	{
949 		.procname	= "hardlockup_panic",
950 		.data		= &hardlockup_panic,
951 		.maxlen		= sizeof(int),
952 		.mode		= 0644,
953 		.proc_handler	= proc_dointvec_minmax,
954 		.extra1		= &zero,
955 		.extra2		= &one,
956 	},
957 #ifdef CONFIG_SMP
958 	{
959 		.procname	= "hardlockup_all_cpu_backtrace",
960 		.data		= &sysctl_hardlockup_all_cpu_backtrace,
961 		.maxlen		= sizeof(int),
962 		.mode		= 0644,
963 		.proc_handler	= proc_dointvec_minmax,
964 		.extra1		= &zero,
965 		.extra2		= &one,
966 	},
967 #endif /* CONFIG_SMP */
968 #endif
969 #endif
970 
971 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
972 	{
973 		.procname       = "unknown_nmi_panic",
974 		.data           = &unknown_nmi_panic,
975 		.maxlen         = sizeof (int),
976 		.mode           = 0644,
977 		.proc_handler   = proc_dointvec,
978 	},
979 #endif
980 #if defined(CONFIG_X86)
981 	{
982 		.procname	= "panic_on_unrecovered_nmi",
983 		.data		= &panic_on_unrecovered_nmi,
984 		.maxlen		= sizeof(int),
985 		.mode		= 0644,
986 		.proc_handler	= proc_dointvec,
987 	},
988 	{
989 		.procname	= "panic_on_io_nmi",
990 		.data		= &panic_on_io_nmi,
991 		.maxlen		= sizeof(int),
992 		.mode		= 0644,
993 		.proc_handler	= proc_dointvec,
994 	},
995 #ifdef CONFIG_DEBUG_STACKOVERFLOW
996 	{
997 		.procname	= "panic_on_stackoverflow",
998 		.data		= &sysctl_panic_on_stackoverflow,
999 		.maxlen		= sizeof(int),
1000 		.mode		= 0644,
1001 		.proc_handler	= proc_dointvec,
1002 	},
1003 #endif
1004 	{
1005 		.procname	= "bootloader_type",
1006 		.data		= &bootloader_type,
1007 		.maxlen		= sizeof (int),
1008 		.mode		= 0444,
1009 		.proc_handler	= proc_dointvec,
1010 	},
1011 	{
1012 		.procname	= "bootloader_version",
1013 		.data		= &bootloader_version,
1014 		.maxlen		= sizeof (int),
1015 		.mode		= 0444,
1016 		.proc_handler	= proc_dointvec,
1017 	},
1018 	{
1019 		.procname	= "io_delay_type",
1020 		.data		= &io_delay_type,
1021 		.maxlen		= sizeof(int),
1022 		.mode		= 0644,
1023 		.proc_handler	= proc_dointvec,
1024 	},
1025 #endif
1026 #if defined(CONFIG_MMU)
1027 	{
1028 		.procname	= "randomize_va_space",
1029 		.data		= &randomize_va_space,
1030 		.maxlen		= sizeof(int),
1031 		.mode		= 0644,
1032 		.proc_handler	= proc_dointvec,
1033 	},
1034 #endif
1035 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
1036 	{
1037 		.procname	= "spin_retry",
1038 		.data		= &spin_retry,
1039 		.maxlen		= sizeof (int),
1040 		.mode		= 0644,
1041 		.proc_handler	= proc_dointvec,
1042 	},
1043 #endif
1044 #if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
1045 	{
1046 		.procname	= "acpi_video_flags",
1047 		.data		= &acpi_realmode_flags,
1048 		.maxlen		= sizeof (unsigned long),
1049 		.mode		= 0644,
1050 		.proc_handler	= proc_doulongvec_minmax,
1051 	},
1052 #endif
1053 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
1054 	{
1055 		.procname	= "ignore-unaligned-usertrap",
1056 		.data		= &no_unaligned_warning,
1057 		.maxlen		= sizeof (int),
1058 	 	.mode		= 0644,
1059 		.proc_handler	= proc_dointvec,
1060 	},
1061 #endif
1062 #ifdef CONFIG_IA64
1063 	{
1064 		.procname	= "unaligned-dump-stack",
1065 		.data		= &unaligned_dump_stack,
1066 		.maxlen		= sizeof (int),
1067 		.mode		= 0644,
1068 		.proc_handler	= proc_dointvec,
1069 	},
1070 #endif
1071 #ifdef CONFIG_DETECT_HUNG_TASK
1072 	{
1073 		.procname	= "hung_task_panic",
1074 		.data		= &sysctl_hung_task_panic,
1075 		.maxlen		= sizeof(int),
1076 		.mode		= 0644,
1077 		.proc_handler	= proc_dointvec_minmax,
1078 		.extra1		= &zero,
1079 		.extra2		= &one,
1080 	},
1081 	{
1082 		.procname	= "hung_task_check_count",
1083 		.data		= &sysctl_hung_task_check_count,
1084 		.maxlen		= sizeof(int),
1085 		.mode		= 0644,
1086 		.proc_handler	= proc_dointvec_minmax,
1087 		.extra1		= &zero,
1088 	},
1089 	{
1090 		.procname	= "hung_task_timeout_secs",
1091 		.data		= &sysctl_hung_task_timeout_secs,
1092 		.maxlen		= sizeof(unsigned long),
1093 		.mode		= 0644,
1094 		.proc_handler	= proc_dohung_task_timeout_secs,
1095 		.extra2		= &hung_task_timeout_max,
1096 	},
1097 	{
1098 		.procname	= "hung_task_check_interval_secs",
1099 		.data		= &sysctl_hung_task_check_interval_secs,
1100 		.maxlen		= sizeof(unsigned long),
1101 		.mode		= 0644,
1102 		.proc_handler	= proc_dohung_task_timeout_secs,
1103 		.extra2		= &hung_task_timeout_max,
1104 	},
1105 	{
1106 		.procname	= "hung_task_warnings",
1107 		.data		= &sysctl_hung_task_warnings,
1108 		.maxlen		= sizeof(int),
1109 		.mode		= 0644,
1110 		.proc_handler	= proc_dointvec_minmax,
1111 		.extra1		= &neg_one,
1112 	},
1113 #endif
1114 #ifdef CONFIG_RT_MUTEXES
1115 	{
1116 		.procname	= "max_lock_depth",
1117 		.data		= &max_lock_depth,
1118 		.maxlen		= sizeof(int),
1119 		.mode		= 0644,
1120 		.proc_handler	= proc_dointvec,
1121 	},
1122 #endif
1123 	{
1124 		.procname	= "poweroff_cmd",
1125 		.data		= &poweroff_cmd,
1126 		.maxlen		= POWEROFF_CMD_PATH_LEN,
1127 		.mode		= 0644,
1128 		.proc_handler	= proc_dostring,
1129 	},
1130 #ifdef CONFIG_KEYS
1131 	{
1132 		.procname	= "keys",
1133 		.mode		= 0555,
1134 		.child		= key_sysctls,
1135 	},
1136 #endif
1137 #ifdef CONFIG_PERF_EVENTS
1138 	/*
1139 	 * User-space scripts rely on the existence of this file
1140 	 * as a feature check for perf_events being enabled.
1141 	 *
1142 	 * So it's an ABI, do not remove!
1143 	 */
1144 	{
1145 		.procname	= "perf_event_paranoid",
1146 		.data		= &sysctl_perf_event_paranoid,
1147 		.maxlen		= sizeof(sysctl_perf_event_paranoid),
1148 		.mode		= 0644,
1149 		.proc_handler	= proc_dointvec,
1150 	},
1151 	{
1152 		.procname	= "perf_event_mlock_kb",
1153 		.data		= &sysctl_perf_event_mlock,
1154 		.maxlen		= sizeof(sysctl_perf_event_mlock),
1155 		.mode		= 0644,
1156 		.proc_handler	= proc_dointvec,
1157 	},
1158 	{
1159 		.procname	= "perf_event_max_sample_rate",
1160 		.data		= &sysctl_perf_event_sample_rate,
1161 		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
1162 		.mode		= 0644,
1163 		.proc_handler	= perf_proc_update_handler,
1164 		.extra1		= &one,
1165 	},
1166 	{
1167 		.procname	= "perf_cpu_time_max_percent",
1168 		.data		= &sysctl_perf_cpu_time_max_percent,
1169 		.maxlen		= sizeof(sysctl_perf_cpu_time_max_percent),
1170 		.mode		= 0644,
1171 		.proc_handler	= perf_cpu_time_max_percent_handler,
1172 		.extra1		= &zero,
1173 		.extra2		= &one_hundred,
1174 	},
1175 	{
1176 		.procname	= "perf_event_max_stack",
1177 		.data		= &sysctl_perf_event_max_stack,
1178 		.maxlen		= sizeof(sysctl_perf_event_max_stack),
1179 		.mode		= 0644,
1180 		.proc_handler	= perf_event_max_stack_handler,
1181 		.extra1		= &zero,
1182 		.extra2		= &six_hundred_forty_kb,
1183 	},
1184 	{
1185 		.procname	= "perf_event_max_contexts_per_stack",
1186 		.data		= &sysctl_perf_event_max_contexts_per_stack,
1187 		.maxlen		= sizeof(sysctl_perf_event_max_contexts_per_stack),
1188 		.mode		= 0644,
1189 		.proc_handler	= perf_event_max_stack_handler,
1190 		.extra1		= &zero,
1191 		.extra2		= &one_thousand,
1192 	},
1193 #endif
1194 	{
1195 		.procname	= "panic_on_warn",
1196 		.data		= &panic_on_warn,
1197 		.maxlen		= sizeof(int),
1198 		.mode		= 0644,
1199 		.proc_handler	= proc_dointvec_minmax,
1200 		.extra1		= &zero,
1201 		.extra2		= &one,
1202 	},
1203 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
1204 	{
1205 		.procname	= "timer_migration",
1206 		.data		= &sysctl_timer_migration,
1207 		.maxlen		= sizeof(unsigned int),
1208 		.mode		= 0644,
1209 		.proc_handler	= timer_migration_handler,
1210 		.extra1		= &zero,
1211 		.extra2		= &one,
1212 	},
1213 #endif
1214 #ifdef CONFIG_BPF_SYSCALL
1215 	{
1216 		.procname	= "unprivileged_bpf_disabled",
1217 		.data		= &sysctl_unprivileged_bpf_disabled,
1218 		.maxlen		= sizeof(sysctl_unprivileged_bpf_disabled),
1219 		.mode		= 0644,
1220 		/* only handle a transition from default "0" to "1" */
1221 		.proc_handler	= proc_dointvec_minmax,
1222 		.extra1		= &one,
1223 		.extra2		= &one,
1224 	},
1225 #endif
1226 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
1227 	{
1228 		.procname	= "panic_on_rcu_stall",
1229 		.data		= &sysctl_panic_on_rcu_stall,
1230 		.maxlen		= sizeof(sysctl_panic_on_rcu_stall),
1231 		.mode		= 0644,
1232 		.proc_handler	= proc_dointvec_minmax,
1233 		.extra1		= &zero,
1234 		.extra2		= &one,
1235 	},
1236 #endif
1237 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
1238 	{
1239 		.procname	= "stack_erasing",
1240 		.data		= NULL,
1241 		.maxlen		= sizeof(int),
1242 		.mode		= 0600,
1243 		.proc_handler	= stack_erasing_sysctl,
1244 		.extra1		= &zero,
1245 		.extra2		= &one,
1246 	},
1247 #endif
1248 	{ }
1249 };
1250 
1251 static struct ctl_table vm_table[] = {
1252 	{
1253 		.procname	= "overcommit_memory",
1254 		.data		= &sysctl_overcommit_memory,
1255 		.maxlen		= sizeof(sysctl_overcommit_memory),
1256 		.mode		= 0644,
1257 		.proc_handler	= proc_dointvec_minmax,
1258 		.extra1		= &zero,
1259 		.extra2		= &two,
1260 	},
1261 	{
1262 		.procname	= "panic_on_oom",
1263 		.data		= &sysctl_panic_on_oom,
1264 		.maxlen		= sizeof(sysctl_panic_on_oom),
1265 		.mode		= 0644,
1266 		.proc_handler	= proc_dointvec_minmax,
1267 		.extra1		= &zero,
1268 		.extra2		= &two,
1269 	},
1270 	{
1271 		.procname	= "oom_kill_allocating_task",
1272 		.data		= &sysctl_oom_kill_allocating_task,
1273 		.maxlen		= sizeof(sysctl_oom_kill_allocating_task),
1274 		.mode		= 0644,
1275 		.proc_handler	= proc_dointvec,
1276 	},
1277 	{
1278 		.procname	= "oom_dump_tasks",
1279 		.data		= &sysctl_oom_dump_tasks,
1280 		.maxlen		= sizeof(sysctl_oom_dump_tasks),
1281 		.mode		= 0644,
1282 		.proc_handler	= proc_dointvec,
1283 	},
1284 	{
1285 		.procname	= "overcommit_ratio",
1286 		.data		= &sysctl_overcommit_ratio,
1287 		.maxlen		= sizeof(sysctl_overcommit_ratio),
1288 		.mode		= 0644,
1289 		.proc_handler	= overcommit_ratio_handler,
1290 	},
1291 	{
1292 		.procname	= "overcommit_kbytes",
1293 		.data		= &sysctl_overcommit_kbytes,
1294 		.maxlen		= sizeof(sysctl_overcommit_kbytes),
1295 		.mode		= 0644,
1296 		.proc_handler	= overcommit_kbytes_handler,
1297 	},
1298 	{
1299 		.procname	= "page-cluster",
1300 		.data		= &page_cluster,
1301 		.maxlen		= sizeof(int),
1302 		.mode		= 0644,
1303 		.proc_handler	= proc_dointvec_minmax,
1304 		.extra1		= &zero,
1305 	},
1306 	{
1307 		.procname	= "dirty_background_ratio",
1308 		.data		= &dirty_background_ratio,
1309 		.maxlen		= sizeof(dirty_background_ratio),
1310 		.mode		= 0644,
1311 		.proc_handler	= dirty_background_ratio_handler,
1312 		.extra1		= &zero,
1313 		.extra2		= &one_hundred,
1314 	},
1315 	{
1316 		.procname	= "dirty_background_bytes",
1317 		.data		= &dirty_background_bytes,
1318 		.maxlen		= sizeof(dirty_background_bytes),
1319 		.mode		= 0644,
1320 		.proc_handler	= dirty_background_bytes_handler,
1321 		.extra1		= &one_ul,
1322 	},
1323 	{
1324 		.procname	= "dirty_ratio",
1325 		.data		= &vm_dirty_ratio,
1326 		.maxlen		= sizeof(vm_dirty_ratio),
1327 		.mode		= 0644,
1328 		.proc_handler	= dirty_ratio_handler,
1329 		.extra1		= &zero,
1330 		.extra2		= &one_hundred,
1331 	},
1332 	{
1333 		.procname	= "dirty_bytes",
1334 		.data		= &vm_dirty_bytes,
1335 		.maxlen		= sizeof(vm_dirty_bytes),
1336 		.mode		= 0644,
1337 		.proc_handler	= dirty_bytes_handler,
1338 		.extra1		= &dirty_bytes_min,
1339 	},
1340 	{
1341 		.procname	= "dirty_writeback_centisecs",
1342 		.data		= &dirty_writeback_interval,
1343 		.maxlen		= sizeof(dirty_writeback_interval),
1344 		.mode		= 0644,
1345 		.proc_handler	= dirty_writeback_centisecs_handler,
1346 	},
1347 	{
1348 		.procname	= "dirty_expire_centisecs",
1349 		.data		= &dirty_expire_interval,
1350 		.maxlen		= sizeof(dirty_expire_interval),
1351 		.mode		= 0644,
1352 		.proc_handler	= proc_dointvec_minmax,
1353 		.extra1		= &zero,
1354 	},
1355 	{
1356 		.procname	= "dirtytime_expire_seconds",
1357 		.data		= &dirtytime_expire_interval,
1358 		.maxlen		= sizeof(dirtytime_expire_interval),
1359 		.mode		= 0644,
1360 		.proc_handler	= dirtytime_interval_handler,
1361 		.extra1		= &zero,
1362 	},
1363 	{
1364 		.procname	= "swappiness",
1365 		.data		= &vm_swappiness,
1366 		.maxlen		= sizeof(vm_swappiness),
1367 		.mode		= 0644,
1368 		.proc_handler	= proc_dointvec_minmax,
1369 		.extra1		= &zero,
1370 		.extra2		= &one_hundred,
1371 	},
1372 #ifdef CONFIG_HUGETLB_PAGE
1373 	{
1374 		.procname	= "nr_hugepages",
1375 		.data		= NULL,
1376 		.maxlen		= sizeof(unsigned long),
1377 		.mode		= 0644,
1378 		.proc_handler	= hugetlb_sysctl_handler,
1379 	},
1380 #ifdef CONFIG_NUMA
1381 	{
1382 		.procname       = "nr_hugepages_mempolicy",
1383 		.data           = NULL,
1384 		.maxlen         = sizeof(unsigned long),
1385 		.mode           = 0644,
1386 		.proc_handler   = &hugetlb_mempolicy_sysctl_handler,
1387 	},
1388 	{
1389 		.procname		= "numa_stat",
1390 		.data			= &sysctl_vm_numa_stat,
1391 		.maxlen			= sizeof(int),
1392 		.mode			= 0644,
1393 		.proc_handler	= sysctl_vm_numa_stat_handler,
1394 		.extra1			= &zero,
1395 		.extra2			= &one,
1396 	},
1397 #endif
1398 	 {
1399 		.procname	= "hugetlb_shm_group",
1400 		.data		= &sysctl_hugetlb_shm_group,
1401 		.maxlen		= sizeof(gid_t),
1402 		.mode		= 0644,
1403 		.proc_handler	= proc_dointvec,
1404 	 },
1405 	{
1406 		.procname	= "nr_overcommit_hugepages",
1407 		.data		= NULL,
1408 		.maxlen		= sizeof(unsigned long),
1409 		.mode		= 0644,
1410 		.proc_handler	= hugetlb_overcommit_handler,
1411 	},
1412 #endif
1413 	{
1414 		.procname	= "lowmem_reserve_ratio",
1415 		.data		= &sysctl_lowmem_reserve_ratio,
1416 		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
1417 		.mode		= 0644,
1418 		.proc_handler	= lowmem_reserve_ratio_sysctl_handler,
1419 	},
1420 	{
1421 		.procname	= "drop_caches",
1422 		.data		= &sysctl_drop_caches,
1423 		.maxlen		= sizeof(int),
1424 		.mode		= 0644,
1425 		.proc_handler	= drop_caches_sysctl_handler,
1426 		.extra1		= &one,
1427 		.extra2		= &four,
1428 	},
1429 #ifdef CONFIG_COMPACTION
1430 	{
1431 		.procname	= "compact_memory",
1432 		.data		= &sysctl_compact_memory,
1433 		.maxlen		= sizeof(int),
1434 		.mode		= 0200,
1435 		.proc_handler	= sysctl_compaction_handler,
1436 	},
1437 	{
1438 		.procname	= "extfrag_threshold",
1439 		.data		= &sysctl_extfrag_threshold,
1440 		.maxlen		= sizeof(int),
1441 		.mode		= 0644,
1442 		.proc_handler	= sysctl_extfrag_handler,
1443 		.extra1		= &min_extfrag_threshold,
1444 		.extra2		= &max_extfrag_threshold,
1445 	},
1446 	{
1447 		.procname	= "compact_unevictable_allowed",
1448 		.data		= &sysctl_compact_unevictable_allowed,
1449 		.maxlen		= sizeof(int),
1450 		.mode		= 0644,
1451 		.proc_handler	= proc_dointvec,
1452 		.extra1		= &zero,
1453 		.extra2		= &one,
1454 	},
1455 
1456 #endif /* CONFIG_COMPACTION */
1457 	{
1458 		.procname	= "min_free_kbytes",
1459 		.data		= &min_free_kbytes,
1460 		.maxlen		= sizeof(min_free_kbytes),
1461 		.mode		= 0644,
1462 		.proc_handler	= min_free_kbytes_sysctl_handler,
1463 		.extra1		= &zero,
1464 	},
1465 	{
1466 		.procname	= "watermark_boost_factor",
1467 		.data		= &watermark_boost_factor,
1468 		.maxlen		= sizeof(watermark_boost_factor),
1469 		.mode		= 0644,
1470 		.proc_handler	= watermark_boost_factor_sysctl_handler,
1471 		.extra1		= &zero,
1472 	},
1473 	{
1474 		.procname	= "watermark_scale_factor",
1475 		.data		= &watermark_scale_factor,
1476 		.maxlen		= sizeof(watermark_scale_factor),
1477 		.mode		= 0644,
1478 		.proc_handler	= watermark_scale_factor_sysctl_handler,
1479 		.extra1		= &one,
1480 		.extra2		= &one_thousand,
1481 	},
1482 	{
1483 		.procname	= "percpu_pagelist_fraction",
1484 		.data		= &percpu_pagelist_fraction,
1485 		.maxlen		= sizeof(percpu_pagelist_fraction),
1486 		.mode		= 0644,
1487 		.proc_handler	= percpu_pagelist_fraction_sysctl_handler,
1488 		.extra1		= &zero,
1489 	},
1490 #ifdef CONFIG_MMU
1491 	{
1492 		.procname	= "max_map_count",
1493 		.data		= &sysctl_max_map_count,
1494 		.maxlen		= sizeof(sysctl_max_map_count),
1495 		.mode		= 0644,
1496 		.proc_handler	= proc_dointvec_minmax,
1497 		.extra1		= &zero,
1498 	},
1499 #else
1500 	{
1501 		.procname	= "nr_trim_pages",
1502 		.data		= &sysctl_nr_trim_pages,
1503 		.maxlen		= sizeof(sysctl_nr_trim_pages),
1504 		.mode		= 0644,
1505 		.proc_handler	= proc_dointvec_minmax,
1506 		.extra1		= &zero,
1507 	},
1508 #endif
1509 	{
1510 		.procname	= "laptop_mode",
1511 		.data		= &laptop_mode,
1512 		.maxlen		= sizeof(laptop_mode),
1513 		.mode		= 0644,
1514 		.proc_handler	= proc_dointvec_jiffies,
1515 	},
1516 	{
1517 		.procname	= "block_dump",
1518 		.data		= &block_dump,
1519 		.maxlen		= sizeof(block_dump),
1520 		.mode		= 0644,
1521 		.proc_handler	= proc_dointvec,
1522 		.extra1		= &zero,
1523 	},
1524 	{
1525 		.procname	= "vfs_cache_pressure",
1526 		.data		= &sysctl_vfs_cache_pressure,
1527 		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
1528 		.mode		= 0644,
1529 		.proc_handler	= proc_dointvec,
1530 		.extra1		= &zero,
1531 	},
1532 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1533 	{
1534 		.procname	= "legacy_va_layout",
1535 		.data		= &sysctl_legacy_va_layout,
1536 		.maxlen		= sizeof(sysctl_legacy_va_layout),
1537 		.mode		= 0644,
1538 		.proc_handler	= proc_dointvec,
1539 		.extra1		= &zero,
1540 	},
1541 #endif
1542 #ifdef CONFIG_NUMA
1543 	{
1544 		.procname	= "zone_reclaim_mode",
1545 		.data		= &node_reclaim_mode,
1546 		.maxlen		= sizeof(node_reclaim_mode),
1547 		.mode		= 0644,
1548 		.proc_handler	= proc_dointvec,
1549 		.extra1		= &zero,
1550 	},
1551 	{
1552 		.procname	= "min_unmapped_ratio",
1553 		.data		= &sysctl_min_unmapped_ratio,
1554 		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
1555 		.mode		= 0644,
1556 		.proc_handler	= sysctl_min_unmapped_ratio_sysctl_handler,
1557 		.extra1		= &zero,
1558 		.extra2		= &one_hundred,
1559 	},
1560 	{
1561 		.procname	= "min_slab_ratio",
1562 		.data		= &sysctl_min_slab_ratio,
1563 		.maxlen		= sizeof(sysctl_min_slab_ratio),
1564 		.mode		= 0644,
1565 		.proc_handler	= sysctl_min_slab_ratio_sysctl_handler,
1566 		.extra1		= &zero,
1567 		.extra2		= &one_hundred,
1568 	},
1569 #endif
1570 #ifdef CONFIG_SMP
1571 	{
1572 		.procname	= "stat_interval",
1573 		.data		= &sysctl_stat_interval,
1574 		.maxlen		= sizeof(sysctl_stat_interval),
1575 		.mode		= 0644,
1576 		.proc_handler	= proc_dointvec_jiffies,
1577 	},
1578 	{
1579 		.procname	= "stat_refresh",
1580 		.data		= NULL,
1581 		.maxlen		= 0,
1582 		.mode		= 0600,
1583 		.proc_handler	= vmstat_refresh,
1584 	},
1585 #endif
1586 #ifdef CONFIG_MMU
1587 	{
1588 		.procname	= "mmap_min_addr",
1589 		.data		= &dac_mmap_min_addr,
1590 		.maxlen		= sizeof(unsigned long),
1591 		.mode		= 0644,
1592 		.proc_handler	= mmap_min_addr_handler,
1593 	},
1594 #endif
1595 #ifdef CONFIG_NUMA
1596 	{
1597 		.procname	= "numa_zonelist_order",
1598 		.data		= &numa_zonelist_order,
1599 		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
1600 		.mode		= 0644,
1601 		.proc_handler	= numa_zonelist_order_handler,
1602 	},
1603 #endif
1604 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1605    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1606 	{
1607 		.procname	= "vdso_enabled",
1608 #ifdef CONFIG_X86_32
1609 		.data		= &vdso32_enabled,
1610 		.maxlen		= sizeof(vdso32_enabled),
1611 #else
1612 		.data		= &vdso_enabled,
1613 		.maxlen		= sizeof(vdso_enabled),
1614 #endif
1615 		.mode		= 0644,
1616 		.proc_handler	= proc_dointvec,
1617 		.extra1		= &zero,
1618 	},
1619 #endif
1620 #ifdef CONFIG_HIGHMEM
1621 	{
1622 		.procname	= "highmem_is_dirtyable",
1623 		.data		= &vm_highmem_is_dirtyable,
1624 		.maxlen		= sizeof(vm_highmem_is_dirtyable),
1625 		.mode		= 0644,
1626 		.proc_handler	= proc_dointvec_minmax,
1627 		.extra1		= &zero,
1628 		.extra2		= &one,
1629 	},
1630 #endif
1631 #ifdef CONFIG_MEMORY_FAILURE
1632 	{
1633 		.procname	= "memory_failure_early_kill",
1634 		.data		= &sysctl_memory_failure_early_kill,
1635 		.maxlen		= sizeof(sysctl_memory_failure_early_kill),
1636 		.mode		= 0644,
1637 		.proc_handler	= proc_dointvec_minmax,
1638 		.extra1		= &zero,
1639 		.extra2		= &one,
1640 	},
1641 	{
1642 		.procname	= "memory_failure_recovery",
1643 		.data		= &sysctl_memory_failure_recovery,
1644 		.maxlen		= sizeof(sysctl_memory_failure_recovery),
1645 		.mode		= 0644,
1646 		.proc_handler	= proc_dointvec_minmax,
1647 		.extra1		= &zero,
1648 		.extra2		= &one,
1649 	},
1650 #endif
1651 	{
1652 		.procname	= "user_reserve_kbytes",
1653 		.data		= &sysctl_user_reserve_kbytes,
1654 		.maxlen		= sizeof(sysctl_user_reserve_kbytes),
1655 		.mode		= 0644,
1656 		.proc_handler	= proc_doulongvec_minmax,
1657 	},
1658 	{
1659 		.procname	= "admin_reserve_kbytes",
1660 		.data		= &sysctl_admin_reserve_kbytes,
1661 		.maxlen		= sizeof(sysctl_admin_reserve_kbytes),
1662 		.mode		= 0644,
1663 		.proc_handler	= proc_doulongvec_minmax,
1664 	},
1665 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
1666 	{
1667 		.procname	= "mmap_rnd_bits",
1668 		.data		= &mmap_rnd_bits,
1669 		.maxlen		= sizeof(mmap_rnd_bits),
1670 		.mode		= 0600,
1671 		.proc_handler	= proc_dointvec_minmax,
1672 		.extra1		= (void *)&mmap_rnd_bits_min,
1673 		.extra2		= (void *)&mmap_rnd_bits_max,
1674 	},
1675 #endif
1676 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
1677 	{
1678 		.procname	= "mmap_rnd_compat_bits",
1679 		.data		= &mmap_rnd_compat_bits,
1680 		.maxlen		= sizeof(mmap_rnd_compat_bits),
1681 		.mode		= 0600,
1682 		.proc_handler	= proc_dointvec_minmax,
1683 		.extra1		= (void *)&mmap_rnd_compat_bits_min,
1684 		.extra2		= (void *)&mmap_rnd_compat_bits_max,
1685 	},
1686 #endif
1687 	{ }
1688 };
1689 
1690 static struct ctl_table fs_table[] = {
1691 	{
1692 		.procname	= "inode-nr",
1693 		.data		= &inodes_stat,
1694 		.maxlen		= 2*sizeof(long),
1695 		.mode		= 0444,
1696 		.proc_handler	= proc_nr_inodes,
1697 	},
1698 	{
1699 		.procname	= "inode-state",
1700 		.data		= &inodes_stat,
1701 		.maxlen		= 7*sizeof(long),
1702 		.mode		= 0444,
1703 		.proc_handler	= proc_nr_inodes,
1704 	},
1705 	{
1706 		.procname	= "file-nr",
1707 		.data		= &files_stat,
1708 		.maxlen		= sizeof(files_stat),
1709 		.mode		= 0444,
1710 		.proc_handler	= proc_nr_files,
1711 	},
1712 	{
1713 		.procname	= "file-max",
1714 		.data		= &files_stat.max_files,
1715 		.maxlen		= sizeof(files_stat.max_files),
1716 		.mode		= 0644,
1717 		.proc_handler	= proc_doulongvec_minmax,
1718 	},
1719 	{
1720 		.procname	= "nr_open",
1721 		.data		= &sysctl_nr_open,
1722 		.maxlen		= sizeof(unsigned int),
1723 		.mode		= 0644,
1724 		.proc_handler	= proc_dointvec_minmax,
1725 		.extra1		= &sysctl_nr_open_min,
1726 		.extra2		= &sysctl_nr_open_max,
1727 	},
1728 	{
1729 		.procname	= "dentry-state",
1730 		.data		= &dentry_stat,
1731 		.maxlen		= 6*sizeof(long),
1732 		.mode		= 0444,
1733 		.proc_handler	= proc_nr_dentry,
1734 	},
1735 	{
1736 		.procname	= "overflowuid",
1737 		.data		= &fs_overflowuid,
1738 		.maxlen		= sizeof(int),
1739 		.mode		= 0644,
1740 		.proc_handler	= proc_dointvec_minmax,
1741 		.extra1		= &minolduid,
1742 		.extra2		= &maxolduid,
1743 	},
1744 	{
1745 		.procname	= "overflowgid",
1746 		.data		= &fs_overflowgid,
1747 		.maxlen		= sizeof(int),
1748 		.mode		= 0644,
1749 		.proc_handler	= proc_dointvec_minmax,
1750 		.extra1		= &minolduid,
1751 		.extra2		= &maxolduid,
1752 	},
1753 #ifdef CONFIG_FILE_LOCKING
1754 	{
1755 		.procname	= "leases-enable",
1756 		.data		= &leases_enable,
1757 		.maxlen		= sizeof(int),
1758 		.mode		= 0644,
1759 		.proc_handler	= proc_dointvec,
1760 	},
1761 #endif
1762 #ifdef CONFIG_DNOTIFY
1763 	{
1764 		.procname	= "dir-notify-enable",
1765 		.data		= &dir_notify_enable,
1766 		.maxlen		= sizeof(int),
1767 		.mode		= 0644,
1768 		.proc_handler	= proc_dointvec,
1769 	},
1770 #endif
1771 #ifdef CONFIG_MMU
1772 #ifdef CONFIG_FILE_LOCKING
1773 	{
1774 		.procname	= "lease-break-time",
1775 		.data		= &lease_break_time,
1776 		.maxlen		= sizeof(int),
1777 		.mode		= 0644,
1778 		.proc_handler	= proc_dointvec,
1779 	},
1780 #endif
1781 #ifdef CONFIG_AIO
1782 	{
1783 		.procname	= "aio-nr",
1784 		.data		= &aio_nr,
1785 		.maxlen		= sizeof(aio_nr),
1786 		.mode		= 0444,
1787 		.proc_handler	= proc_doulongvec_minmax,
1788 	},
1789 	{
1790 		.procname	= "aio-max-nr",
1791 		.data		= &aio_max_nr,
1792 		.maxlen		= sizeof(aio_max_nr),
1793 		.mode		= 0644,
1794 		.proc_handler	= proc_doulongvec_minmax,
1795 	},
1796 #endif /* CONFIG_AIO */
1797 #ifdef CONFIG_INOTIFY_USER
1798 	{
1799 		.procname	= "inotify",
1800 		.mode		= 0555,
1801 		.child		= inotify_table,
1802 	},
1803 #endif
1804 #ifdef CONFIG_EPOLL
1805 	{
1806 		.procname	= "epoll",
1807 		.mode		= 0555,
1808 		.child		= epoll_table,
1809 	},
1810 #endif
1811 #endif
1812 	{
1813 		.procname	= "protected_symlinks",
1814 		.data		= &sysctl_protected_symlinks,
1815 		.maxlen		= sizeof(int),
1816 		.mode		= 0600,
1817 		.proc_handler	= proc_dointvec_minmax,
1818 		.extra1		= &zero,
1819 		.extra2		= &one,
1820 	},
1821 	{
1822 		.procname	= "protected_hardlinks",
1823 		.data		= &sysctl_protected_hardlinks,
1824 		.maxlen		= sizeof(int),
1825 		.mode		= 0600,
1826 		.proc_handler	= proc_dointvec_minmax,
1827 		.extra1		= &zero,
1828 		.extra2		= &one,
1829 	},
1830 	{
1831 		.procname	= "protected_fifos",
1832 		.data		= &sysctl_protected_fifos,
1833 		.maxlen		= sizeof(int),
1834 		.mode		= 0600,
1835 		.proc_handler	= proc_dointvec_minmax,
1836 		.extra1		= &zero,
1837 		.extra2		= &two,
1838 	},
1839 	{
1840 		.procname	= "protected_regular",
1841 		.data		= &sysctl_protected_regular,
1842 		.maxlen		= sizeof(int),
1843 		.mode		= 0600,
1844 		.proc_handler	= proc_dointvec_minmax,
1845 		.extra1		= &zero,
1846 		.extra2		= &two,
1847 	},
1848 	{
1849 		.procname	= "suid_dumpable",
1850 		.data		= &suid_dumpable,
1851 		.maxlen		= sizeof(int),
1852 		.mode		= 0644,
1853 		.proc_handler	= proc_dointvec_minmax_coredump,
1854 		.extra1		= &zero,
1855 		.extra2		= &two,
1856 	},
1857 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1858 	{
1859 		.procname	= "binfmt_misc",
1860 		.mode		= 0555,
1861 		.child		= sysctl_mount_point,
1862 	},
1863 #endif
1864 	{
1865 		.procname	= "pipe-max-size",
1866 		.data		= &pipe_max_size,
1867 		.maxlen		= sizeof(pipe_max_size),
1868 		.mode		= 0644,
1869 		.proc_handler	= proc_dopipe_max_size,
1870 	},
1871 	{
1872 		.procname	= "pipe-user-pages-hard",
1873 		.data		= &pipe_user_pages_hard,
1874 		.maxlen		= sizeof(pipe_user_pages_hard),
1875 		.mode		= 0644,
1876 		.proc_handler	= proc_doulongvec_minmax,
1877 	},
1878 	{
1879 		.procname	= "pipe-user-pages-soft",
1880 		.data		= &pipe_user_pages_soft,
1881 		.maxlen		= sizeof(pipe_user_pages_soft),
1882 		.mode		= 0644,
1883 		.proc_handler	= proc_doulongvec_minmax,
1884 	},
1885 	{
1886 		.procname	= "mount-max",
1887 		.data		= &sysctl_mount_max,
1888 		.maxlen		= sizeof(unsigned int),
1889 		.mode		= 0644,
1890 		.proc_handler	= proc_dointvec_minmax,
1891 		.extra1		= &one,
1892 	},
1893 	{ }
1894 };
1895 
1896 static struct ctl_table debug_table[] = {
1897 #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
1898 	{
1899 		.procname	= "exception-trace",
1900 		.data		= &show_unhandled_signals,
1901 		.maxlen		= sizeof(int),
1902 		.mode		= 0644,
1903 		.proc_handler	= proc_dointvec
1904 	},
1905 #endif
1906 #if defined(CONFIG_OPTPROBES)
1907 	{
1908 		.procname	= "kprobes-optimization",
1909 		.data		= &sysctl_kprobes_optimization,
1910 		.maxlen		= sizeof(int),
1911 		.mode		= 0644,
1912 		.proc_handler	= proc_kprobes_optimization_handler,
1913 		.extra1		= &zero,
1914 		.extra2		= &one,
1915 	},
1916 #endif
1917 	{ }
1918 };
1919 
1920 static struct ctl_table dev_table[] = {
1921 	{ }
1922 };
1923 
1924 int __init sysctl_init(void)
1925 {
1926 	struct ctl_table_header *hdr;
1927 
1928 	hdr = register_sysctl_table(sysctl_base_table);
1929 	kmemleak_not_leak(hdr);
1930 	return 0;
1931 }
1932 
1933 #endif /* CONFIG_SYSCTL */
1934 
1935 /*
1936  * /proc/sys support
1937  */
1938 
1939 #ifdef CONFIG_PROC_SYSCTL
1940 
1941 static int _proc_do_string(char *data, int maxlen, int write,
1942 			   char __user *buffer,
1943 			   size_t *lenp, loff_t *ppos)
1944 {
1945 	size_t len;
1946 	char __user *p;
1947 	char c;
1948 
1949 	if (!data || !maxlen || !*lenp) {
1950 		*lenp = 0;
1951 		return 0;
1952 	}
1953 
1954 	if (write) {
1955 		if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
1956 			/* Only continue writes not past the end of buffer. */
1957 			len = strlen(data);
1958 			if (len > maxlen - 1)
1959 				len = maxlen - 1;
1960 
1961 			if (*ppos > len)
1962 				return 0;
1963 			len = *ppos;
1964 		} else {
1965 			/* Start writing from beginning of buffer. */
1966 			len = 0;
1967 		}
1968 
1969 		*ppos += *lenp;
1970 		p = buffer;
1971 		while ((p - buffer) < *lenp && len < maxlen - 1) {
1972 			if (get_user(c, p++))
1973 				return -EFAULT;
1974 			if (c == 0 || c == '\n')
1975 				break;
1976 			data[len++] = c;
1977 		}
1978 		data[len] = 0;
1979 	} else {
1980 		len = strlen(data);
1981 		if (len > maxlen)
1982 			len = maxlen;
1983 
1984 		if (*ppos > len) {
1985 			*lenp = 0;
1986 			return 0;
1987 		}
1988 
1989 		data += *ppos;
1990 		len  -= *ppos;
1991 
1992 		if (len > *lenp)
1993 			len = *lenp;
1994 		if (len)
1995 			if (copy_to_user(buffer, data, len))
1996 				return -EFAULT;
1997 		if (len < *lenp) {
1998 			if (put_user('\n', buffer + len))
1999 				return -EFAULT;
2000 			len++;
2001 		}
2002 		*lenp = len;
2003 		*ppos += len;
2004 	}
2005 	return 0;
2006 }
2007 
2008 static void warn_sysctl_write(struct ctl_table *table)
2009 {
2010 	pr_warn_once("%s wrote to %s when file position was not 0!\n"
2011 		"This will not be supported in the future. To silence this\n"
2012 		"warning, set kernel.sysctl_writes_strict = -1\n",
2013 		current->comm, table->procname);
2014 }
2015 
2016 /**
2017  * proc_first_pos_non_zero_ignore - check if first position is allowed
2018  * @ppos: file position
2019  * @table: the sysctl table
2020  *
2021  * Returns true if the first position is non-zero and the sysctl_writes_strict
2022  * mode indicates this is not allowed for numeric input types. String proc
2023  * handlers can ignore the return value.
2024  */
2025 static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
2026 					   struct ctl_table *table)
2027 {
2028 	if (!*ppos)
2029 		return false;
2030 
2031 	switch (sysctl_writes_strict) {
2032 	case SYSCTL_WRITES_STRICT:
2033 		return true;
2034 	case SYSCTL_WRITES_WARN:
2035 		warn_sysctl_write(table);
2036 		return false;
2037 	default:
2038 		return false;
2039 	}
2040 }
2041 
2042 /**
2043  * proc_dostring - read a string sysctl
2044  * @table: the sysctl table
2045  * @write: %TRUE if this is a write to the sysctl file
2046  * @buffer: the user buffer
2047  * @lenp: the size of the user buffer
2048  * @ppos: file position
2049  *
2050  * Reads/writes a string from/to the user buffer. If the kernel
2051  * buffer provided is not large enough to hold the string, the
2052  * string is truncated. The copied string is %NULL-terminated.
2053  * If the string is being read by the user process, it is copied
2054  * and a newline '\n' is added. It is truncated if the buffer is
2055  * not large enough.
2056  *
2057  * Returns 0 on success.
2058  */
2059 int proc_dostring(struct ctl_table *table, int write,
2060 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2061 {
2062 	if (write)
2063 		proc_first_pos_non_zero_ignore(ppos, table);
2064 
2065 	return _proc_do_string((char *)(table->data), table->maxlen, write,
2066 			       (char __user *)buffer, lenp, ppos);
2067 }
2068 
/*
 * Advance *buf to the position returned by skip_spaces() and return how
 * many characters were stepped over.
 */
static size_t proc_skip_spaces(char **buf)
{
	char *start = *buf;

	*buf = skip_spaces(start);
	return *buf - start;
}
2077 
/*
 * Step *buf past a leading run of the character @v, consuming at most
 * *size bytes; *size is reduced by the number of bytes skipped.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	while (*size && **buf == v) {
		(*buf)++;
		(*size)--;
	}
}
2087 
#define TMPBUFLEN 22

/*
 * proc_parse_ulong - parse an unsigned integer with overflow detection
 * @s: NUL-terminated input; the caller guarantees it starts with a digit
 * @endp: set to the first character that is not part of the number
 * @res: receives the parsed value
 *
 * Base selection is meant to accept the same prefixes as the
 * simple_strtoul(p, &p, 0) call this replaces: "0x"/"0X" followed by a hex
 * digit selects base 16, any other leading '0' selects base 8, everything
 * else is base 10.  Unlike that call, a value that does not fit in an
 * unsigned long is reported instead of being silently wrapped.
 *
 * Returns 0 on success or -ERANGE on overflow; @endp and @res are only
 * written on success.
 */
static int proc_parse_ulong(const char *s, char **endp, unsigned long *res)
{
	unsigned int base = 10;
	unsigned long acc = 0;

	if (s[0] == '0') {
		if ((s[1] | 0x20) == 'x' && isxdigit((unsigned char)s[2])) {
			base = 16;
			s += 2;
		} else {
			base = 8;
		}
	}

	for (;; s++) {
		unsigned int c = (unsigned char)*s;
		unsigned int lc = c | 0x20;	/* ASCII lower-case fold */
		unsigned int digit;

		if (c >= '0' && c <= '9')
			digit = c - '0';
		else if (lc >= 'a' && lc <= 'f')
			digit = lc - 'a' + 10;
		else
			break;

		if (digit >= base)
			break;

		/* Would acc * base + digit exceed ULONG_MAX? */
		if (acc > (ULONG_MAX - digit) / base)
			return -ERANGE;
		acc = acc * base + digit;
	}

	*endp = (char *)s;
	*res = acc;
	return 0;
}

/**
 * proc_get_long - reads an ASCII formatted integer from a user buffer
 *
 * @buf: a kernel buffer
 * @size: size of the kernel buffer
 * @val: this is where the number will be stored
 * @neg: set to %TRUE if number is negative
 * @perm_tr: a vector which contains the allowed trailers
 * @perm_tr_len: size of the perm_tr vector
 * @tr: pointer to store the trailer character
 *
 * In case of success %0 is returned and @buf and @size are updated with
 * the amount of bytes read. If @tr is non-NULL and a trailing
 * character exists (size is non-zero after returning from this
 * function), @tr is updated with the trailing character.
 *
 * Returns -EINVAL if the buffer is empty, does not start with a digit
 * (after an optional '-'), holds a number that overflows an unsigned long
 * or fills the whole temporary buffer, or is followed by a character that
 * is not listed in @perm_tr.
 */
static int proc_get_long(char **buf, size_t *size,
			  unsigned long *val, bool *neg,
			  const char *perm_tr, unsigned perm_tr_len, char *tr)
{
	size_t len;
	char *p, tmp[TMPBUFLEN];

	if (!*size)
		return -EINVAL;

	/* Work on a bounded, NUL-terminated copy of the input. */
	len = *size;
	if (len > TMPBUFLEN - 1)
		len = TMPBUFLEN - 1;

	memcpy(tmp, *buf, len);

	tmp[len] = 0;
	p = tmp;
	if (*p == '-' && *size > 1) {
		*neg = true;
		p++;
	} else
		*neg = false;
	if (!isdigit((unsigned char)*p))
		return -EINVAL;

	/* Reject values that do not fit instead of wrapping them. */
	if (proc_parse_ulong(p, &p, val))
		return -EINVAL;

	len = p - tmp;

	/* We don't know if the next char is whitespace thus we may accept
	 * invalid integers (e.g. 1234...a) or two integers instead of one
	 * (e.g. 123...1). So lets not allow such large numbers. */
	if (len == TMPBUFLEN - 1)
		return -EINVAL;

	if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
		return -EINVAL;

	if (tr && (len < *size))
		*tr = *p;

	*buf += len;
	*size -= len;

	return 0;
}
2152 
2153 /**
2154  * proc_put_long - converts an integer to a decimal ASCII formatted string
2155  *
2156  * @buf: the user buffer
2157  * @size: the size of the user buffer
2158  * @val: the integer to be converted
2159  * @neg: sign of the number, %TRUE for negative
2160  *
2161  * In case of success %0 is returned and @buf and @size are updated with
2162  * the amount of bytes written.
2163  */
2164 static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
2165 			  bool neg)
2166 {
2167 	int len;
2168 	char tmp[TMPBUFLEN], *p = tmp;
2169 
2170 	sprintf(p, "%s%lu", neg ? "-" : "", val);
2171 	len = strlen(tmp);
2172 	if (len > *size)
2173 		len = *size;
2174 	if (copy_to_user(*buf, tmp, len))
2175 		return -EFAULT;
2176 	*size -= len;
2177 	*buf += len;
2178 	return 0;
2179 }
2180 #undef TMPBUFLEN
2181 
2182 static int proc_put_char(void __user **buf, size_t *size, char c)
2183 {
2184 	if (*size) {
2185 		char __user **buffer = (char __user **)buf;
2186 		if (put_user(c, *buffer))
2187 			return -EFAULT;
2188 		(*size)--, (*buffer)++;
2189 		*buf = *buffer;
2190 	}
2191 	return 0;
2192 }
2193 
/*
 * Default conversion between an int and the sign/magnitude pair used by
 * the integer-vector parser.
 *
 * On write, (*negp, *lvalp) is converted to an int stored in *valp;
 * magnitudes that do not fit (more than INT_MAX, or more than INT_MAX + 1
 * for negative numbers) are rejected with -EINVAL.  On read, *valp is
 * split into *negp and *lvalp.  @data is not used.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (!write) {
		const int cur = *valp;

		*negp = cur < 0;
		/* Negate in unsigned arithmetic so INT_MIN is handled too. */
		*lvalp = *negp ? -(unsigned long)cur : (unsigned long)cur;
		return 0;
	}

	if (*negp) {
		/* One extra magnitude step is available below zero. */
		if (*lvalp > (unsigned long) INT_MAX + 1)
			return -EINVAL;
		*valp = -*lvalp;
		return 0;
	}

	if (*lvalp > (unsigned long) INT_MAX)
		return -EINVAL;
	*valp = *lvalp;
	return 0;
}
2220 
/*
 * Conversion between an unsigned int and the unsigned long used by the
 * parser.
 *
 * On write, *lvalp is stored into *valp unless it exceeds UINT_MAX, in
 * which case -EINVAL is returned and *valp is left alone.  On read, *valp
 * is widened into *lvalp.  @data is not used.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
				  unsigned int *valp,
				  int write, void *data)
{
	if (!write) {
		*lvalp = (unsigned long)*valp;
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;

	*valp = *lvalp;
	return 0;
}
2235 
/*
 * Characters accepted as trailers after a parsed number.  Kept as a plain
 * character list (no terminating NUL) because users pass sizeof() of this
 * array as the trailer count.
 */
static const char proc_wspace_sep[] = {
	' ',
	'\t',
	'\n',
};
2237 
2238 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2239 		  int write, void __user *buffer,
2240 		  size_t *lenp, loff_t *ppos,
2241 		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2242 			      int write, void *data),
2243 		  void *data)
2244 {
2245 	int *i, vleft, first = 1, err = 0;
2246 	size_t left;
2247 	char *kbuf = NULL, *p;
2248 
2249 	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2250 		*lenp = 0;
2251 		return 0;
2252 	}
2253 
2254 	i = (int *) tbl_data;
2255 	vleft = table->maxlen / sizeof(*i);
2256 	left = *lenp;
2257 
2258 	if (!conv)
2259 		conv = do_proc_dointvec_conv;
2260 
2261 	if (write) {
2262 		if (proc_first_pos_non_zero_ignore(ppos, table))
2263 			goto out;
2264 
2265 		if (left > PAGE_SIZE - 1)
2266 			left = PAGE_SIZE - 1;
2267 		p = kbuf = memdup_user_nul(buffer, left);
2268 		if (IS_ERR(kbuf))
2269 			return PTR_ERR(kbuf);
2270 	}
2271 
2272 	for (; left && vleft--; i++, first=0) {
2273 		unsigned long lval;
2274 		bool neg;
2275 
2276 		if (write) {
2277 			left -= proc_skip_spaces(&p);
2278 
2279 			if (!left)
2280 				break;
2281 			err = proc_get_long(&p, &left, &lval, &neg,
2282 					     proc_wspace_sep,
2283 					     sizeof(proc_wspace_sep), NULL);
2284 			if (err)
2285 				break;
2286 			if (conv(&neg, &lval, i, 1, data)) {
2287 				err = -EINVAL;
2288 				break;
2289 			}
2290 		} else {
2291 			if (conv(&neg, &lval, i, 0, data)) {
2292 				err = -EINVAL;
2293 				break;
2294 			}
2295 			if (!first)
2296 				err = proc_put_char(&buffer, &left, '\t');
2297 			if (err)
2298 				break;
2299 			err = proc_put_long(&buffer, &left, lval, neg);
2300 			if (err)
2301 				break;
2302 		}
2303 	}
2304 
2305 	if (!write && !first && left && !err)
2306 		err = proc_put_char(&buffer, &left, '\n');
2307 	if (write && !err && left)
2308 		left -= proc_skip_spaces(&p);
2309 	if (write) {
2310 		kfree(kbuf);
2311 		if (first)
2312 			return err ? : -EINVAL;
2313 	}
2314 	*lenp -= left;
2315 out:
2316 	*ppos += *lenp;
2317 	return err;
2318 }
2319 
2320 static int do_proc_dointvec(struct ctl_table *table, int write,
2321 		  void __user *buffer, size_t *lenp, loff_t *ppos,
2322 		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2323 			      int write, void *data),
2324 		  void *data)
2325 {
2326 	return __do_proc_dointvec(table->data, table, write,
2327 			buffer, lenp, ppos, conv, data);
2328 }
2329 
2330 static int do_proc_douintvec_w(unsigned int *tbl_data,
2331 			       struct ctl_table *table,
2332 			       void __user *buffer,
2333 			       size_t *lenp, loff_t *ppos,
2334 			       int (*conv)(unsigned long *lvalp,
2335 					   unsigned int *valp,
2336 					   int write, void *data),
2337 			       void *data)
2338 {
2339 	unsigned long lval;
2340 	int err = 0;
2341 	size_t left;
2342 	bool neg;
2343 	char *kbuf = NULL, *p;
2344 
2345 	left = *lenp;
2346 
2347 	if (proc_first_pos_non_zero_ignore(ppos, table))
2348 		goto bail_early;
2349 
2350 	if (left > PAGE_SIZE - 1)
2351 		left = PAGE_SIZE - 1;
2352 
2353 	p = kbuf = memdup_user_nul(buffer, left);
2354 	if (IS_ERR(kbuf))
2355 		return -EINVAL;
2356 
2357 	left -= proc_skip_spaces(&p);
2358 	if (!left) {
2359 		err = -EINVAL;
2360 		goto out_free;
2361 	}
2362 
2363 	err = proc_get_long(&p, &left, &lval, &neg,
2364 			     proc_wspace_sep,
2365 			     sizeof(proc_wspace_sep), NULL);
2366 	if (err || neg) {
2367 		err = -EINVAL;
2368 		goto out_free;
2369 	}
2370 
2371 	if (conv(&lval, tbl_data, 1, data)) {
2372 		err = -EINVAL;
2373 		goto out_free;
2374 	}
2375 
2376 	if (!err && left)
2377 		left -= proc_skip_spaces(&p);
2378 
2379 out_free:
2380 	kfree(kbuf);
2381 	if (err)
2382 		return -EINVAL;
2383 
2384 	return 0;
2385 
2386 	/* This is in keeping with old __do_proc_dointvec() */
2387 bail_early:
2388 	*ppos += *lenp;
2389 	return err;
2390 }
2391 
2392 static int do_proc_douintvec_r(unsigned int *tbl_data, void __user *buffer,
2393 			       size_t *lenp, loff_t *ppos,
2394 			       int (*conv)(unsigned long *lvalp,
2395 					   unsigned int *valp,
2396 					   int write, void *data),
2397 			       void *data)
2398 {
2399 	unsigned long lval;
2400 	int err = 0;
2401 	size_t left;
2402 
2403 	left = *lenp;
2404 
2405 	if (conv(&lval, tbl_data, 0, data)) {
2406 		err = -EINVAL;
2407 		goto out;
2408 	}
2409 
2410 	err = proc_put_long(&buffer, &left, lval, false);
2411 	if (err || !left)
2412 		goto out;
2413 
2414 	err = proc_put_char(&buffer, &left, '\n');
2415 
2416 out:
2417 	*lenp -= left;
2418 	*ppos += *lenp;
2419 
2420 	return err;
2421 }
2422 
2423 static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
2424 			       int write, void __user *buffer,
2425 			       size_t *lenp, loff_t *ppos,
2426 			       int (*conv)(unsigned long *lvalp,
2427 					   unsigned int *valp,
2428 					   int write, void *data),
2429 			       void *data)
2430 {
2431 	unsigned int *i, vleft;
2432 
2433 	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2434 		*lenp = 0;
2435 		return 0;
2436 	}
2437 
2438 	i = (unsigned int *) tbl_data;
2439 	vleft = table->maxlen / sizeof(*i);
2440 
2441 	/*
2442 	 * Arrays are not supported, keep this simple. *Do not* add
2443 	 * support for them.
2444 	 */
2445 	if (vleft != 1) {
2446 		*lenp = 0;
2447 		return -EINVAL;
2448 	}
2449 
2450 	if (!conv)
2451 		conv = do_proc_douintvec_conv;
2452 
2453 	if (write)
2454 		return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
2455 					   conv, data);
2456 	return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
2457 }
2458 
2459 static int do_proc_douintvec(struct ctl_table *table, int write,
2460 			     void __user *buffer, size_t *lenp, loff_t *ppos,
2461 			     int (*conv)(unsigned long *lvalp,
2462 					 unsigned int *valp,
2463 					 int write, void *data),
2464 			     void *data)
2465 {
2466 	return __do_proc_douintvec(table->data, table, write,
2467 				   buffer, lenp, ppos, conv, data);
2468 }
2469 
2470 /**
2471  * proc_dointvec - read a vector of integers
2472  * @table: the sysctl table
2473  * @write: %TRUE if this is a write to the sysctl file
2474  * @buffer: the user buffer
2475  * @lenp: the size of the user buffer
2476  * @ppos: file position
2477  *
2478  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2479  * values from/to the user buffer, treated as an ASCII string.
2480  *
2481  * Returns 0 on success.
2482  */
2483 int proc_dointvec(struct ctl_table *table, int write,
2484 		     void __user *buffer, size_t *lenp, loff_t *ppos)
2485 {
2486 	return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
2487 }
2488 
2489 /**
2490  * proc_douintvec - read a vector of unsigned integers
2491  * @table: the sysctl table
2492  * @write: %TRUE if this is a write to the sysctl file
2493  * @buffer: the user buffer
2494  * @lenp: the size of the user buffer
2495  * @ppos: file position
2496  *
2497  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2498  * values from/to the user buffer, treated as an ASCII string.
2499  *
2500  * Returns 0 on success.
2501  */
2502 int proc_douintvec(struct ctl_table *table, int write,
2503 		     void __user *buffer, size_t *lenp, loff_t *ppos)
2504 {
2505 	return do_proc_douintvec(table, write, buffer, lenp, ppos,
2506 				 do_proc_douintvec_conv, NULL);
2507 }
2508 
2509 /*
2510  * Taint values can only be increased
2511  * This means we can safely use a temporary.
2512  */
2513 static int proc_taint(struct ctl_table *table, int write,
2514 			       void __user *buffer, size_t *lenp, loff_t *ppos)
2515 {
2516 	struct ctl_table t;
2517 	unsigned long tmptaint = get_taint();
2518 	int err;
2519 
2520 	if (write && !capable(CAP_SYS_ADMIN))
2521 		return -EPERM;
2522 
2523 	t = *table;
2524 	t.data = &tmptaint;
2525 	err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2526 	if (err < 0)
2527 		return err;
2528 
2529 	if (write) {
2530 		/*
2531 		 * Poor man's atomic or. Not worth adding a primitive
2532 		 * to everyone's atomic.h for this
2533 		 */
2534 		int i;
2535 		for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2536 			if ((tmptaint >> i) & 1)
2537 				add_taint(i, LOCKDEP_STILL_OK);
2538 		}
2539 	}
2540 
2541 	return err;
2542 }
2543 
#ifdef CONFIG_PRINTK
/*
 * Same as proc_dointvec_minmax(), except that writes are refused with
 * -EPERM unless the caller has CAP_SYS_ADMIN.
 */
static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp, loff_t *ppos)
{
	if (!write || capable(CAP_SYS_ADMIN))
		return proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	return -EPERM;
}
#endif
2554 
/**
 * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
 * @min: pointer to minimum allowable value
 * @max: pointer to maximum allowable value
 *
 * The do_proc_dointvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_dointvec_minmax() handler.
 * A NULL pointer disables the corresponding bound.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * do_proc_dointvec_minmax_conv - convert between text magnitude and a
 * range-checked int
 * @negp: sign of the value (in on write, out on read)
 * @lvalp: magnitude of the value (in on write, out on read)
 * @valp: the stored integer
 * @write: non-zero when storing a user supplied value
 * @data: a struct do_proc_dointvec_minmax_conv_param
 *
 * On write the magnitude must first fit into an int; only then is it
 * compared against the optional bounds. Checking in this order prevents an
 * oversized value from wrapping into the permitted range when it is
 * narrowed. *valp is only modified when every check passes.
 *
 * Returns 0 on success, -EINVAL when a written value is not representable
 * or lies outside the bounds.
 */
static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;

	if (write) {
		int val;

		if (*negp) {
			/* INT_MIN has a magnitude of INT_MAX + 1. */
			if (*lvalp > (unsigned long) INT_MAX + 1)
				return -EINVAL;
			val = -*lvalp;
		} else {
			if (*lvalp > (unsigned long) INT_MAX)
				return -EINVAL;
			val = *lvalp;
		}

		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;

		if (val < 0) {
			*negp = true;
			/* Negate as unsigned so INT_MIN does not overflow. */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = false;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2592 
2593 /**
2594  * proc_dointvec_minmax - read a vector of integers with min/max values
2595  * @table: the sysctl table
2596  * @write: %TRUE if this is a write to the sysctl file
2597  * @buffer: the user buffer
2598  * @lenp: the size of the user buffer
2599  * @ppos: file position
2600  *
2601  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2602  * values from/to the user buffer, treated as an ASCII string.
2603  *
2604  * This routine will ensure the values are within the range specified by
2605  * table->extra1 (min) and table->extra2 (max).
2606  *
2607  * Returns 0 on success or -EINVAL on write when the range check fails.
2608  */
2609 int proc_dointvec_minmax(struct ctl_table *table, int write,
2610 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2611 {
2612 	struct do_proc_dointvec_minmax_conv_param param = {
2613 		.min = (int *) table->extra1,
2614 		.max = (int *) table->extra2,
2615 	};
2616 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
2617 				do_proc_dointvec_minmax_conv, &param);
2618 }
2619 
/**
 * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
 * @min: pointer to minimum allowable value
 * @max: pointer to maximum allowable value
 *
 * Carries the optional lower and upper limits used by the
 * proc_douintvec_minmax() handler when it validates a written value.
 * A NULL pointer disables the corresponding bound.
 */
struct do_proc_douintvec_minmax_conv_param {
	unsigned int *min;
	unsigned int *max;
};

/*
 * Converter for unsigned int sysctls with optional bounds.
 *
 * Read: widens *valp into *lvalp.
 * Write: rejects values above UINT_MAX with -EINVAL and values outside the
 * configured bounds with -ERANGE; otherwise stores the value in *valp.
 */
static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
					 unsigned int *valp,
					 int write, void *data)
{
	struct do_proc_douintvec_minmax_conv_param *limits = data;
	unsigned int candidate;

	if (!write) {
		*lvalp = *valp;
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;

	candidate = (unsigned int)*lvalp;

	if (limits->min && candidate < *limits->min)
		return -ERANGE;
	if (limits->max && candidate > *limits->max)
		return -ERANGE;

	*valp = candidate;
	return 0;
}
2658 
2659 /**
2660  * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
2661  * @table: the sysctl table
2662  * @write: %TRUE if this is a write to the sysctl file
2663  * @buffer: the user buffer
2664  * @lenp: the size of the user buffer
2665  * @ppos: file position
2666  *
2667  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2668  * values from/to the user buffer, treated as an ASCII string. Negative
2669  * strings are not allowed.
2670  *
2671  * This routine will ensure the values are within the range specified by
2672  * table->extra1 (min) and table->extra2 (max). There is a final sanity
2673  * check for UINT_MAX to avoid having to support wrap around uses from
2674  * userspace.
2675  *
2676  * Returns 0 on success or -ERANGE on write when the range check fails.
2677  */
2678 int proc_douintvec_minmax(struct ctl_table *table, int write,
2679 			  void __user *buffer, size_t *lenp, loff_t *ppos)
2680 {
2681 	struct do_proc_douintvec_minmax_conv_param param = {
2682 		.min = (unsigned int *) table->extra1,
2683 		.max = (unsigned int *) table->extra2,
2684 	};
2685 	return do_proc_douintvec(table, write, buffer, lenp, ppos,
2686 				 do_proc_douintvec_minmax_conv, &param);
2687 }
2688 
/*
 * Converter for the pipe size limit.
 *
 * Read: widens *valp into *lvalp.
 * Write: stores round_pipe_size(*lvalp) in *valp, or fails with -EINVAL
 * when that helper returns 0.
 */
static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
					unsigned int *valp,
					int write, void *data)
{
	unsigned int rounded;

	if (!write) {
		*lvalp = *valp;
		return 0;
	}

	rounded = round_pipe_size(*lvalp);
	if (!rounded)
		return -EINVAL;

	*valp = rounded;
	return 0;
}
2708 
2709 static int proc_dopipe_max_size(struct ctl_table *table, int write,
2710 				void __user *buffer, size_t *lenp, loff_t *ppos)
2711 {
2712 	return do_proc_douintvec(table, write, buffer, lenp, ppos,
2713 				 do_proc_dopipe_max_size_conv, NULL);
2714 }
2715 
/*
 * Warn when suid_dumpable is SUID_DUMP_ROOT while core_pattern is neither
 * an absolute path nor a pipe handler. Compiles to nothing without
 * CONFIG_COREDUMP.
 */
static void validate_coredump_safety(void)
{
#ifdef CONFIG_COREDUMP
	if (suid_dumpable != SUID_DUMP_ROOT)
		return;
	if (core_pattern[0] == '/' || core_pattern[0] == '|')
		return;

	printk(KERN_WARNING
"Unsafe core_pattern used with fs.suid_dumpable=2.\n"
"Pipe handler or fully qualified core dump path required.\n"
"Set kernel.core_pattern before fs.suid_dumpable.\n"
	);
#endif
}
2729 
2730 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
2731 		void __user *buffer, size_t *lenp, loff_t *ppos)
2732 {
2733 	int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2734 	if (!error)
2735 		validate_coredump_safety();
2736 	return error;
2737 }
2738 
#ifdef CONFIG_COREDUMP
/*
 * proc_dostring() followed, on success, by a re-check of the coredump
 * configuration.
 */
static int proc_dostring_coredump(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int error;

	error = proc_dostring(table, write, buffer, lenp, ppos);
	if (error)
		return error;

	validate_coredump_safety();
	return 0;
}
#endif
2749 
2750 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2751 				     void __user *buffer,
2752 				     size_t *lenp, loff_t *ppos,
2753 				     unsigned long convmul,
2754 				     unsigned long convdiv)
2755 {
2756 	unsigned long *i, *min, *max;
2757 	int vleft, first = 1, err = 0;
2758 	size_t left;
2759 	char *kbuf = NULL, *p;
2760 
2761 	if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
2762 		*lenp = 0;
2763 		return 0;
2764 	}
2765 
2766 	i = (unsigned long *) data;
2767 	min = (unsigned long *) table->extra1;
2768 	max = (unsigned long *) table->extra2;
2769 	vleft = table->maxlen / sizeof(unsigned long);
2770 	left = *lenp;
2771 
2772 	if (write) {
2773 		if (proc_first_pos_non_zero_ignore(ppos, table))
2774 			goto out;
2775 
2776 		if (left > PAGE_SIZE - 1)
2777 			left = PAGE_SIZE - 1;
2778 		p = kbuf = memdup_user_nul(buffer, left);
2779 		if (IS_ERR(kbuf))
2780 			return PTR_ERR(kbuf);
2781 	}
2782 
2783 	for (; left && vleft--; i++, first = 0) {
2784 		unsigned long val;
2785 
2786 		if (write) {
2787 			bool neg;
2788 
2789 			left -= proc_skip_spaces(&p);
2790 
2791 			err = proc_get_long(&p, &left, &val, &neg,
2792 					     proc_wspace_sep,
2793 					     sizeof(proc_wspace_sep), NULL);
2794 			if (err)
2795 				break;
2796 			if (neg)
2797 				continue;
2798 			val = convmul * val / convdiv;
2799 			if ((min && val < *min) || (max && val > *max))
2800 				continue;
2801 			*i = val;
2802 		} else {
2803 			val = convdiv * (*i) / convmul;
2804 			if (!first) {
2805 				err = proc_put_char(&buffer, &left, '\t');
2806 				if (err)
2807 					break;
2808 			}
2809 			err = proc_put_long(&buffer, &left, val, false);
2810 			if (err)
2811 				break;
2812 		}
2813 	}
2814 
2815 	if (!write && !first && left && !err)
2816 		err = proc_put_char(&buffer, &left, '\n');
2817 	if (write && !err)
2818 		left -= proc_skip_spaces(&p);
2819 	if (write) {
2820 		kfree(kbuf);
2821 		if (first)
2822 			return err ? : -EINVAL;
2823 	}
2824 	*lenp -= left;
2825 out:
2826 	*ppos += *lenp;
2827 	return err;
2828 }
2829 
2830 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2831 				     void __user *buffer,
2832 				     size_t *lenp, loff_t *ppos,
2833 				     unsigned long convmul,
2834 				     unsigned long convdiv)
2835 {
2836 	return __do_proc_doulongvec_minmax(table->data, table, write,
2837 			buffer, lenp, ppos, convmul, convdiv);
2838 }
2839 
2840 /**
2841  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2842  * @table: the sysctl table
2843  * @write: %TRUE if this is a write to the sysctl file
2844  * @buffer: the user buffer
2845  * @lenp: the size of the user buffer
2846  * @ppos: file position
2847  *
2848  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2849  * values from/to the user buffer, treated as an ASCII string.
2850  *
2851  * This routine will ensure the values are within the range specified by
2852  * table->extra1 (min) and table->extra2 (max).
2853  *
2854  * Returns 0 on success.
2855  */
2856 int proc_doulongvec_minmax(struct ctl_table *table, int write,
2857 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2858 {
2859     return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2860 }
2861 
2862 /**
2863  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2864  * @table: the sysctl table
2865  * @write: %TRUE if this is a write to the sysctl file
2866  * @buffer: the user buffer
2867  * @lenp: the size of the user buffer
2868  * @ppos: file position
2869  *
2870  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2871  * values from/to the user buffer, treated as an ASCII string. The values
2872  * are treated as milliseconds, and converted to jiffies when they are stored.
2873  *
2874  * This routine will ensure the values are within the range specified by
2875  * table->extra1 (min) and table->extra2 (max).
2876  *
2877  * Returns 0 on success.
2878  */
2879 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2880 				      void __user *buffer,
2881 				      size_t *lenp, loff_t *ppos)
2882 {
2883     return do_proc_doulongvec_minmax(table, write, buffer,
2884 				     lenp, ppos, HZ, 1000l);
2885 }
2886 
2887 
2888 static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
2889 					 int *valp,
2890 					 int write, void *data)
2891 {
2892 	if (write) {
2893 		if (*lvalp > INT_MAX / HZ)
2894 			return 1;
2895 		*valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2896 	} else {
2897 		int val = *valp;
2898 		unsigned long lval;
2899 		if (val < 0) {
2900 			*negp = true;
2901 			lval = -(unsigned long)val;
2902 		} else {
2903 			*negp = false;
2904 			lval = (unsigned long)val;
2905 		}
2906 		*lvalp = lval / HZ;
2907 	}
2908 	return 0;
2909 }
2910 
2911 static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
2912 						int *valp,
2913 						int write, void *data)
2914 {
2915 	if (write) {
2916 		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2917 			return 1;
2918 		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2919 	} else {
2920 		int val = *valp;
2921 		unsigned long lval;
2922 		if (val < 0) {
2923 			*negp = true;
2924 			lval = -(unsigned long)val;
2925 		} else {
2926 			*negp = false;
2927 			lval = (unsigned long)val;
2928 		}
2929 		*lvalp = jiffies_to_clock_t(lval);
2930 	}
2931 	return 0;
2932 }
2933 
/*
 * Converter between milliseconds (user side) and jiffies (stored int).
 *
 * Write: runs the signed value through msecs_to_jiffies() and fails with 1
 * when the result exceeds INT_MAX.
 * Read: reports the sign in *negp and jiffies_to_msecs(|*valp|) in *lvalp.
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	unsigned long magnitude;
	int stored;

	if (write) {
		unsigned long msecs = *negp ? -*lvalp : *lvalp;
		unsigned long jif = msecs_to_jiffies(msecs);

		if (jif > INT_MAX)
			return 1;
		*valp = (int)jif;
		return 0;
	}

	stored = *valp;
	*negp = stored < 0;
	/* Negate as unsigned so INT_MIN does not overflow. */
	magnitude = *negp ? -(unsigned long)stored : (unsigned long)stored;
	*lvalp = jiffies_to_msecs(magnitude);
	return 0;
}
2958 
2959 /**
2960  * proc_dointvec_jiffies - read a vector of integers as seconds
2961  * @table: the sysctl table
2962  * @write: %TRUE if this is a write to the sysctl file
2963  * @buffer: the user buffer
2964  * @lenp: the size of the user buffer
2965  * @ppos: file position
2966  *
2967  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2968  * values from/to the user buffer, treated as an ASCII string.
2969  * The values read are assumed to be in seconds, and are converted into
2970  * jiffies.
2971  *
2972  * Returns 0 on success.
2973  */
2974 int proc_dointvec_jiffies(struct ctl_table *table, int write,
2975 			  void __user *buffer, size_t *lenp, loff_t *ppos)
2976 {
2977     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2978 		    	    do_proc_dointvec_jiffies_conv,NULL);
2979 }
2980 
2981 /**
2982  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2983  * @table: the sysctl table
2984  * @write: %TRUE if this is a write to the sysctl file
2985  * @buffer: the user buffer
2986  * @lenp: the size of the user buffer
2987  * @ppos: pointer to the file position
2988  *
2989  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2990  * values from/to the user buffer, treated as an ASCII string.
2991  * The values read are assumed to be in 1/USER_HZ seconds, and
2992  * are converted into jiffies.
2993  *
2994  * Returns 0 on success.
2995  */
2996 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2997 				 void __user *buffer, size_t *lenp, loff_t *ppos)
2998 {
2999     return do_proc_dointvec(table,write,buffer,lenp,ppos,
3000 		    	    do_proc_dointvec_userhz_jiffies_conv,NULL);
3001 }
3002 
3003 /**
3004  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
3005  * @table: the sysctl table
3006  * @write: %TRUE if this is a write to the sysctl file
3007  * @buffer: the user buffer
3008  * @lenp: the size of the user buffer
3009  * @ppos: file position
3010  * @ppos: the current position in the file
3011  *
3012  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3013  * values from/to the user buffer, treated as an ASCII string.
3014  * The values read are assumed to be in 1/1000 seconds, and
3015  * are converted into jiffies.
3016  *
3017  * Returns 0 on success.
3018  */
3019 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
3020 			     void __user *buffer, size_t *lenp, loff_t *ppos)
3021 {
3022 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
3023 				do_proc_dointvec_ms_jiffies_conv, NULL);
3024 }
3025 
3026 static int proc_do_cad_pid(struct ctl_table *table, int write,
3027 			   void __user *buffer, size_t *lenp, loff_t *ppos)
3028 {
3029 	struct pid *new_pid;
3030 	pid_t tmp;
3031 	int r;
3032 
3033 	tmp = pid_vnr(cad_pid);
3034 
3035 	r = __do_proc_dointvec(&tmp, table, write, buffer,
3036 			       lenp, ppos, NULL, NULL);
3037 	if (r || !write)
3038 		return r;
3039 
3040 	new_pid = find_get_pid(tmp);
3041 	if (!new_pid)
3042 		return -ESRCH;
3043 
3044 	put_pid(xchg(&cad_pid, new_pid));
3045 	return 0;
3046 }
3047 
3048 /**
3049  * proc_do_large_bitmap - read/write from/to a large bitmap
3050  * @table: the sysctl table
3051  * @write: %TRUE if this is a write to the sysctl file
3052  * @buffer: the user buffer
3053  * @lenp: the size of the user buffer
3054  * @ppos: file position
3055  *
3056  * The bitmap is stored at table->data and the bitmap length (in bits)
3057  * in table->maxlen.
3058  *
3059  * We use a range comma separated format (e.g. 1,3-4,10-10) so that
3060  * large bitmaps may be represented in a compact manner. Writing into
3061  * the file will clear the bitmap then update it with the given input.
3062  *
3063  * Returns 0 on success.
3064  */
3065 int proc_do_large_bitmap(struct ctl_table *table, int write,
3066 			 void __user *buffer, size_t *lenp, loff_t *ppos)
3067 {
3068 	int err = 0;
3069 	bool first = 1;
3070 	size_t left = *lenp;
3071 	unsigned long bitmap_len = table->maxlen;
3072 	unsigned long *bitmap = *(unsigned long **) table->data;
3073 	unsigned long *tmp_bitmap = NULL;
3074 	char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
3075 
3076 	if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
3077 		*lenp = 0;
3078 		return 0;
3079 	}
3080 
3081 	if (write) {
3082 		char *kbuf, *p;
3083 
3084 		if (left > PAGE_SIZE - 1)
3085 			left = PAGE_SIZE - 1;
3086 
3087 		p = kbuf = memdup_user_nul(buffer, left);
3088 		if (IS_ERR(kbuf))
3089 			return PTR_ERR(kbuf);
3090 
3091 		tmp_bitmap = kcalloc(BITS_TO_LONGS(bitmap_len),
3092 				     sizeof(unsigned long),
3093 				     GFP_KERNEL);
3094 		if (!tmp_bitmap) {
3095 			kfree(kbuf);
3096 			return -ENOMEM;
3097 		}
3098 		proc_skip_char(&p, &left, '\n');
3099 		while (!err && left) {
3100 			unsigned long val_a, val_b;
3101 			bool neg;
3102 
3103 			err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
3104 					     sizeof(tr_a), &c);
3105 			if (err)
3106 				break;
3107 			if (val_a >= bitmap_len || neg) {
3108 				err = -EINVAL;
3109 				break;
3110 			}
3111 
3112 			val_b = val_a;
3113 			if (left) {
3114 				p++;
3115 				left--;
3116 			}
3117 
3118 			if (c == '-') {
3119 				err = proc_get_long(&p, &left, &val_b,
3120 						     &neg, tr_b, sizeof(tr_b),
3121 						     &c);
3122 				if (err)
3123 					break;
3124 				if (val_b >= bitmap_len || neg ||
3125 				    val_a > val_b) {
3126 					err = -EINVAL;
3127 					break;
3128 				}
3129 				if (left) {
3130 					p++;
3131 					left--;
3132 				}
3133 			}
3134 
3135 			bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
3136 			first = 0;
3137 			proc_skip_char(&p, &left, '\n');
3138 		}
3139 		kfree(kbuf);
3140 	} else {
3141 		unsigned long bit_a, bit_b = 0;
3142 
3143 		while (left) {
3144 			bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
3145 			if (bit_a >= bitmap_len)
3146 				break;
3147 			bit_b = find_next_zero_bit(bitmap, bitmap_len,
3148 						   bit_a + 1) - 1;
3149 
3150 			if (!first) {
3151 				err = proc_put_char(&buffer, &left, ',');
3152 				if (err)
3153 					break;
3154 			}
3155 			err = proc_put_long(&buffer, &left, bit_a, false);
3156 			if (err)
3157 				break;
3158 			if (bit_a != bit_b) {
3159 				err = proc_put_char(&buffer, &left, '-');
3160 				if (err)
3161 					break;
3162 				err = proc_put_long(&buffer, &left, bit_b, false);
3163 				if (err)
3164 					break;
3165 			}
3166 
3167 			first = 0; bit_b++;
3168 		}
3169 		if (!err)
3170 			err = proc_put_char(&buffer, &left, '\n');
3171 	}
3172 
3173 	if (!err) {
3174 		if (write) {
3175 			if (*ppos)
3176 				bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
3177 			else
3178 				bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
3179 		}
3180 		*lenp -= left;
3181 		*ppos += *lenp;
3182 	}
3183 
3184 	kfree(tmp_bitmap);
3185 	return err;
3186 }
3187 
3188 #else /* CONFIG_PROC_SYSCTL */
3189 
/*
 * Stub compiled in the #else branch of the CONFIG_PROC_SYSCTL conditional:
 * the handler is unavailable and always fails with -ENOSYS.
 */
int proc_dostring(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
3195 
/*
 * Stub compiled in the #else branch of the CONFIG_PROC_SYSCTL conditional:
 * the handler is unavailable and always fails with -ENOSYS.
 */
int proc_dointvec(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
3201 
/*
 * Stub compiled in the #else branch of the CONFIG_PROC_SYSCTL conditional:
 * the handler is unavailable and always fails with -ENOSYS.
 */
int proc_douintvec(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
3207 
/*
 * Stub compiled in the #else branch of the CONFIG_PROC_SYSCTL conditional:
 * the handler is unavailable and always fails with -ENOSYS.
 */
int proc_dointvec_minmax(struct ctl_table *table, int write,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
3213 
/*
 * Stub compiled in the #else branch of the CONFIG_PROC_SYSCTL conditional:
 * the handler is unavailable and always fails with -ENOSYS.
 */
int proc_douintvec_minmax(struct ctl_table *table, int write,
			  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
3219 
/*
 * Stub compiled in the #else branch of the CONFIG_PROC_SYSCTL conditional:
 * the handler is unavailable and always fails with -ENOSYS.
 */
int proc_dointvec_jiffies(struct ctl_table *table, int write,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
3225 
/*
 * Stub compiled in the #else branch of the CONFIG_PROC_SYSCTL conditional:
 * the handler is unavailable and always fails with -ENOSYS.
 */
int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
3231 
/*
 * Stub compiled in the #else branch of the CONFIG_PROC_SYSCTL conditional:
 * the handler is unavailable and always fails with -ENOSYS.
 */
int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
			     void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
3237 
/*
 * Stub compiled in the #else branch of the CONFIG_PROC_SYSCTL conditional:
 * the handler is unavailable and always fails with -ENOSYS.
 */
int proc_doulongvec_minmax(struct ctl_table *table, int write,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
3243 
/*
 * Stub compiled in the #else branch of the CONFIG_PROC_SYSCTL conditional:
 * the handler is unavailable and always fails with -ENOSYS.
 */
int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
				      void __user *buffer,
				      size_t *lenp, loff_t *ppos)
{
    return -ENOSYS;
}
3250 
3251 
3252 #endif /* CONFIG_PROC_SYSCTL */
3253 
/*
 * No sense putting this after each symbol definition, twice,
 * exception granted :-)
 *
 * Each of these handlers is defined once as a real implementation and once
 * as an -ENOSYS stub, so the exports are collected in a single place below
 * the conditional instead of following each definition.
 */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
3267 EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
3268