xref: /openbmc/linux/kernel/sysctl.c (revision be2b81b5)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * sysctl.c: General linux system control interface
4  *
5  * Begun 24 March 1995, Stephen Tweedie
6  * Added /proc support, Dec 1995
7  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
8  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
9  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
10  * Dynamic registration fixes, Stephen Tweedie.
11  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
12  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
13  *  Horn.
14  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
15  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
16  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
17  *  Wendling.
18  * The list_for_each() macro wasn't appropriate for the sysctl loop.
19  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
20  */
21 
22 #include <linux/module.h>
23 #include <linux/mm.h>
24 #include <linux/swap.h>
25 #include <linux/slab.h>
26 #include <linux/sysctl.h>
27 #include <linux/bitmap.h>
28 #include <linux/signal.h>
29 #include <linux/panic.h>
30 #include <linux/printk.h>
31 #include <linux/proc_fs.h>
32 #include <linux/security.h>
33 #include <linux/ctype.h>
34 #include <linux/kmemleak.h>
35 #include <linux/filter.h>
36 #include <linux/fs.h>
37 #include <linux/init.h>
38 #include <linux/kernel.h>
39 #include <linux/kobject.h>
40 #include <linux/net.h>
41 #include <linux/sysrq.h>
42 #include <linux/highuid.h>
43 #include <linux/writeback.h>
44 #include <linux/ratelimit.h>
45 #include <linux/compaction.h>
46 #include <linux/hugetlb.h>
47 #include <linux/initrd.h>
48 #include <linux/key.h>
49 #include <linux/times.h>
50 #include <linux/limits.h>
51 #include <linux/dcache.h>
52 #include <linux/syscalls.h>
53 #include <linux/vmstat.h>
54 #include <linux/nfs_fs.h>
55 #include <linux/acpi.h>
56 #include <linux/reboot.h>
57 #include <linux/ftrace.h>
58 #include <linux/perf_event.h>
59 #include <linux/oom.h>
60 #include <linux/kmod.h>
61 #include <linux/capability.h>
62 #include <linux/binfmts.h>
63 #include <linux/sched/sysctl.h>
64 #include <linux/mount.h>
65 #include <linux/userfaultfd_k.h>
66 #include <linux/pid.h>
67 
68 #include "../lib/kstrtox.h"
69 
70 #include <linux/uaccess.h>
71 #include <asm/processor.h>
72 
73 #ifdef CONFIG_X86
74 #include <asm/nmi.h>
75 #include <asm/stacktrace.h>
76 #include <asm/io.h>
77 #endif
78 #ifdef CONFIG_SPARC
79 #include <asm/setup.h>
80 #endif
81 #ifdef CONFIG_RT_MUTEXES
82 #include <linux/rtmutex.h>
83 #endif
84 
/* Shared constant tables for sysctl entries to reference. */
const int sysctl_vals[] = {
	0, 1, 2, 3, 4,
	100, 200, 1000, 3000,
	INT_MAX, 65535, -1,
};
EXPORT_SYMBOL(sysctl_vals);

const unsigned long sysctl_long_vals[] = {
	0,
	1,
	LONG_MAX,
};
EXPORT_SYMBOL_GPL(sysctl_long_vals);
91 
92 #if defined(CONFIG_SYSCTL)
93 
94 /* Constants used for minimum and maximum */
95 
96 #ifdef CONFIG_PERF_EVENTS
97 static const int six_hundred_forty_kb = 640 * 1024;
98 #endif
99 
100 
101 static const int ngroups_max = NGROUPS_MAX;
102 static const int cap_last_cap = CAP_LAST_CAP;
103 
104 #ifdef CONFIG_PROC_SYSCTL
105 
/**
 * enum sysctl_writes_mode - supported sysctl write modes
 *
 * @SYSCTL_WRITES_LEGACY: every write syscall has to carry the complete
 *	sysctl value; repeated writes on one file descriptor overwrite the
 *	value no matter what the file position is. A non-zero initial
 *	position does not trigger a warning.
 * @SYSCTL_WRITES_WARN: identical to legacy mode, except that a warning is
 *	emitted when the initial file position is not 0.
 * @SYSCTL_WRITES_STRICT: numeric entries may only be written at file
 *	position 0, with the whole value inside the buffer handed to the
 *	write syscall. String entries honour the file position, limited to
 *	the maximum length of the buffer; anything past the maximum length
 *	is ignored. Multiple writes append to the buffer.
 *
 * The mode selects how the current file position influences an update of a
 * sysctl value made through the proc interface.
 */
enum sysctl_writes_mode {
	SYSCTL_WRITES_LEGACY	= -1,
	SYSCTL_WRITES_WARN	= 0,
	SYSCTL_WRITES_STRICT	= 1,
};

/* Write mode in effect; initialised to strict. */
static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
132 #endif /* CONFIG_PROC_SYSCTL */
133 
#if defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT) || \
    defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
/* Only built when one of the mmap-layout symbols tested above is defined. */
int sysctl_legacy_va_layout;
#endif
138 
139 #endif /* CONFIG_SYSCTL */
140 
141 /*
142  * /proc/sys support
143  */
144 
145 #ifdef CONFIG_PROC_SYSCTL
146 
147 static int _proc_do_string(char *data, int maxlen, int write,
148 		char *buffer, size_t *lenp, loff_t *ppos)
149 {
150 	size_t len;
151 	char c, *p;
152 
153 	if (!data || !maxlen || !*lenp) {
154 		*lenp = 0;
155 		return 0;
156 	}
157 
158 	if (write) {
159 		if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
160 			/* Only continue writes not past the end of buffer. */
161 			len = strlen(data);
162 			if (len > maxlen - 1)
163 				len = maxlen - 1;
164 
165 			if (*ppos > len)
166 				return 0;
167 			len = *ppos;
168 		} else {
169 			/* Start writing from beginning of buffer. */
170 			len = 0;
171 		}
172 
173 		*ppos += *lenp;
174 		p = buffer;
175 		while ((p - buffer) < *lenp && len < maxlen - 1) {
176 			c = *(p++);
177 			if (c == 0 || c == '\n')
178 				break;
179 			data[len++] = c;
180 		}
181 		data[len] = 0;
182 	} else {
183 		len = strlen(data);
184 		if (len > maxlen)
185 			len = maxlen;
186 
187 		if (*ppos > len) {
188 			*lenp = 0;
189 			return 0;
190 		}
191 
192 		data += *ppos;
193 		len  -= *ppos;
194 
195 		if (len > *lenp)
196 			len = *lenp;
197 		if (len)
198 			memcpy(buffer, data, len);
199 		if (len < *lenp) {
200 			buffer[len] = '\n';
201 			len++;
202 		}
203 		*lenp = len;
204 		*ppos += len;
205 	}
206 	return 0;
207 }
208 
209 static void warn_sysctl_write(struct ctl_table *table)
210 {
211 	pr_warn_once("%s wrote to %s when file position was not 0!\n"
212 		"This will not be supported in the future. To silence this\n"
213 		"warning, set kernel.sysctl_writes_strict = -1\n",
214 		current->comm, table->procname);
215 }
216 
217 /**
218  * proc_first_pos_non_zero_ignore - check if first position is allowed
219  * @ppos: file position
220  * @table: the sysctl table
221  *
222  * Returns true if the first position is non-zero and the sysctl_writes_strict
223  * mode indicates this is not allowed for numeric input types. String proc
224  * handlers can ignore the return value.
225  */
226 static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
227 					   struct ctl_table *table)
228 {
229 	if (!*ppos)
230 		return false;
231 
232 	switch (sysctl_writes_strict) {
233 	case SYSCTL_WRITES_STRICT:
234 		return true;
235 	case SYSCTL_WRITES_WARN:
236 		warn_sysctl_write(table);
237 		return false;
238 	default:
239 		return false;
240 	}
241 }
242 
243 /**
244  * proc_dostring - read a string sysctl
245  * @table: the sysctl table
246  * @write: %TRUE if this is a write to the sysctl file
247  * @buffer: the user buffer
248  * @lenp: the size of the user buffer
249  * @ppos: file position
250  *
251  * Reads/writes a string from/to the user buffer. If the kernel
252  * buffer provided is not large enough to hold the string, the
253  * string is truncated. The copied string is %NULL-terminated.
254  * If the string is being read by the user process, it is copied
255  * and a newline '\n' is added. It is truncated if the buffer is
256  * not large enough.
257  *
258  * Returns 0 on success.
259  */
260 int proc_dostring(struct ctl_table *table, int write,
261 		  void *buffer, size_t *lenp, loff_t *ppos)
262 {
263 	if (write)
264 		proc_first_pos_non_zero_ignore(ppos, table);
265 
266 	return _proc_do_string(table->data, table->maxlen, write, buffer, lenp,
267 			ppos);
268 }
269 
/*
 * Advance *@buf past leading whitespace, consuming at most *@size bytes;
 * *@size is reduced by the number of bytes skipped.
 */
static void proc_skip_spaces(char **buf, size_t *size)
{
	char *cur = *buf;
	size_t remaining = *size;

	while (remaining && isspace(*cur)) {
		cur++;
		remaining--;
	}

	*buf = cur;
	*size = remaining;
}
279 
/*
 * Advance *@buf past a leading run of the character @v, consuming at most
 * *@size bytes; *@size is reduced by the number of bytes skipped.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	for (; *size && **buf == v; (*buf)++)
		(*size)--;
}
289 
290 /**
291  * strtoul_lenient - parse an ASCII formatted integer from a buffer and only
292  *                   fail on overflow
293  *
294  * @cp: kernel buffer containing the string to parse
295  * @endp: pointer to store the trailing characters
296  * @base: the base to use
297  * @res: where the parsed integer will be stored
298  *
299  * In case of success 0 is returned and @res will contain the parsed integer,
300  * @endp will hold any trailing characters.
301  * This function will fail the parse on overflow. If there wasn't an overflow
302  * the function will defer the decision what characters count as invalid to the
303  * caller.
304  */
305 static int strtoul_lenient(const char *cp, char **endp, unsigned int base,
306 			   unsigned long *res)
307 {
308 	unsigned long long result;
309 	unsigned int rv;
310 
311 	cp = _parse_integer_fixup_radix(cp, &base);
312 	rv = _parse_integer(cp, base, &result);
313 	if ((rv & KSTRTOX_OVERFLOW) || (result != (unsigned long)result))
314 		return -ERANGE;
315 
316 	cp += rv;
317 
318 	if (endp)
319 		*endp = (char *)cp;
320 
321 	*res = (unsigned long)result;
322 	return 0;
323 }
324 
#define TMPBUFLEN 22
/**
 * proc_get_long - parse one ASCII formatted integer from a buffer
 *
 * @buf: a kernel buffer
 * @size: size of the kernel buffer
 * @val: this is where the number will be stored
 * @neg: set to %TRUE if number is negative
 * @perm_tr: a vector which contains the allowed trailers
 * @perm_tr_len: size of the perm_tr vector
 * @tr: pointer to store the trailer character
 *
 * At most TMPBUFLEN - 1 bytes of @buf are examined. On success %0 is
 * returned and @buf and @size are advanced past the parsed digits. If a
 * character follows the number it must be one of @perm_tr (when
 * @perm_tr_len is non-zero) and is stored in @tr when @tr is non-NULL.
 * Any parse failure yields -EINVAL.
 */
static int proc_get_long(char **buf, size_t *size,
			  unsigned long *val, bool *neg,
			  const char *perm_tr, unsigned perm_tr_len, char *tr)
{
	char tmp[TMPBUFLEN];
	char *cursor = tmp;
	ssize_t len = *size;

	if (len <= 0)
		return -EINVAL;

	if (len > TMPBUFLEN - 1)
		len = TMPBUFLEN - 1;

	/* Work on a bounded, NUL-terminated private copy. */
	memcpy(tmp, *buf, len);
	tmp[len] = 0;

	/* A lone '-' is not treated as a sign. */
	*neg = (*cursor == '-' && *size > 1);
	if (*neg)
		cursor++;

	if (!isdigit(*cursor))
		return -EINVAL;

	if (strtoul_lenient(cursor, &cursor, 0, val))
		return -EINVAL;

	len = cursor - tmp;

	/* We don't know if the next char is whitespace thus we may accept
	 * invalid integers (e.g. 1234...a) or two integers instead of one
	 * (e.g. 123...1). So lets not allow such large numbers. */
	if (len == TMPBUFLEN - 1)
		return -EINVAL;

	if (len < *size) {
		if (perm_tr_len && !memchr(perm_tr, *cursor, perm_tr_len))
			return -EINVAL;
		if (tr)
			*tr = *cursor;
	}

	*buf += len;
	*size -= len;

	return 0;
}
389 
390 /**
391  * proc_put_long - converts an integer to a decimal ASCII formatted string
392  *
393  * @buf: the user buffer
394  * @size: the size of the user buffer
395  * @val: the integer to be converted
396  * @neg: sign of the number, %TRUE for negative
397  *
398  * In case of success @buf and @size are updated with the amount of bytes
399  * written.
400  */
401 static void proc_put_long(void **buf, size_t *size, unsigned long val, bool neg)
402 {
403 	int len;
404 	char tmp[TMPBUFLEN], *p = tmp;
405 
406 	sprintf(p, "%s%lu", neg ? "-" : "", val);
407 	len = strlen(tmp);
408 	if (len > *size)
409 		len = *size;
410 	memcpy(*buf, tmp, len);
411 	*size -= len;
412 	*buf += len;
413 }
414 #undef TMPBUFLEN
415 
/*
 * Store @c at *@buf and advance *@buf by one byte, provided *@size is
 * non-zero; *@size is decremented. Nothing happens when no room is left.
 */
static void proc_put_char(void **buf, size_t *size, char c)
{
	char *dst;

	if (!*size)
		return;

	dst = *buf;
	*dst = c;
	*buf = dst + 1;
	(*size)--;
}
427 
/*
 * Converter for bool storage reached through an int pointer: on write the
 * parsed value *@lvalp is stored as a bool (@negp is not consulted), on
 * read the bool is reported as 0 or 1 with *@negp cleared. Always returns 0.
 */
static int do_proc_dobool_conv(bool *negp, unsigned long *lvalp,
				int *valp,
				int write, void *data)
{
	bool *flag = (bool *)valp;

	if (write) {
		*flag = *lvalp;
		return 0;
	}

	*lvalp = *flag;
	*negp = false;
	return 0;
}
442 
/*
 * Default converter between an int and the (sign, magnitude) pair used by
 * the parser. On write the magnitude must fit an int (up to INT_MAX, or
 * INT_MAX + 1 for negative input), otherwise -EINVAL is returned. On read
 * the int at @valp is split into *@negp and *@lvalp.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (!write) {
		int val = READ_ONCE(*valp);
		bool negative = val < 0;

		*negp = negative;
		*lvalp = negative ? -(unsigned long)val : (unsigned long)val;
		return 0;
	}

	if (*negp) {
		if (*lvalp > (unsigned long) INT_MAX + 1)
			return -EINVAL;
		WRITE_ONCE(*valp, -*lvalp);
		return 0;
	}

	if (*lvalp > (unsigned long) INT_MAX)
		return -EINVAL;
	WRITE_ONCE(*valp, *lvalp);
	return 0;
}
469 
/*
 * Default converter for unsigned int entries: a write rejects values above
 * UINT_MAX with -EINVAL, a read copies the stored value into *@lvalp.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
				  unsigned int *valp,
				  int write, void *data)
{
	if (!write) {
		*lvalp = (unsigned long)READ_ONCE(*valp);
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;

	WRITE_ONCE(*valp, *lvalp);
	return 0;
}
484 
/* Characters accepted as a trailer after a parsed number (no NUL entry). */
static const char proc_wspace_sep[] = {
	' ',
	'\t',
	'\n',
};
486 
487 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
488 		  int write, void *buffer,
489 		  size_t *lenp, loff_t *ppos,
490 		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
491 			      int write, void *data),
492 		  void *data)
493 {
494 	int *i, vleft, first = 1, err = 0;
495 	size_t left;
496 	char *p;
497 
498 	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
499 		*lenp = 0;
500 		return 0;
501 	}
502 
503 	i = (int *) tbl_data;
504 	vleft = table->maxlen / sizeof(*i);
505 	left = *lenp;
506 
507 	if (!conv)
508 		conv = do_proc_dointvec_conv;
509 
510 	if (write) {
511 		if (proc_first_pos_non_zero_ignore(ppos, table))
512 			goto out;
513 
514 		if (left > PAGE_SIZE - 1)
515 			left = PAGE_SIZE - 1;
516 		p = buffer;
517 	}
518 
519 	for (; left && vleft--; i++, first=0) {
520 		unsigned long lval;
521 		bool neg;
522 
523 		if (write) {
524 			proc_skip_spaces(&p, &left);
525 
526 			if (!left)
527 				break;
528 			err = proc_get_long(&p, &left, &lval, &neg,
529 					     proc_wspace_sep,
530 					     sizeof(proc_wspace_sep), NULL);
531 			if (err)
532 				break;
533 			if (conv(&neg, &lval, i, 1, data)) {
534 				err = -EINVAL;
535 				break;
536 			}
537 		} else {
538 			if (conv(&neg, &lval, i, 0, data)) {
539 				err = -EINVAL;
540 				break;
541 			}
542 			if (!first)
543 				proc_put_char(&buffer, &left, '\t');
544 			proc_put_long(&buffer, &left, lval, neg);
545 		}
546 	}
547 
548 	if (!write && !first && left && !err)
549 		proc_put_char(&buffer, &left, '\n');
550 	if (write && !err && left)
551 		proc_skip_spaces(&p, &left);
552 	if (write && first)
553 		return err ? : -EINVAL;
554 	*lenp -= left;
555 out:
556 	*ppos += *lenp;
557 	return err;
558 }
559 
560 static int do_proc_dointvec(struct ctl_table *table, int write,
561 		  void *buffer, size_t *lenp, loff_t *ppos,
562 		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
563 			      int write, void *data),
564 		  void *data)
565 {
566 	return __do_proc_dointvec(table->data, table, write,
567 			buffer, lenp, ppos, conv, data);
568 }
569 
570 static int do_proc_douintvec_w(unsigned int *tbl_data,
571 			       struct ctl_table *table,
572 			       void *buffer,
573 			       size_t *lenp, loff_t *ppos,
574 			       int (*conv)(unsigned long *lvalp,
575 					   unsigned int *valp,
576 					   int write, void *data),
577 			       void *data)
578 {
579 	unsigned long lval;
580 	int err = 0;
581 	size_t left;
582 	bool neg;
583 	char *p = buffer;
584 
585 	left = *lenp;
586 
587 	if (proc_first_pos_non_zero_ignore(ppos, table))
588 		goto bail_early;
589 
590 	if (left > PAGE_SIZE - 1)
591 		left = PAGE_SIZE - 1;
592 
593 	proc_skip_spaces(&p, &left);
594 	if (!left) {
595 		err = -EINVAL;
596 		goto out_free;
597 	}
598 
599 	err = proc_get_long(&p, &left, &lval, &neg,
600 			     proc_wspace_sep,
601 			     sizeof(proc_wspace_sep), NULL);
602 	if (err || neg) {
603 		err = -EINVAL;
604 		goto out_free;
605 	}
606 
607 	if (conv(&lval, tbl_data, 1, data)) {
608 		err = -EINVAL;
609 		goto out_free;
610 	}
611 
612 	if (!err && left)
613 		proc_skip_spaces(&p, &left);
614 
615 out_free:
616 	if (err)
617 		return -EINVAL;
618 
619 	return 0;
620 
621 	/* This is in keeping with old __do_proc_dointvec() */
622 bail_early:
623 	*ppos += *lenp;
624 	return err;
625 }
626 
627 static int do_proc_douintvec_r(unsigned int *tbl_data, void *buffer,
628 			       size_t *lenp, loff_t *ppos,
629 			       int (*conv)(unsigned long *lvalp,
630 					   unsigned int *valp,
631 					   int write, void *data),
632 			       void *data)
633 {
634 	unsigned long lval;
635 	int err = 0;
636 	size_t left;
637 
638 	left = *lenp;
639 
640 	if (conv(&lval, tbl_data, 0, data)) {
641 		err = -EINVAL;
642 		goto out;
643 	}
644 
645 	proc_put_long(&buffer, &left, lval, false);
646 	if (!left)
647 		goto out;
648 
649 	proc_put_char(&buffer, &left, '\n');
650 
651 out:
652 	*lenp -= left;
653 	*ppos += *lenp;
654 
655 	return err;
656 }
657 
658 static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
659 			       int write, void *buffer,
660 			       size_t *lenp, loff_t *ppos,
661 			       int (*conv)(unsigned long *lvalp,
662 					   unsigned int *valp,
663 					   int write, void *data),
664 			       void *data)
665 {
666 	unsigned int *i, vleft;
667 
668 	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
669 		*lenp = 0;
670 		return 0;
671 	}
672 
673 	i = (unsigned int *) tbl_data;
674 	vleft = table->maxlen / sizeof(*i);
675 
676 	/*
677 	 * Arrays are not supported, keep this simple. *Do not* add
678 	 * support for them.
679 	 */
680 	if (vleft != 1) {
681 		*lenp = 0;
682 		return -EINVAL;
683 	}
684 
685 	if (!conv)
686 		conv = do_proc_douintvec_conv;
687 
688 	if (write)
689 		return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
690 					   conv, data);
691 	return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
692 }
693 
694 int do_proc_douintvec(struct ctl_table *table, int write,
695 		      void *buffer, size_t *lenp, loff_t *ppos,
696 		      int (*conv)(unsigned long *lvalp,
697 				  unsigned int *valp,
698 				  int write, void *data),
699 		      void *data)
700 {
701 	return __do_proc_douintvec(table->data, table, write,
702 				   buffer, lenp, ppos, conv, data);
703 }
704 
705 /**
706  * proc_dobool - read/write a bool
707  * @table: the sysctl table
708  * @write: %TRUE if this is a write to the sysctl file
709  * @buffer: the user buffer
710  * @lenp: the size of the user buffer
711  * @ppos: file position
712  *
713  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
714  * values from/to the user buffer, treated as an ASCII string.
715  *
716  * Returns 0 on success.
717  */
718 int proc_dobool(struct ctl_table *table, int write, void *buffer,
719 		size_t *lenp, loff_t *ppos)
720 {
721 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
722 				do_proc_dobool_conv, NULL);
723 }
724 
725 /**
726  * proc_dointvec - read a vector of integers
727  * @table: the sysctl table
728  * @write: %TRUE if this is a write to the sysctl file
729  * @buffer: the user buffer
730  * @lenp: the size of the user buffer
731  * @ppos: file position
732  *
733  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
734  * values from/to the user buffer, treated as an ASCII string.
735  *
736  * Returns 0 on success.
737  */
738 int proc_dointvec(struct ctl_table *table, int write, void *buffer,
739 		  size_t *lenp, loff_t *ppos)
740 {
741 	return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
742 }
743 
#ifdef CONFIG_COMPACTION
/*
 * proc_dointvec_minmax() wrapper that, when CONFIG_PREEMPT_RT is enabled,
 * warns once if a successful write changed the stored int. Reads and
 * non-RT builds go straight to proc_dointvec_minmax().
 */
static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table,
		int write, void *buffer, size_t *lenp, loff_t *ppos)
{
	int *value = table->data;
	int before, ret;

	if (!IS_ENABLED(CONFIG_PREEMPT_RT) || !write)
		return proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	before = *value;
	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (!ret && before != *value)
		pr_warn_once("sysctl attribute %s changed by %s[%d]\n",
			     table->procname, current->comm,
			     task_pid_nr(current));
	return ret;
}
#endif
764 
765 /**
766  * proc_douintvec - read a vector of unsigned integers
767  * @table: the sysctl table
768  * @write: %TRUE if this is a write to the sysctl file
769  * @buffer: the user buffer
770  * @lenp: the size of the user buffer
771  * @ppos: file position
772  *
773  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
774  * values from/to the user buffer, treated as an ASCII string.
775  *
776  * Returns 0 on success.
777  */
778 int proc_douintvec(struct ctl_table *table, int write, void *buffer,
779 		size_t *lenp, loff_t *ppos)
780 {
781 	return do_proc_douintvec(table, write, buffer, lenp, ppos,
782 				 do_proc_douintvec_conv, NULL);
783 }
784 
785 /*
786  * Taint values can only be increased
787  * This means we can safely use a temporary.
788  */
789 static int proc_taint(struct ctl_table *table, int write,
790 			       void *buffer, size_t *lenp, loff_t *ppos)
791 {
792 	struct ctl_table t;
793 	unsigned long tmptaint = get_taint();
794 	int err;
795 
796 	if (write && !capable(CAP_SYS_ADMIN))
797 		return -EPERM;
798 
799 	t = *table;
800 	t.data = &tmptaint;
801 	err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
802 	if (err < 0)
803 		return err;
804 
805 	if (write) {
806 		int i;
807 
808 		/*
809 		 * If we are relying on panic_on_taint not producing
810 		 * false positives due to userspace input, bail out
811 		 * before setting the requested taint flags.
812 		 */
813 		if (panic_on_taint_nousertaint && (tmptaint & panic_on_taint))
814 			return -EINVAL;
815 
816 		/*
817 		 * Poor man's atomic or. Not worth adding a primitive
818 		 * to everyone's atomic.h for this
819 		 */
820 		for (i = 0; i < TAINT_FLAGS_COUNT; i++)
821 			if ((1UL << i) & tmptaint)
822 				add_taint(i, LOCKDEP_STILL_OK);
823 	}
824 
825 	return err;
826 }
827 
/**
 * struct do_proc_dointvec_minmax_conv_param - range limits for proc_dointvec_minmax()
 * @min: pointer to the smallest value accepted, or NULL for no lower bound
 * @max: pointer to the largest value accepted, or NULL for no upper bound
 *
 * Handed to do_proc_dointvec_minmax_conv() as its private data so that
 * values written through proc_dointvec_minmax() can be range checked.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;	/* lower bound */
	int *max;	/* upper bound */
};
841 
842 static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
843 					int *valp,
844 					int write, void *data)
845 {
846 	int tmp, ret;
847 	struct do_proc_dointvec_minmax_conv_param *param = data;
848 	/*
849 	 * If writing, first do so via a temporary local int so we can
850 	 * bounds-check it before touching *valp.
851 	 */
852 	int *ip = write ? &tmp : valp;
853 
854 	ret = do_proc_dointvec_conv(negp, lvalp, ip, write, data);
855 	if (ret)
856 		return ret;
857 
858 	if (write) {
859 		if ((param->min && *param->min > tmp) ||
860 		    (param->max && *param->max < tmp))
861 			return -EINVAL;
862 		WRITE_ONCE(*valp, tmp);
863 	}
864 
865 	return 0;
866 }
867 
868 /**
869  * proc_dointvec_minmax - read a vector of integers with min/max values
870  * @table: the sysctl table
871  * @write: %TRUE if this is a write to the sysctl file
872  * @buffer: the user buffer
873  * @lenp: the size of the user buffer
874  * @ppos: file position
875  *
876  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
877  * values from/to the user buffer, treated as an ASCII string.
878  *
879  * This routine will ensure the values are within the range specified by
880  * table->extra1 (min) and table->extra2 (max).
881  *
882  * Returns 0 on success or -EINVAL on write when the range check fails.
883  */
884 int proc_dointvec_minmax(struct ctl_table *table, int write,
885 		  void *buffer, size_t *lenp, loff_t *ppos)
886 {
887 	struct do_proc_dointvec_minmax_conv_param param = {
888 		.min = (int *) table->extra1,
889 		.max = (int *) table->extra2,
890 	};
891 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
892 				do_proc_dointvec_minmax_conv, &param);
893 }
894 
/**
 * struct do_proc_douintvec_minmax_conv_param - range limits for proc_douintvec_minmax()
 * @min: pointer to the smallest value accepted, or NULL for no lower bound
 * @max: pointer to the largest value accepted, or NULL for no upper bound
 *
 * Handed to do_proc_douintvec_minmax_conv() as its private data so that
 * values written through proc_douintvec_minmax() can be range checked.
 */
struct do_proc_douintvec_minmax_conv_param {
	unsigned int *min;	/* lower bound */
	unsigned int *max;	/* upper bound */
};
908 
909 static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
910 					 unsigned int *valp,
911 					 int write, void *data)
912 {
913 	int ret;
914 	unsigned int tmp;
915 	struct do_proc_douintvec_minmax_conv_param *param = data;
916 	/* write via temporary local uint for bounds-checking */
917 	unsigned int *up = write ? &tmp : valp;
918 
919 	ret = do_proc_douintvec_conv(lvalp, up, write, data);
920 	if (ret)
921 		return ret;
922 
923 	if (write) {
924 		if ((param->min && *param->min > tmp) ||
925 		    (param->max && *param->max < tmp))
926 			return -ERANGE;
927 
928 		WRITE_ONCE(*valp, tmp);
929 	}
930 
931 	return 0;
932 }
933 
934 /**
935  * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
936  * @table: the sysctl table
937  * @write: %TRUE if this is a write to the sysctl file
938  * @buffer: the user buffer
939  * @lenp: the size of the user buffer
940  * @ppos: file position
941  *
942  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
943  * values from/to the user buffer, treated as an ASCII string. Negative
944  * strings are not allowed.
945  *
946  * This routine will ensure the values are within the range specified by
947  * table->extra1 (min) and table->extra2 (max). There is a final sanity
948  * check for UINT_MAX to avoid having to support wrap around uses from
949  * userspace.
950  *
951  * Returns 0 on success or -ERANGE on write when the range check fails.
952  */
953 int proc_douintvec_minmax(struct ctl_table *table, int write,
954 			  void *buffer, size_t *lenp, loff_t *ppos)
955 {
956 	struct do_proc_douintvec_minmax_conv_param param = {
957 		.min = (unsigned int *) table->extra1,
958 		.max = (unsigned int *) table->extra2,
959 	};
960 	return do_proc_douintvec(table, write, buffer, lenp, ppos,
961 				 do_proc_douintvec_minmax_conv, &param);
962 }
963 
964 /**
965  * proc_dou8vec_minmax - read a vector of unsigned chars with min/max values
966  * @table: the sysctl table
967  * @write: %TRUE if this is a write to the sysctl file
968  * @buffer: the user buffer
969  * @lenp: the size of the user buffer
970  * @ppos: file position
971  *
972  * Reads/writes up to table->maxlen/sizeof(u8) unsigned chars
973  * values from/to the user buffer, treated as an ASCII string. Negative
974  * strings are not allowed.
975  *
976  * This routine will ensure the values are within the range specified by
977  * table->extra1 (min) and table->extra2 (max).
978  *
979  * Returns 0 on success or an error on write when the range check fails.
980  */
981 int proc_dou8vec_minmax(struct ctl_table *table, int write,
982 			void *buffer, size_t *lenp, loff_t *ppos)
983 {
984 	struct ctl_table tmp;
985 	unsigned int min = 0, max = 255U, val;
986 	u8 *data = table->data;
987 	struct do_proc_douintvec_minmax_conv_param param = {
988 		.min = &min,
989 		.max = &max,
990 	};
991 	int res;
992 
993 	/* Do not support arrays yet. */
994 	if (table->maxlen != sizeof(u8))
995 		return -EINVAL;
996 
997 	if (table->extra1) {
998 		min = *(unsigned int *) table->extra1;
999 		if (min > 255U)
1000 			return -EINVAL;
1001 	}
1002 	if (table->extra2) {
1003 		max = *(unsigned int *) table->extra2;
1004 		if (max > 255U)
1005 			return -EINVAL;
1006 	}
1007 
1008 	tmp = *table;
1009 
1010 	tmp.maxlen = sizeof(val);
1011 	tmp.data = &val;
1012 	val = READ_ONCE(*data);
1013 	res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos,
1014 				do_proc_douintvec_minmax_conv, &param);
1015 	if (res)
1016 		return res;
1017 	if (write)
1018 		WRITE_ONCE(*data, val);
1019 	return 0;
1020 }
1021 EXPORT_SYMBOL_GPL(proc_dou8vec_minmax);
1022 
#ifdef CONFIG_MAGIC_SYSRQ
/*
 * Handler operating on a local copy of sysrq_mask(): reads print that
 * copy, successful writes pass the parsed value to sysrq_toggle_support().
 */
static int sysrq_sysctl_handler(struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	int mask = sysrq_mask();
	int ret;

	ret = __do_proc_dointvec(&mask, table, write, buffer,
				 lenp, ppos, NULL, NULL);
	if (!ret && write)
		sysrq_toggle_support(mask);

	return ret;
}
#endif
1042 
1043 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table,
1044 		int write, void *buffer, size_t *lenp, loff_t *ppos,
1045 		unsigned long convmul, unsigned long convdiv)
1046 {
1047 	unsigned long *i, *min, *max;
1048 	int vleft, first = 1, err = 0;
1049 	size_t left;
1050 	char *p;
1051 
1052 	if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
1053 		*lenp = 0;
1054 		return 0;
1055 	}
1056 
1057 	i = data;
1058 	min = table->extra1;
1059 	max = table->extra2;
1060 	vleft = table->maxlen / sizeof(unsigned long);
1061 	left = *lenp;
1062 
1063 	if (write) {
1064 		if (proc_first_pos_non_zero_ignore(ppos, table))
1065 			goto out;
1066 
1067 		if (left > PAGE_SIZE - 1)
1068 			left = PAGE_SIZE - 1;
1069 		p = buffer;
1070 	}
1071 
1072 	for (; left && vleft--; i++, first = 0) {
1073 		unsigned long val;
1074 
1075 		if (write) {
1076 			bool neg;
1077 
1078 			proc_skip_spaces(&p, &left);
1079 			if (!left)
1080 				break;
1081 
1082 			err = proc_get_long(&p, &left, &val, &neg,
1083 					     proc_wspace_sep,
1084 					     sizeof(proc_wspace_sep), NULL);
1085 			if (err || neg) {
1086 				err = -EINVAL;
1087 				break;
1088 			}
1089 
1090 			val = convmul * val / convdiv;
1091 			if ((min && val < *min) || (max && val > *max)) {
1092 				err = -EINVAL;
1093 				break;
1094 			}
1095 			WRITE_ONCE(*i, val);
1096 		} else {
1097 			val = convdiv * READ_ONCE(*i) / convmul;
1098 			if (!first)
1099 				proc_put_char(&buffer, &left, '\t');
1100 			proc_put_long(&buffer, &left, val, false);
1101 		}
1102 	}
1103 
1104 	if (!write && !first && left && !err)
1105 		proc_put_char(&buffer, &left, '\n');
1106 	if (write && !err)
1107 		proc_skip_spaces(&p, &left);
1108 	if (write && first)
1109 		return err ? : -EINVAL;
1110 	*lenp -= left;
1111 out:
1112 	*ppos += *lenp;
1113 	return err;
1114 }
1115 
1116 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
1117 		void *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul,
1118 		unsigned long convdiv)
1119 {
1120 	return __do_proc_doulongvec_minmax(table->data, table, write,
1121 			buffer, lenp, ppos, convmul, convdiv);
1122 }
1123 
1124 /**
1125  * proc_doulongvec_minmax - read a vector of long integers with min/max values
1126  * @table: the sysctl table
1127  * @write: %TRUE if this is a write to the sysctl file
1128  * @buffer: the user buffer
1129  * @lenp: the size of the user buffer
1130  * @ppos: file position
1131  *
1132  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1133  * values from/to the user buffer, treated as an ASCII string.
1134  *
1135  * This routine will ensure the values are within the range specified by
1136  * table->extra1 (min) and table->extra2 (max).
1137  *
1138  * Returns 0 on success.
1139  */
1140 int proc_doulongvec_minmax(struct ctl_table *table, int write,
1141 			   void *buffer, size_t *lenp, loff_t *ppos)
1142 {
1143     return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
1144 }
1145 
1146 /**
1147  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
1148  * @table: the sysctl table
1149  * @write: %TRUE if this is a write to the sysctl file
1150  * @buffer: the user buffer
1151  * @lenp: the size of the user buffer
1152  * @ppos: file position
1153  *
1154  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1155  * values from/to the user buffer, treated as an ASCII string. The values
1156  * are treated as milliseconds, and converted to jiffies when they are stored.
1157  *
1158  * This routine will ensure the values are within the range specified by
1159  * table->extra1 (min) and table->extra2 (max).
1160  *
1161  * Returns 0 on success.
1162  */
1163 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1164 				      void *buffer, size_t *lenp, loff_t *ppos)
1165 {
1166     return do_proc_doulongvec_minmax(table, write, buffer,
1167 				     lenp, ppos, HZ, 1000l);
1168 }
1169 
1170 
1171 static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
1172 					 int *valp,
1173 					 int write, void *data)
1174 {
1175 	if (write) {
1176 		if (*lvalp > INT_MAX / HZ)
1177 			return 1;
1178 		if (*negp)
1179 			WRITE_ONCE(*valp, -*lvalp * HZ);
1180 		else
1181 			WRITE_ONCE(*valp, *lvalp * HZ);
1182 	} else {
1183 		int val = READ_ONCE(*valp);
1184 		unsigned long lval;
1185 		if (val < 0) {
1186 			*negp = true;
1187 			lval = -(unsigned long)val;
1188 		} else {
1189 			*negp = false;
1190 			lval = (unsigned long)val;
1191 		}
1192 		*lvalp = lval / HZ;
1193 	}
1194 	return 0;
1195 }
1196 
1197 static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
1198 						int *valp,
1199 						int write, void *data)
1200 {
1201 	if (write) {
1202 		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
1203 			return 1;
1204 		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
1205 	} else {
1206 		int val = *valp;
1207 		unsigned long lval;
1208 		if (val < 0) {
1209 			*negp = true;
1210 			lval = -(unsigned long)val;
1211 		} else {
1212 			*negp = false;
1213 			lval = (unsigned long)val;
1214 		}
1215 		*lvalp = jiffies_to_clock_t(lval);
1216 	}
1217 	return 0;
1218 }
1219 
/*
 * Conversion callback for proc_dointvec_ms_jiffies(): translates between
 * the millisecond value exchanged with userspace (@negp sign flag plus
 * @lvalp magnitude) and the jiffies count kept in the int at @valp.
 *
 * Returns 0 on success, or 1 when the jiffies value computed for a write
 * is larger than INT_MAX. @data is unused.
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	unsigned long jif;

	if (!write) {
		int stored = READ_ONCE(*valp);
		unsigned long magnitude;

		if (stored >= 0) {
			*negp = false;
			magnitude = (unsigned long)stored;
		} else {
			*negp = true;
			/* Negate in unsigned arithmetic so INT_MIN works. */
			magnitude = -(unsigned long)stored;
		}
		*lvalp = jiffies_to_msecs(magnitude);
		return 0;
	}

	jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
	if (jif > INT_MAX)
		return 1;

	WRITE_ONCE(*valp, (int)jif);
	return 0;
}
1244 
1245 static int do_proc_dointvec_ms_jiffies_minmax_conv(bool *negp, unsigned long *lvalp,
1246 						int *valp, int write, void *data)
1247 {
1248 	int tmp, ret;
1249 	struct do_proc_dointvec_minmax_conv_param *param = data;
1250 	/*
1251 	 * If writing, first do so via a temporary local int so we can
1252 	 * bounds-check it before touching *valp.
1253 	 */
1254 	int *ip = write ? &tmp : valp;
1255 
1256 	ret = do_proc_dointvec_ms_jiffies_conv(negp, lvalp, ip, write, data);
1257 	if (ret)
1258 		return ret;
1259 
1260 	if (write) {
1261 		if ((param->min && *param->min > tmp) ||
1262 				(param->max && *param->max < tmp))
1263 			return -EINVAL;
1264 		*valp = tmp;
1265 	}
1266 	return 0;
1267 }
1268 
1269 /**
1270  * proc_dointvec_jiffies - read a vector of integers as seconds
1271  * @table: the sysctl table
1272  * @write: %TRUE if this is a write to the sysctl file
1273  * @buffer: the user buffer
1274  * @lenp: the size of the user buffer
1275  * @ppos: file position
1276  *
1277  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1278  * values from/to the user buffer, treated as an ASCII string.
1279  * The values read are assumed to be in seconds, and are converted into
1280  * jiffies.
1281  *
1282  * Returns 0 on success.
1283  */
1284 int proc_dointvec_jiffies(struct ctl_table *table, int write,
1285 			  void *buffer, size_t *lenp, loff_t *ppos)
1286 {
1287     return do_proc_dointvec(table,write,buffer,lenp,ppos,
1288 		    	    do_proc_dointvec_jiffies_conv,NULL);
1289 }
1290 
1291 int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1292 			  void *buffer, size_t *lenp, loff_t *ppos)
1293 {
1294 	struct do_proc_dointvec_minmax_conv_param param = {
1295 		.min = (int *) table->extra1,
1296 		.max = (int *) table->extra2,
1297 	};
1298 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
1299 			do_proc_dointvec_ms_jiffies_minmax_conv, &param);
1300 }
1301 
1302 /**
1303  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
1304  * @table: the sysctl table
1305  * @write: %TRUE if this is a write to the sysctl file
1306  * @buffer: the user buffer
1307  * @lenp: the size of the user buffer
1308  * @ppos: pointer to the file position
1309  *
1310  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1311  * values from/to the user buffer, treated as an ASCII string.
1312  * The values read are assumed to be in 1/USER_HZ seconds, and
1313  * are converted into jiffies.
1314  *
1315  * Returns 0 on success.
1316  */
1317 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
1318 				 void *buffer, size_t *lenp, loff_t *ppos)
1319 {
1320 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
1321 				do_proc_dointvec_userhz_jiffies_conv, NULL);
1322 }
1323 
1324 /**
1325  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
1326  * @table: the sysctl table
1327  * @write: %TRUE if this is a write to the sysctl file
1328  * @buffer: the user buffer
1329  * @lenp: the size of the user buffer
1330  * @ppos: file position
1331  * @ppos: the current position in the file
1332  *
1333  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1334  * values from/to the user buffer, treated as an ASCII string.
1335  * The values read are assumed to be in 1/1000 seconds, and
1336  * are converted into jiffies.
1337  *
1338  * Returns 0 on success.
1339  */
1340 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, void *buffer,
1341 		size_t *lenp, loff_t *ppos)
1342 {
1343 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
1344 				do_proc_dointvec_ms_jiffies_conv, NULL);
1345 }
1346 
1347 static int proc_do_cad_pid(struct ctl_table *table, int write, void *buffer,
1348 		size_t *lenp, loff_t *ppos)
1349 {
1350 	struct pid *new_pid;
1351 	pid_t tmp;
1352 	int r;
1353 
1354 	tmp = pid_vnr(cad_pid);
1355 
1356 	r = __do_proc_dointvec(&tmp, table, write, buffer,
1357 			       lenp, ppos, NULL, NULL);
1358 	if (r || !write)
1359 		return r;
1360 
1361 	new_pid = find_get_pid(tmp);
1362 	if (!new_pid)
1363 		return -ESRCH;
1364 
1365 	put_pid(xchg(&cad_pid, new_pid));
1366 	return 0;
1367 }
1368 
1369 /**
1370  * proc_do_large_bitmap - read/write from/to a large bitmap
1371  * @table: the sysctl table
1372  * @write: %TRUE if this is a write to the sysctl file
1373  * @buffer: the user buffer
1374  * @lenp: the size of the user buffer
1375  * @ppos: file position
1376  *
1377  * table->data points to the bitmap pointer (it is dereferenced as
1378  * unsigned long **) and table->maxlen holds the bitmap length in bits.
1379  *
1380  * We use a range comma separated format (e.g. 1,3-4,10-10) so that
1381  * large bitmaps may be represented in a compact manner. Writing into
1382  * the file will clear the bitmap then update it with the given input.
 * A write at a non-zero file position is OR-ed into the current bitmap
 * instead of replacing it.
1383  *
1384  * Returns 0 on success, -ENOMEM if the temporary bitmap cannot be
 * allocated, and -EINVAL if a value is negative, is not below the bitmap
 * length, or a range has its start after its end. Errors reported by
 * proc_get_long() are returned as they are.
1385  */
1386 int proc_do_large_bitmap(struct ctl_table *table, int write,
1387 			 void *buffer, size_t *lenp, loff_t *ppos)
1388 {
1389 	int err = 0;
1390 	size_t left = *lenp;
1391 	unsigned long bitmap_len = table->maxlen;
1392 	unsigned long *bitmap = *(unsigned long **) table->data;
1393 	unsigned long *tmp_bitmap = NULL;
	/*
	 * tr_a is passed when parsing the first number of an item, tr_b when
	 * parsing the end of a range; presumably these are the characters
	 * proc_get_long() accepts after a number - confirm against its
	 * definition. c receives the character that followed the number
	 * (it is compared with '-' below).
	 */
1394 	char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
1395 
	/* Nothing to do: no bitmap, no room, or a read past position zero. */
1396 	if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
1397 		*lenp = 0;
1398 		return 0;
1399 	}
1400 
1401 	if (write) {
1402 		char *p = buffer;
1403 		size_t skipped = 0;
1404 
		/* Parse at most PAGE_SIZE - 1 bytes in this call. */
1405 		if (left > PAGE_SIZE - 1) {
1406 			left = PAGE_SIZE - 1;
1407 			/* How much of the buffer we'll skip this pass */
1408 			skipped = *lenp - left;
1409 		}
1410 
		/* Build the result in a scratch bitmap; commit only on success. */
1411 		tmp_bitmap = bitmap_zalloc(bitmap_len, GFP_KERNEL);
1412 		if (!tmp_bitmap)
1413 			return -ENOMEM;
1414 		proc_skip_char(&p, &left, '\n');
1415 		while (!err && left) {
1416 			unsigned long val_a, val_b;
1417 			bool neg;
1418 			size_t saved_left;
1419 
1420 			/* In case we stop parsing mid-number, we can reset */
1421 			saved_left = left;
1422 			err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
1423 					     sizeof(tr_a), &c);
1424 			/*
1425 			 * If we consumed the entirety of a truncated buffer or
1426 			 * only one char is left (may be a "-"), then stop here,
1427 			 * reset, & come back for more.
1428 			 */
1429 			if ((left <= 1) && skipped) {
1430 				left = saved_left;
1431 				break;
1432 			}
1433 
1434 			if (err)
1435 				break;
1436 			if (val_a >= bitmap_len || neg) {
1437 				err = -EINVAL;
1438 				break;
1439 			}
1440 
			/* A lone number is treated as the range val_a-val_a. */
1441 			val_b = val_a;
			/* Step over the character that followed the number. */
1442 			if (left) {
1443 				p++;
1444 				left--;
1445 			}
1446 
1447 			if (c == '-') {
1448 				err = proc_get_long(&p, &left, &val_b,
1449 						     &neg, tr_b, sizeof(tr_b),
1450 						     &c);
1451 				/*
1452 				 * If we consumed all of a truncated buffer,
1453 				 * then stop here, reset, & come back for more.
1454 				 */
1455 				if (!left && skipped) {
1456 					left = saved_left;
1457 					break;
1458 				}
1459 
1460 				if (err)
1461 					break;
1462 				if (val_b >= bitmap_len || neg ||
1463 				    val_a > val_b) {
1464 					err = -EINVAL;
1465 					break;
1466 				}
1467 				if (left) {
1468 					p++;
1469 					left--;
1470 				}
1471 			}
1472 
			/* Mark bits val_a..val_b inclusive in the scratch bitmap. */
1473 			bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
1474 			proc_skip_char(&p, &left, '\n');
1475 		}
		/* Bytes beyond the parsed window count as not consumed. */
1476 		left += skipped;
1477 	} else {
1478 		unsigned long bit_a, bit_b = 0;
1479 		bool first = 1;
1480 
		/* Emit each run of set bits as "a" or "a-b", comma separated. */
1481 		while (left) {
1482 			bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
1483 			if (bit_a >= bitmap_len)
1484 				break;
			/* bit_b becomes the last set bit of the run at bit_a. */
1485 			bit_b = find_next_zero_bit(bitmap, bitmap_len,
1486 						   bit_a + 1) - 1;
1487 
1488 			if (!first)
1489 				proc_put_char(&buffer, &left, ',');
1490 			proc_put_long(&buffer, &left, bit_a, false);
1491 			if (bit_a != bit_b) {
1492 				proc_put_char(&buffer, &left, '-');
1493 				proc_put_long(&buffer, &left, bit_b, false);
1494 			}
1495 
			/* Resume the search just past the run that was printed. */
1496 			first = 0; bit_b++;
1497 		}
1498 		proc_put_char(&buffer, &left, '\n');
1499 	}
1500 
1501 	if (!err) {
1502 		if (write) {
			/* Non-zero position: merge; position zero: replace. */
1503 			if (*ppos)
1504 				bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
1505 			else
1506 				bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
1507 		}
		/* Report the bytes actually handled and advance the position. */
1508 		*lenp -= left;
1509 		*ppos += *lenp;
1510 	}
1511 
	/* tmp_bitmap is still NULL on the read path. */
1512 	bitmap_free(tmp_bitmap);
1513 	return err;
1514 }
1515 
1516 #else /* CONFIG_PROC_SYSCTL */
1517 
1518 int proc_dostring(struct ctl_table *table, int write,
1519 		  void *buffer, size_t *lenp, loff_t *ppos)
1520 {
1521 	return -ENOSYS;
1522 }
1523 
1524 int proc_dobool(struct ctl_table *table, int write,
1525 		void *buffer, size_t *lenp, loff_t *ppos)
1526 {
1527 	return -ENOSYS;
1528 }
1529 
1530 int proc_dointvec(struct ctl_table *table, int write,
1531 		  void *buffer, size_t *lenp, loff_t *ppos)
1532 {
1533 	return -ENOSYS;
1534 }
1535 
1536 int proc_douintvec(struct ctl_table *table, int write,
1537 		  void *buffer, size_t *lenp, loff_t *ppos)
1538 {
1539 	return -ENOSYS;
1540 }
1541 
1542 int proc_dointvec_minmax(struct ctl_table *table, int write,
1543 		    void *buffer, size_t *lenp, loff_t *ppos)
1544 {
1545 	return -ENOSYS;
1546 }
1547 
1548 int proc_douintvec_minmax(struct ctl_table *table, int write,
1549 			  void *buffer, size_t *lenp, loff_t *ppos)
1550 {
1551 	return -ENOSYS;
1552 }
1553 
1554 int proc_dou8vec_minmax(struct ctl_table *table, int write,
1555 			void *buffer, size_t *lenp, loff_t *ppos)
1556 {
1557 	return -ENOSYS;
1558 }
1559 
1560 int proc_dointvec_jiffies(struct ctl_table *table, int write,
1561 		    void *buffer, size_t *lenp, loff_t *ppos)
1562 {
1563 	return -ENOSYS;
1564 }
1565 
1566 int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1567 				    void *buffer, size_t *lenp, loff_t *ppos)
1568 {
1569 	return -ENOSYS;
1570 }
1571 
1572 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
1573 		    void *buffer, size_t *lenp, loff_t *ppos)
1574 {
1575 	return -ENOSYS;
1576 }
1577 
1578 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
1579 			     void *buffer, size_t *lenp, loff_t *ppos)
1580 {
1581 	return -ENOSYS;
1582 }
1583 
1584 int proc_doulongvec_minmax(struct ctl_table *table, int write,
1585 		    void *buffer, size_t *lenp, loff_t *ppos)
1586 {
1587 	return -ENOSYS;
1588 }
1589 
1590 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1591 				      void *buffer, size_t *lenp, loff_t *ppos)
1592 {
1593 	return -ENOSYS;
1594 }
1595 
1596 int proc_do_large_bitmap(struct ctl_table *table, int write,
1597 			 void *buffer, size_t *lenp, loff_t *ppos)
1598 {
1599 	return -ENOSYS;
1600 }
1601 
1602 #endif /* CONFIG_PROC_SYSCTL */
1603 
1604 #if defined(CONFIG_SYSCTL)
1605 int proc_do_static_key(struct ctl_table *table, int write,
1606 		       void *buffer, size_t *lenp, loff_t *ppos)
1607 {
1608 	struct static_key *key = (struct static_key *)table->data;
1609 	static DEFINE_MUTEX(static_key_mutex);
1610 	int val, ret;
1611 	struct ctl_table tmp = {
1612 		.data   = &val,
1613 		.maxlen = sizeof(val),
1614 		.mode   = table->mode,
1615 		.extra1 = SYSCTL_ZERO,
1616 		.extra2 = SYSCTL_ONE,
1617 	};
1618 
1619 	if (write && !capable(CAP_SYS_ADMIN))
1620 		return -EPERM;
1621 
1622 	mutex_lock(&static_key_mutex);
1623 	val = static_key_enabled(key);
1624 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
1625 	if (write && !ret) {
1626 		if (val)
1627 			static_key_enable(key);
1628 		else
1629 			static_key_disable(key);
1630 	}
1631 	mutex_unlock(&static_key_mutex);
1632 	return ret;
1633 }
1634 
/*
 * Table of sysctl entries. Judging by the entry names these are the
 * /proc/sys/kernel files, but the registration call is outside this view -
 * TODO confirm.
 *
 * Each entry gives the file name (.procname), the backing storage (.data
 * and .maxlen), the permission bits (.mode) and the handler
 * (.proc_handler). For the *_minmax handlers .extra1/.extra2 point at the
 * lower/upper limit. Entries inside #ifdef/#if blocks are only present
 * when the corresponding configuration option is set.
 */
1635 static struct ctl_table kern_table[] = {
1636 	{
1637 		.procname	= "panic",
1638 		.data		= &panic_timeout,
1639 		.maxlen		= sizeof(int),
1640 		.mode		= 0644,
1641 		.proc_handler	= proc_dointvec,
1642 	},
1643 #ifdef CONFIG_PROC_SYSCTL
1644 	{
1645 		.procname	= "tainted",
1646 		.maxlen 	= sizeof(long),
1647 		.mode		= 0644,
1648 		.proc_handler	= proc_taint,
1649 	},
1650 	{
1651 		.procname	= "sysctl_writes_strict",
1652 		.data		= &sysctl_writes_strict,
1653 		.maxlen		= sizeof(int),
1654 		.mode		= 0644,
1655 		.proc_handler	= proc_dointvec_minmax,
1656 		.extra1		= SYSCTL_NEG_ONE,
1657 		.extra2		= SYSCTL_ONE,
1658 	},
1659 #endif
1660 	{
1661 		.procname	= "print-fatal-signals",
1662 		.data		= &print_fatal_signals,
1663 		.maxlen		= sizeof(int),
1664 		.mode		= 0644,
1665 		.proc_handler	= proc_dointvec,
1666 	},
1667 #ifdef CONFIG_SPARC
1668 	{
1669 		.procname	= "reboot-cmd",
1670 		.data		= reboot_command,
1671 		.maxlen		= 256,
1672 		.mode		= 0644,
1673 		.proc_handler	= proc_dostring,
1674 	},
1675 	{
1676 		.procname	= "stop-a",
1677 		.data		= &stop_a_enabled,
1678 		.maxlen		= sizeof (int),
1679 		.mode		= 0644,
1680 		.proc_handler	= proc_dointvec,
1681 	},
1682 	{
1683 		.procname	= "scons-poweroff",
1684 		.data		= &scons_pwroff,
1685 		.maxlen		= sizeof (int),
1686 		.mode		= 0644,
1687 		.proc_handler	= proc_dointvec,
1688 	},
1689 #endif
1690 #ifdef CONFIG_SPARC64
1691 	{
1692 		.procname	= "tsb-ratio",
1693 		.data		= &sysctl_tsb_ratio,
1694 		.maxlen		= sizeof (int),
1695 		.mode		= 0644,
1696 		.proc_handler	= proc_dointvec,
1697 	},
1698 #endif
1699 #ifdef CONFIG_PARISC
1700 	{
1701 		.procname	= "soft-power",
1702 		.data		= &pwrsw_enabled,
1703 		.maxlen		= sizeof (int),
1704 		.mode		= 0644,
1705 		.proc_handler	= proc_dointvec,
1706 	},
1707 #endif
1708 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
1709 	{
1710 		.procname	= "unaligned-trap",
1711 		.data		= &unaligned_enabled,
1712 		.maxlen		= sizeof (int),
1713 		.mode		= 0644,
1714 		.proc_handler	= proc_dointvec,
1715 	},
1716 #endif
1717 #ifdef CONFIG_STACK_TRACER
1718 	{
1719 		.procname	= "stack_tracer_enabled",
1720 		.data		= &stack_tracer_enabled,
1721 		.maxlen		= sizeof(int),
1722 		.mode		= 0644,
1723 		.proc_handler	= stack_trace_sysctl,
1724 	},
1725 #endif
1726 #ifdef CONFIG_TRACING
1727 	{
1728 		.procname	= "ftrace_dump_on_oops",
1729 		.data		= &ftrace_dump_on_oops,
1730 		.maxlen		= sizeof(int),
1731 		.mode		= 0644,
1732 		.proc_handler	= proc_dointvec,
1733 	},
1734 	{
1735 		.procname	= "traceoff_on_warning",
1736 		.data		= &__disable_trace_on_warning,
1737 		.maxlen		= sizeof(__disable_trace_on_warning),
1738 		.mode		= 0644,
1739 		.proc_handler	= proc_dointvec,
1740 	},
1741 	{
1742 		.procname	= "tracepoint_printk",
1743 		.data		= &tracepoint_printk,
1744 		.maxlen		= sizeof(tracepoint_printk),
1745 		.mode		= 0644,
1746 		.proc_handler	= tracepoint_printk_sysctl,
1747 	},
1748 #endif
1749 #ifdef CONFIG_MODULES
1750 	{
1751 		.procname	= "modprobe",
1752 		.data		= &modprobe_path,
1753 		.maxlen		= KMOD_PATH_LEN,
1754 		.mode		= 0644,
1755 		.proc_handler	= proc_dostring,
1756 	},
1757 	{
1758 		.procname	= "modules_disabled",
1759 		.data		= &modules_disabled,
1760 		.maxlen		= sizeof(int),
1761 		.mode		= 0644,
1762 		/* only handle a transition from default "0" to "1" */
1763 		.proc_handler	= proc_dointvec_minmax,
1764 		.extra1		= SYSCTL_ONE,
1765 		.extra2		= SYSCTL_ONE,
1766 	},
1767 #endif
1768 #ifdef CONFIG_UEVENT_HELPER
1769 	{
1770 		.procname	= "hotplug",
1771 		.data		= &uevent_helper,
1772 		.maxlen		= UEVENT_HELPER_PATH_LEN,
1773 		.mode		= 0644,
1774 		.proc_handler	= proc_dostring,
1775 	},
1776 #endif
1777 #ifdef CONFIG_MAGIC_SYSRQ
1778 	{
1779 		.procname	= "sysrq",
1780 		.data		= NULL,
1781 		.maxlen		= sizeof (int),
1782 		.mode		= 0644,
1783 		.proc_handler	= sysrq_sysctl_handler,
1784 	},
1785 #endif
1786 #ifdef CONFIG_PROC_SYSCTL
1787 	{
1788 		.procname	= "cad_pid",
		/* .data is unused: proc_do_cad_pid() operates on cad_pid itself */
1789 		.data		= NULL,
1790 		.maxlen		= sizeof (int),
1791 		.mode		= 0600,
1792 		.proc_handler	= proc_do_cad_pid,
1793 	},
1794 #endif
1795 	{
1796 		.procname	= "threads-max",
1797 		.data		= NULL,
1798 		.maxlen		= sizeof(int),
1799 		.mode		= 0644,
1800 		.proc_handler	= sysctl_max_threads,
1801 	},
	/* No handler here: .child names a nested table of further entries. */
1802 	{
1803 		.procname	= "usermodehelper",
1804 		.mode		= 0555,
1805 		.child		= usermodehelper_table,
1806 	},
1807 	{
1808 		.procname	= "overflowuid",
1809 		.data		= &overflowuid,
1810 		.maxlen		= sizeof(int),
1811 		.mode		= 0644,
1812 		.proc_handler	= proc_dointvec_minmax,
1813 		.extra1		= SYSCTL_ZERO,
1814 		.extra2		= SYSCTL_MAXOLDUID,
1815 	},
1816 	{
1817 		.procname	= "overflowgid",
1818 		.data		= &overflowgid,
1819 		.maxlen		= sizeof(int),
1820 		.mode		= 0644,
1821 		.proc_handler	= proc_dointvec_minmax,
1822 		.extra1		= SYSCTL_ZERO,
1823 		.extra2		= SYSCTL_MAXOLDUID,
1824 	},
1825 #ifdef CONFIG_S390
1826 	{
1827 		.procname	= "userprocess_debug",
1828 		.data		= &show_unhandled_signals,
1829 		.maxlen		= sizeof(int),
1830 		.mode		= 0644,
1831 		.proc_handler	= proc_dointvec,
1832 	},
1833 #endif
1834 	{
1835 		.procname	= "pid_max",
1836 		.data		= &pid_max,
1837 		.maxlen		= sizeof (int),
1838 		.mode		= 0644,
1839 		.proc_handler	= proc_dointvec_minmax,
1840 		.extra1		= &pid_max_min,
1841 		.extra2		= &pid_max_max,
1842 	},
1843 	{
1844 		.procname	= "panic_on_oops",
1845 		.data		= &panic_on_oops,
1846 		.maxlen		= sizeof(int),
1847 		.mode		= 0644,
1848 		.proc_handler	= proc_dointvec,
1849 	},
1850 	{
1851 		.procname	= "panic_print",
1852 		.data		= &panic_print,
1853 		.maxlen		= sizeof(unsigned long),
1854 		.mode		= 0644,
1855 		.proc_handler	= proc_doulongvec_minmax,
1856 	},
	/* The next two entries use mode 0444, i.e. no write permission bits. */
1857 	{
1858 		.procname	= "ngroups_max",
1859 		.data		= (void *)&ngroups_max,
1860 		.maxlen		= sizeof (int),
1861 		.mode		= 0444,
1862 		.proc_handler	= proc_dointvec,
1863 	},
1864 	{
1865 		.procname	= "cap_last_cap",
1866 		.data		= (void *)&cap_last_cap,
1867 		.maxlen		= sizeof(int),
1868 		.mode		= 0444,
1869 		.proc_handler	= proc_dointvec,
1870 	},
1871 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
1872 	{
1873 		.procname       = "unknown_nmi_panic",
1874 		.data           = &unknown_nmi_panic,
1875 		.maxlen         = sizeof (int),
1876 		.mode           = 0644,
1877 		.proc_handler   = proc_dointvec,
1878 	},
1879 #endif
1880 
1881 #if (defined(CONFIG_X86_32) || defined(CONFIG_PARISC)) && \
1882 	defined(CONFIG_DEBUG_STACKOVERFLOW)
1883 	{
1884 		.procname	= "panic_on_stackoverflow",
1885 		.data		= &sysctl_panic_on_stackoverflow,
1886 		.maxlen		= sizeof(int),
1887 		.mode		= 0644,
1888 		.proc_handler	= proc_dointvec,
1889 	},
1890 #endif
1891 #if defined(CONFIG_X86)
1892 	{
1893 		.procname	= "panic_on_unrecovered_nmi",
1894 		.data		= &panic_on_unrecovered_nmi,
1895 		.maxlen		= sizeof(int),
1896 		.mode		= 0644,
1897 		.proc_handler	= proc_dointvec,
1898 	},
1899 	{
1900 		.procname	= "panic_on_io_nmi",
1901 		.data		= &panic_on_io_nmi,
1902 		.maxlen		= sizeof(int),
1903 		.mode		= 0644,
1904 		.proc_handler	= proc_dointvec,
1905 	},
1906 	{
1907 		.procname	= "bootloader_type",
1908 		.data		= &bootloader_type,
1909 		.maxlen		= sizeof (int),
1910 		.mode		= 0444,
1911 		.proc_handler	= proc_dointvec,
1912 	},
1913 	{
1914 		.procname	= "bootloader_version",
1915 		.data		= &bootloader_version,
1916 		.maxlen		= sizeof (int),
1917 		.mode		= 0444,
1918 		.proc_handler	= proc_dointvec,
1919 	},
1920 	{
1921 		.procname	= "io_delay_type",
1922 		.data		= &io_delay_type,
1923 		.maxlen		= sizeof(int),
1924 		.mode		= 0644,
1925 		.proc_handler	= proc_dointvec,
1926 	},
1927 #endif
1928 #if defined(CONFIG_MMU)
1929 	{
1930 		.procname	= "randomize_va_space",
1931 		.data		= &randomize_va_space,
1932 		.maxlen		= sizeof(int),
1933 		.mode		= 0644,
1934 		.proc_handler	= proc_dointvec,
1935 	},
1936 #endif
1937 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
1938 	{
1939 		.procname	= "spin_retry",
1940 		.data		= &spin_retry,
1941 		.maxlen		= sizeof (int),
1942 		.mode		= 0644,
1943 		.proc_handler	= proc_dointvec,
1944 	},
1945 #endif
1946 #if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
1947 	{
1948 		.procname	= "acpi_video_flags",
1949 		.data		= &acpi_realmode_flags,
1950 		.maxlen		= sizeof (unsigned long),
1951 		.mode		= 0644,
1952 		.proc_handler	= proc_doulongvec_minmax,
1953 	},
1954 #endif
1955 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
1956 	{
1957 		.procname	= "ignore-unaligned-usertrap",
1958 		.data		= &no_unaligned_warning,
1959 		.maxlen		= sizeof (int),
1960 		.mode		= 0644,
1961 		.proc_handler	= proc_dointvec,
1962 	},
1963 #endif
1964 #ifdef CONFIG_IA64
1965 	{
1966 		.procname	= "unaligned-dump-stack",
1967 		.data		= &unaligned_dump_stack,
1968 		.maxlen		= sizeof (int),
1969 		.mode		= 0644,
1970 		.proc_handler	= proc_dointvec,
1971 	},
1972 #endif
1973 #ifdef CONFIG_RT_MUTEXES
1974 	{
1975 		.procname	= "max_lock_depth",
1976 		.data		= &max_lock_depth,
1977 		.maxlen		= sizeof(int),
1978 		.mode		= 0644,
1979 		.proc_handler	= proc_dointvec,
1980 	},
1981 #endif
1982 #ifdef CONFIG_KEYS
	/* No handler here: .child names a nested table of further entries. */
1983 	{
1984 		.procname	= "keys",
1985 		.mode		= 0555,
1986 		.child		= key_sysctls,
1987 	},
1988 #endif
1989 #ifdef CONFIG_PERF_EVENTS
1990 	/*
1991 	 * User-space scripts rely on the existence of this file
1992 	 * as a feature check for perf_events being enabled.
1993 	 *
1994 	 * So it's an ABI, do not remove!
1995 	 */
1996 	{
1997 		.procname	= "perf_event_paranoid",
1998 		.data		= &sysctl_perf_event_paranoid,
1999 		.maxlen		= sizeof(sysctl_perf_event_paranoid),
2000 		.mode		= 0644,
2001 		.proc_handler	= proc_dointvec,
2002 	},
2003 	{
2004 		.procname	= "perf_event_mlock_kb",
2005 		.data		= &sysctl_perf_event_mlock,
2006 		.maxlen		= sizeof(sysctl_perf_event_mlock),
2007 		.mode		= 0644,
2008 		.proc_handler	= proc_dointvec,
2009 	},
2010 	{
2011 		.procname	= "perf_event_max_sample_rate",
2012 		.data		= &sysctl_perf_event_sample_rate,
2013 		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
2014 		.mode		= 0644,
2015 		.proc_handler	= perf_proc_update_handler,
2016 		.extra1		= SYSCTL_ONE,
2017 	},
2018 	{
2019 		.procname	= "perf_cpu_time_max_percent",
2020 		.data		= &sysctl_perf_cpu_time_max_percent,
2021 		.maxlen		= sizeof(sysctl_perf_cpu_time_max_percent),
2022 		.mode		= 0644,
2023 		.proc_handler	= perf_cpu_time_max_percent_handler,
2024 		.extra1		= SYSCTL_ZERO,
2025 		.extra2		= SYSCTL_ONE_HUNDRED,
2026 	},
2027 	{
2028 		.procname	= "perf_event_max_stack",
2029 		.data		= &sysctl_perf_event_max_stack,
2030 		.maxlen		= sizeof(sysctl_perf_event_max_stack),
2031 		.mode		= 0644,
2032 		.proc_handler	= perf_event_max_stack_handler,
2033 		.extra1		= SYSCTL_ZERO,
2034 		.extra2		= (void *)&six_hundred_forty_kb,
2035 	},
2036 	{
2037 		.procname	= "perf_event_max_contexts_per_stack",
2038 		.data		= &sysctl_perf_event_max_contexts_per_stack,
2039 		.maxlen		= sizeof(sysctl_perf_event_max_contexts_per_stack),
2040 		.mode		= 0644,
2041 		.proc_handler	= perf_event_max_stack_handler,
2042 		.extra1		= SYSCTL_ZERO,
2043 		.extra2		= SYSCTL_ONE_THOUSAND,
2044 	},
2045 #endif
2046 	{
2047 		.procname	= "panic_on_warn",
2048 		.data		= &panic_on_warn,
2049 		.maxlen		= sizeof(int),
2050 		.mode		= 0644,
2051 		.proc_handler	= proc_dointvec_minmax,
2052 		.extra1		= SYSCTL_ZERO,
2053 		.extra2		= SYSCTL_ONE,
2054 	},
2055 #ifdef CONFIG_TREE_RCU
2056 	{
2057 		.procname	= "panic_on_rcu_stall",
2058 		.data		= &sysctl_panic_on_rcu_stall,
2059 		.maxlen		= sizeof(sysctl_panic_on_rcu_stall),
2060 		.mode		= 0644,
2061 		.proc_handler	= proc_dointvec_minmax,
2062 		.extra1		= SYSCTL_ZERO,
2063 		.extra2		= SYSCTL_ONE,
2064 	},
2065 	{
2066 		.procname	= "max_rcu_stall_to_panic",
2067 		.data		= &sysctl_max_rcu_stall_to_panic,
2068 		.maxlen		= sizeof(sysctl_max_rcu_stall_to_panic),
2069 		.mode		= 0644,
2070 		.proc_handler	= proc_dointvec_minmax,
2071 		.extra1		= SYSCTL_ONE,
2072 		.extra2		= SYSCTL_INT_MAX,
2073 	},
2074 #endif
	/* Empty entry; presumably the end-of-table sentinel - TODO confirm. */
2075 	{ }
2076 };
2077 
2078 static struct ctl_table vm_table[] = {
2079 	{
2080 		.procname	= "overcommit_memory",
2081 		.data		= &sysctl_overcommit_memory,
2082 		.maxlen		= sizeof(sysctl_overcommit_memory),
2083 		.mode		= 0644,
2084 		.proc_handler	= overcommit_policy_handler,
2085 		.extra1		= SYSCTL_ZERO,
2086 		.extra2		= SYSCTL_TWO,
2087 	},
2088 	{
2089 		.procname	= "overcommit_ratio",
2090 		.data		= &sysctl_overcommit_ratio,
2091 		.maxlen		= sizeof(sysctl_overcommit_ratio),
2092 		.mode		= 0644,
2093 		.proc_handler	= overcommit_ratio_handler,
2094 	},
2095 	{
2096 		.procname	= "overcommit_kbytes",
2097 		.data		= &sysctl_overcommit_kbytes,
2098 		.maxlen		= sizeof(sysctl_overcommit_kbytes),
2099 		.mode		= 0644,
2100 		.proc_handler	= overcommit_kbytes_handler,
2101 	},
2102 	{
2103 		.procname	= "page-cluster",
2104 		.data		= &page_cluster,
2105 		.maxlen		= sizeof(int),
2106 		.mode		= 0644,
2107 		.proc_handler	= proc_dointvec_minmax,
2108 		.extra1		= SYSCTL_ZERO,
2109 		.extra2		= (void *)&page_cluster_max,
2110 	},
2111 	{
2112 		.procname	= "dirtytime_expire_seconds",
2113 		.data		= &dirtytime_expire_interval,
2114 		.maxlen		= sizeof(dirtytime_expire_interval),
2115 		.mode		= 0644,
2116 		.proc_handler	= dirtytime_interval_handler,
2117 		.extra1		= SYSCTL_ZERO,
2118 	},
2119 	{
2120 		.procname	= "swappiness",
2121 		.data		= &vm_swappiness,
2122 		.maxlen		= sizeof(vm_swappiness),
2123 		.mode		= 0644,
2124 		.proc_handler	= proc_dointvec_minmax,
2125 		.extra1		= SYSCTL_ZERO,
2126 		.extra2		= SYSCTL_TWO_HUNDRED,
2127 	},
2128 #ifdef CONFIG_NUMA
2129 	{
2130 		.procname	= "numa_stat",
2131 		.data		= &sysctl_vm_numa_stat,
2132 		.maxlen		= sizeof(int),
2133 		.mode		= 0644,
2134 		.proc_handler	= sysctl_vm_numa_stat_handler,
2135 		.extra1		= SYSCTL_ZERO,
2136 		.extra2		= SYSCTL_ONE,
2137 	},
2138 #endif
2139 #ifdef CONFIG_HUGETLB_PAGE
2140 	{
2141 		.procname	= "nr_hugepages",
2142 		.data		= NULL,
2143 		.maxlen		= sizeof(unsigned long),
2144 		.mode		= 0644,
2145 		.proc_handler	= hugetlb_sysctl_handler,
2146 	},
2147 #ifdef CONFIG_NUMA
2148 	{
2149 		.procname       = "nr_hugepages_mempolicy",
2150 		.data           = NULL,
2151 		.maxlen         = sizeof(unsigned long),
2152 		.mode           = 0644,
2153 		.proc_handler   = &hugetlb_mempolicy_sysctl_handler,
2154 	},
2155 #endif
2156 	 {
2157 		.procname	= "hugetlb_shm_group",
2158 		.data		= &sysctl_hugetlb_shm_group,
2159 		.maxlen		= sizeof(gid_t),
2160 		.mode		= 0644,
2161 		.proc_handler	= proc_dointvec,
2162 	 },
2163 	{
2164 		.procname	= "nr_overcommit_hugepages",
2165 		.data		= NULL,
2166 		.maxlen		= sizeof(unsigned long),
2167 		.mode		= 0644,
2168 		.proc_handler	= hugetlb_overcommit_handler,
2169 	},
2170 #endif
2171 	{
2172 		.procname	= "lowmem_reserve_ratio",
2173 		.data		= &sysctl_lowmem_reserve_ratio,
2174 		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
2175 		.mode		= 0644,
2176 		.proc_handler	= lowmem_reserve_ratio_sysctl_handler,
2177 	},
2178 	{
2179 		.procname	= "drop_caches",
2180 		.data		= &sysctl_drop_caches,
2181 		.maxlen		= sizeof(int),
2182 		.mode		= 0200,
2183 		.proc_handler	= drop_caches_sysctl_handler,
2184 		.extra1		= SYSCTL_ONE,
2185 		.extra2		= SYSCTL_FOUR,
2186 	},
2187 #ifdef CONFIG_COMPACTION
2188 	{
2189 		.procname	= "compact_memory",
2190 		.data		= NULL,
2191 		.maxlen		= sizeof(int),
2192 		.mode		= 0200,
2193 		.proc_handler	= sysctl_compaction_handler,
2194 	},
2195 	{
2196 		.procname	= "compaction_proactiveness",
2197 		.data		= &sysctl_compaction_proactiveness,
2198 		.maxlen		= sizeof(sysctl_compaction_proactiveness),
2199 		.mode		= 0644,
2200 		.proc_handler	= compaction_proactiveness_sysctl_handler,
2201 		.extra1		= SYSCTL_ZERO,
2202 		.extra2		= SYSCTL_ONE_HUNDRED,
2203 	},
2204 	{
2205 		.procname	= "extfrag_threshold",
2206 		.data		= &sysctl_extfrag_threshold,
2207 		.maxlen		= sizeof(int),
2208 		.mode		= 0644,
2209 		.proc_handler	= proc_dointvec_minmax,
2210 		.extra1		= SYSCTL_ZERO,
2211 		.extra2		= SYSCTL_ONE_THOUSAND,
2212 	},
2213 	{
2214 		.procname	= "compact_unevictable_allowed",
2215 		.data		= &sysctl_compact_unevictable_allowed,
2216 		.maxlen		= sizeof(int),
2217 		.mode		= 0644,
2218 		.proc_handler	= proc_dointvec_minmax_warn_RT_change,
2219 		.extra1		= SYSCTL_ZERO,
2220 		.extra2		= SYSCTL_ONE,
2221 	},
2222 
2223 #endif /* CONFIG_COMPACTION */
2224 	{
2225 		.procname	= "min_free_kbytes",
2226 		.data		= &min_free_kbytes,
2227 		.maxlen		= sizeof(min_free_kbytes),
2228 		.mode		= 0644,
2229 		.proc_handler	= min_free_kbytes_sysctl_handler,
2230 		.extra1		= SYSCTL_ZERO,
2231 	},
2232 	{
2233 		.procname	= "watermark_boost_factor",
2234 		.data		= &watermark_boost_factor,
2235 		.maxlen		= sizeof(watermark_boost_factor),
2236 		.mode		= 0644,
2237 		.proc_handler	= proc_dointvec_minmax,
2238 		.extra1		= SYSCTL_ZERO,
2239 	},
2240 	{
2241 		.procname	= "watermark_scale_factor",
2242 		.data		= &watermark_scale_factor,
2243 		.maxlen		= sizeof(watermark_scale_factor),
2244 		.mode		= 0644,
2245 		.proc_handler	= watermark_scale_factor_sysctl_handler,
2246 		.extra1		= SYSCTL_ONE,
2247 		.extra2		= SYSCTL_THREE_THOUSAND,
2248 	},
2249 	{
2250 		.procname	= "percpu_pagelist_high_fraction",
2251 		.data		= &percpu_pagelist_high_fraction,
2252 		.maxlen		= sizeof(percpu_pagelist_high_fraction),
2253 		.mode		= 0644,
2254 		.proc_handler	= percpu_pagelist_high_fraction_sysctl_handler,
2255 		.extra1		= SYSCTL_ZERO,
2256 	},
2257 	{
2258 		.procname	= "page_lock_unfairness",
2259 		.data		= &sysctl_page_lock_unfairness,
2260 		.maxlen		= sizeof(sysctl_page_lock_unfairness),
2261 		.mode		= 0644,
2262 		.proc_handler	= proc_dointvec_minmax,
2263 		.extra1		= SYSCTL_ZERO,
2264 	},
2265 #ifdef CONFIG_MMU
2266 	{
2267 		.procname	= "max_map_count",
2268 		.data		= &sysctl_max_map_count,
2269 		.maxlen		= sizeof(sysctl_max_map_count),
2270 		.mode		= 0644,
2271 		.proc_handler	= proc_dointvec_minmax,
2272 		.extra1		= SYSCTL_ZERO,
2273 	},
2274 #else
2275 	{
2276 		.procname	= "nr_trim_pages",
2277 		.data		= &sysctl_nr_trim_pages,
2278 		.maxlen		= sizeof(sysctl_nr_trim_pages),
2279 		.mode		= 0644,
2280 		.proc_handler	= proc_dointvec_minmax,
2281 		.extra1		= SYSCTL_ZERO,
2282 	},
2283 #endif
2284 	{
2285 		.procname	= "vfs_cache_pressure",
2286 		.data		= &sysctl_vfs_cache_pressure,
2287 		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
2288 		.mode		= 0644,
2289 		.proc_handler	= proc_dointvec_minmax,
2290 		.extra1		= SYSCTL_ZERO,
2291 	},
2292 #if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
2293     defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
2294 	{
2295 		.procname	= "legacy_va_layout",
2296 		.data		= &sysctl_legacy_va_layout,
2297 		.maxlen		= sizeof(sysctl_legacy_va_layout),
2298 		.mode		= 0644,
2299 		.proc_handler	= proc_dointvec_minmax,
2300 		.extra1		= SYSCTL_ZERO,
2301 	},
2302 #endif
2303 #ifdef CONFIG_NUMA
2304 	{
2305 		.procname	= "zone_reclaim_mode",
2306 		.data		= &node_reclaim_mode,
2307 		.maxlen		= sizeof(node_reclaim_mode),
2308 		.mode		= 0644,
2309 		.proc_handler	= proc_dointvec_minmax,
2310 		.extra1		= SYSCTL_ZERO,
2311 	},
2312 	{
2313 		.procname	= "min_unmapped_ratio",
2314 		.data		= &sysctl_min_unmapped_ratio,
2315 		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
2316 		.mode		= 0644,
2317 		.proc_handler	= sysctl_min_unmapped_ratio_sysctl_handler,
2318 		.extra1		= SYSCTL_ZERO,
2319 		.extra2		= SYSCTL_ONE_HUNDRED,
2320 	},
2321 	{
2322 		.procname	= "min_slab_ratio",
2323 		.data		= &sysctl_min_slab_ratio,
2324 		.maxlen		= sizeof(sysctl_min_slab_ratio),
2325 		.mode		= 0644,
2326 		.proc_handler	= sysctl_min_slab_ratio_sysctl_handler,
2327 		.extra1		= SYSCTL_ZERO,
2328 		.extra2		= SYSCTL_ONE_HUNDRED,
2329 	},
2330 #endif
2331 #ifdef CONFIG_SMP
2332 	{
2333 		.procname	= "stat_interval",
2334 		.data		= &sysctl_stat_interval,
2335 		.maxlen		= sizeof(sysctl_stat_interval),
2336 		.mode		= 0644,
2337 		.proc_handler	= proc_dointvec_jiffies,
2338 	},
2339 	{
2340 		.procname	= "stat_refresh",
2341 		.data		= NULL,
2342 		.maxlen		= 0,
2343 		.mode		= 0600,
2344 		.proc_handler	= vmstat_refresh,
2345 	},
2346 #endif
2347 #ifdef CONFIG_MMU
2348 	{
2349 		.procname	= "mmap_min_addr",
2350 		.data		= &dac_mmap_min_addr,
2351 		.maxlen		= sizeof(unsigned long),
2352 		.mode		= 0644,
2353 		.proc_handler	= mmap_min_addr_handler,
2354 	},
2355 #endif
2356 #ifdef CONFIG_NUMA
2357 	{
2358 		.procname	= "numa_zonelist_order",
2359 		.data		= &numa_zonelist_order,
2360 		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
2361 		.mode		= 0644,
2362 		.proc_handler	= numa_zonelist_order_handler,
2363 	},
2364 #endif
2365 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
2366    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
2367 	{
2368 		.procname	= "vdso_enabled",
2369 #ifdef CONFIG_X86_32
2370 		.data		= &vdso32_enabled,
2371 		.maxlen		= sizeof(vdso32_enabled),
2372 #else
2373 		.data		= &vdso_enabled,
2374 		.maxlen		= sizeof(vdso_enabled),
2375 #endif
2376 		.mode		= 0644,
2377 		.proc_handler	= proc_dointvec,
2378 		.extra1		= SYSCTL_ZERO,
2379 	},
2380 #endif
2381 #ifdef CONFIG_MEMORY_FAILURE
2382 	{
2383 		.procname	= "memory_failure_early_kill",
2384 		.data		= &sysctl_memory_failure_early_kill,
2385 		.maxlen		= sizeof(sysctl_memory_failure_early_kill),
2386 		.mode		= 0644,
2387 		.proc_handler	= proc_dointvec_minmax,
2388 		.extra1		= SYSCTL_ZERO,
2389 		.extra2		= SYSCTL_ONE,
2390 	},
2391 	{
2392 		.procname	= "memory_failure_recovery",
2393 		.data		= &sysctl_memory_failure_recovery,
2394 		.maxlen		= sizeof(sysctl_memory_failure_recovery),
2395 		.mode		= 0644,
2396 		.proc_handler	= proc_dointvec_minmax,
2397 		.extra1		= SYSCTL_ZERO,
2398 		.extra2		= SYSCTL_ONE,
2399 	},
2400 #endif
2401 	{
2402 		.procname	= "user_reserve_kbytes",
2403 		.data		= &sysctl_user_reserve_kbytes,
2404 		.maxlen		= sizeof(sysctl_user_reserve_kbytes),
2405 		.mode		= 0644,
2406 		.proc_handler	= proc_doulongvec_minmax,
2407 	},
2408 	{
2409 		.procname	= "admin_reserve_kbytes",
2410 		.data		= &sysctl_admin_reserve_kbytes,
2411 		.maxlen		= sizeof(sysctl_admin_reserve_kbytes),
2412 		.mode		= 0644,
2413 		.proc_handler	= proc_doulongvec_minmax,
2414 	},
2415 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
2416 	{
2417 		.procname	= "mmap_rnd_bits",
2418 		.data		= &mmap_rnd_bits,
2419 		.maxlen		= sizeof(mmap_rnd_bits),
2420 		.mode		= 0600,
2421 		.proc_handler	= proc_dointvec_minmax,
2422 		.extra1		= (void *)&mmap_rnd_bits_min,
2423 		.extra2		= (void *)&mmap_rnd_bits_max,
2424 	},
2425 #endif
2426 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
2427 	{
2428 		.procname	= "mmap_rnd_compat_bits",
2429 		.data		= &mmap_rnd_compat_bits,
2430 		.maxlen		= sizeof(mmap_rnd_compat_bits),
2431 		.mode		= 0600,
2432 		.proc_handler	= proc_dointvec_minmax,
2433 		.extra1		= (void *)&mmap_rnd_compat_bits_min,
2434 		.extra2		= (void *)&mmap_rnd_compat_bits_max,
2435 	},
2436 #endif
2437 #ifdef CONFIG_USERFAULTFD
2438 	{
2439 		.procname	= "unprivileged_userfaultfd",
2440 		.data		= &sysctl_unprivileged_userfaultfd,
2441 		.maxlen		= sizeof(sysctl_unprivileged_userfaultfd),
2442 		.mode		= 0644,
2443 		.proc_handler	= proc_dointvec_minmax,
2444 		.extra1		= SYSCTL_ZERO,
2445 		.extra2		= SYSCTL_ONE,
2446 	},
2447 #endif
2448 	{ }
2449 };
2450 
/*
 * Entries for the "debug" sysctl base; this table is handed to
 * DECLARE_SYSCTL_BASE(debug, debug_table) further down in this file.
 *
 * The only entry, "exception-trace", is built when
 * CONFIG_SYSCTL_EXCEPTION_TRACE is set.  Without that option the table
 * holds nothing but the empty closing entry.
 */
2451 static struct ctl_table debug_table[] = {
2452 #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
	/*
	 * "exception-trace": an int-sized value stored in
	 * show_unhandled_signals, file mode 0644, read and written through
	 * proc_dointvec.  No extra1/extra2 bounds are set for it.
	 */
2453 	{
2454 		.procname	= "exception-trace",
2455 		.data		= &show_unhandled_signals,
2456 		.maxlen		= sizeof(int),
2457 		.mode		= 0644,
2458 		.proc_handler	= proc_dointvec,
2459 	},
2460 #endif
	/* Empty entry closing the table, as in the other tables of this file. */
2461 	{ }
2462 };
2463 
/*
 * Table for the "dev" sysctl base, handed to
 * DECLARE_SYSCTL_BASE(dev, dev_table) below.  It contains only the empty
 * closing entry, i.e. this file contributes no "dev" entries of its own.
 *
 * NOTE(review): presumably this exists so that the base is present for
 * entries registered from elsewhere - confirm against the users of "dev".
 */
2464 static struct ctl_table dev_table[] = {
2465 	{ }
2466 };
2467 
/*
 * Pair each base name (kernel, vm, debug, dev) with the ctl_table array
 * that supplies its entries.  The same four names are passed to
 * register_sysctl_base() in sysctl_init_bases().
 *
 * NOTE(review): DECLARE_SYSCTL_BASE() is defined outside this file;
 * presumably it emits a per-name base object that register_sysctl_base()
 * later looks up by the same name - confirm against the macro definition.
 */
2468 DECLARE_SYSCTL_BASE(kernel, kern_table);
2469 DECLARE_SYSCTL_BASE(vm, vm_table);
2470 DECLARE_SYSCTL_BASE(debug, debug_table);
2471 DECLARE_SYSCTL_BASE(dev, dev_table);
2472 
/**
 * sysctl_init_bases - register the kernel, vm, debug and dev sysctl bases
 *
 * Invokes register_sysctl_base() once for each base declared with
 * DECLARE_SYSCTL_BASE() in this file, in the order kernel, vm, debug, dev.
 * Whatever register_sysctl_base() evaluates to is not inspected here, so a
 * failed registration (if one is possible) is not reported to the caller.
 *
 * Return: always 0.
 */
2473 int __init sysctl_init_bases(void)
2474 {
2475 	register_sysctl_base(kernel);
2476 	register_sysctl_base(vm);
2477 	register_sysctl_base(debug);
2478 	register_sysctl_base(dev);
2479 
2480 	return 0;
2481 }
2482 #endif /* CONFIG_SYSCTL */
2483 /*
2484  * No sense putting this after each symbol definition, twice,
2485  * exception granted :-)
2486  */
/*
 * Exported proc_do* handlers, grouped in one place.  The list sits after
 * the "#endif" that closes the CONFIG_SYSCTL section above.
 * All of them use EXPORT_SYMBOL() except proc_douintvec_minmax, which is
 * exported with EXPORT_SYMBOL_GPL().
 */
2487 EXPORT_SYMBOL(proc_dobool);
2488 EXPORT_SYMBOL(proc_dointvec);
2489 EXPORT_SYMBOL(proc_douintvec);
2490 EXPORT_SYMBOL(proc_dointvec_jiffies);
2491 EXPORT_SYMBOL(proc_dointvec_minmax);
2492 EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
2493 EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
2494 EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
2495 EXPORT_SYMBOL(proc_dostring);
2496 EXPORT_SYMBOL(proc_doulongvec_minmax);
2497 EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
2498 EXPORT_SYMBOL(proc_do_large_bitmap);
2499