xref: /openbmc/linux/kernel/sysctl.c (revision e6cfaf34be9fcd1a8285a294e18986bfc41a409c)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * sysctl.c: General linux system control interface
4  *
5  * Begun 24 March 1995, Stephen Tweedie
6  * Added /proc support, Dec 1995
7  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
8  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
9  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
10  * Dynamic registration fixes, Stephen Tweedie.
11  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
12  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
13  *  Horn.
14  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
15  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
16  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
17  *  Wendling.
18  * The list_for_each() macro wasn't appropriate for the sysctl loop.
19  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
20  */
21 
22 #include <linux/module.h>
23 #include <linux/mm.h>
24 #include <linux/swap.h>
25 #include <linux/slab.h>
26 #include <linux/sysctl.h>
27 #include <linux/bitmap.h>
28 #include <linux/signal.h>
29 #include <linux/panic.h>
30 #include <linux/printk.h>
31 #include <linux/proc_fs.h>
32 #include <linux/security.h>
33 #include <linux/ctype.h>
34 #include <linux/kmemleak.h>
35 #include <linux/filter.h>
36 #include <linux/fs.h>
37 #include <linux/init.h>
38 #include <linux/kernel.h>
39 #include <linux/kobject.h>
40 #include <linux/net.h>
41 #include <linux/sysrq.h>
42 #include <linux/highuid.h>
43 #include <linux/writeback.h>
44 #include <linux/ratelimit.h>
45 #include <linux/compaction.h>
46 #include <linux/hugetlb.h>
47 #include <linux/initrd.h>
48 #include <linux/key.h>
49 #include <linux/times.h>
50 #include <linux/limits.h>
51 #include <linux/dcache.h>
52 #include <linux/syscalls.h>
53 #include <linux/vmstat.h>
54 #include <linux/nfs_fs.h>
55 #include <linux/acpi.h>
56 #include <linux/reboot.h>
57 #include <linux/ftrace.h>
58 #include <linux/perf_event.h>
59 #include <linux/oom.h>
60 #include <linux/kmod.h>
61 #include <linux/capability.h>
62 #include <linux/binfmts.h>
63 #include <linux/sched/sysctl.h>
64 #include <linux/mount.h>
65 #include <linux/userfaultfd_k.h>
66 #include <linux/pid.h>
67 
68 #include "../lib/kstrtox.h"
69 
70 #include <linux/uaccess.h>
71 #include <asm/processor.h>
72 
73 #ifdef CONFIG_X86
74 #include <asm/nmi.h>
75 #include <asm/stacktrace.h>
76 #include <asm/io.h>
77 #endif
78 #ifdef CONFIG_SPARC
79 #include <asm/setup.h>
80 #endif
81 #ifdef CONFIG_RT_MUTEXES
82 #include <linux/rtmutex.h>
83 #endif
84 
85 /* shared constants to be used in various sysctls */
/*
 * Shared read-only int constants, exported for use outside this file.
 * NOTE(review): entries are presumably referenced by array index from a
 * header, so the order should not be changed - confirm before editing.
 */
const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 };
EXPORT_SYMBOL(sysctl_vals);

/* Shared read-only unsigned long constants (GPL-only export). */
const unsigned long sysctl_long_vals[] = { 0, 1, LONG_MAX };
EXPORT_SYMBOL_GPL(sysctl_long_vals);
91 
92 #if defined(CONFIG_SYSCTL)
93 
94 /* Constants used for minimum and maximum */
95 
#ifdef CONFIG_PERF_EVENTS
/* 640 KiB expressed in bytes; only built when perf events are enabled. */
static const int six_hundred_forty_kb = 640 * 1024;
#endif


/*
 * File-local copies of NGROUPS_MAX and CAP_LAST_CAP.
 * NOTE(review): presumably referenced by sysctl table entries outside
 * this view - confirm.
 */
static const int ngroups_max = NGROUPS_MAX;
static const int cap_last_cap = CAP_LAST_CAP;
103 
104 #ifdef CONFIG_PROC_SYSCTL
105 
/**
 * enum sysctl_writes_mode - supported sysctl write modes
 *
 * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value
 *	to be written, and multiple writes on the same sysctl file descriptor
 *	will rewrite the sysctl value, regardless of file position. No warning
 *	is issued when the initial position is not 0.
 * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is
 *	not 0.
 * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
 *	file position 0 and the value must be fully contained in the buffer
 *	sent to the write syscall. If dealing with strings respect the file
 *	position, but restrict this to the max length of the buffer, anything
 *	past the max length will be ignored. Multiple writes will append
 *	to the buffer.
 *
 * These write modes control how current file position affects the behavior of
 * updating sysctl values through the proc interface on each write.
 */
enum sysctl_writes_mode {
	SYSCTL_WRITES_LEGACY		= -1,
	SYSCTL_WRITES_WARN		= 0,
	SYSCTL_WRITES_STRICT		= 1,
};

/*
 * Active write mode, strict by default. Consulted by _proc_do_string()
 * and proc_first_pos_non_zero_ignore().
 */
static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
132 #endif /* CONFIG_PROC_SYSCTL */
133 
#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
    defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
/*
 * Defined here, zero-initialised, and only built for the configurations
 * named above. NOTE(review): presumably selects the legacy mmap address
 * space layout when non-zero - confirm against its users.
 */
int sysctl_legacy_va_layout;
#endif
138 
139 #endif /* CONFIG_SYSCTL */
140 
141 /*
142  * /proc/sys support
143  */
144 
145 #ifdef CONFIG_PROC_SYSCTL
146 
147 static int _proc_do_string(char *data, int maxlen, int write,
148 		char *buffer, size_t *lenp, loff_t *ppos)
149 {
150 	size_t len;
151 	char c, *p;
152 
153 	if (!data || !maxlen || !*lenp) {
154 		*lenp = 0;
155 		return 0;
156 	}
157 
158 	if (write) {
159 		if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
160 			/* Only continue writes not past the end of buffer. */
161 			len = strlen(data);
162 			if (len > maxlen - 1)
163 				len = maxlen - 1;
164 
165 			if (*ppos > len)
166 				return 0;
167 			len = *ppos;
168 		} else {
169 			/* Start writing from beginning of buffer. */
170 			len = 0;
171 		}
172 
173 		*ppos += *lenp;
174 		p = buffer;
175 		while ((p - buffer) < *lenp && len < maxlen - 1) {
176 			c = *(p++);
177 			if (c == 0 || c == '\n')
178 				break;
179 			data[len++] = c;
180 		}
181 		data[len] = 0;
182 	} else {
183 		len = strlen(data);
184 		if (len > maxlen)
185 			len = maxlen;
186 
187 		if (*ppos > len) {
188 			*lenp = 0;
189 			return 0;
190 		}
191 
192 		data += *ppos;
193 		len  -= *ppos;
194 
195 		if (len > *lenp)
196 			len = *lenp;
197 		if (len)
198 			memcpy(buffer, data, len);
199 		if (len < *lenp) {
200 			buffer[len] = '\n';
201 			len++;
202 		}
203 		*lenp = len;
204 		*ppos += len;
205 	}
206 	return 0;
207 }
208 
209 static void warn_sysctl_write(struct ctl_table *table)
210 {
211 	pr_warn_once("%s wrote to %s when file position was not 0!\n"
212 		"This will not be supported in the future. To silence this\n"
213 		"warning, set kernel.sysctl_writes_strict = -1\n",
214 		current->comm, table->procname);
215 }
216 
217 /**
218  * proc_first_pos_non_zero_ignore - check if first position is allowed
219  * @ppos: file position
220  * @table: the sysctl table
221  *
222  * Returns true if the first position is non-zero and the sysctl_writes_strict
223  * mode indicates this is not allowed for numeric input types. String proc
224  * handlers can ignore the return value.
225  */
226 static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
227 					   struct ctl_table *table)
228 {
229 	if (!*ppos)
230 		return false;
231 
232 	switch (sysctl_writes_strict) {
233 	case SYSCTL_WRITES_STRICT:
234 		return true;
235 	case SYSCTL_WRITES_WARN:
236 		warn_sysctl_write(table);
237 		return false;
238 	default:
239 		return false;
240 	}
241 }
242 
243 /**
244  * proc_dostring - read a string sysctl
245  * @table: the sysctl table
246  * @write: %TRUE if this is a write to the sysctl file
247  * @buffer: the user buffer
248  * @lenp: the size of the user buffer
249  * @ppos: file position
250  *
251  * Reads/writes a string from/to the user buffer. If the kernel
252  * buffer provided is not large enough to hold the string, the
253  * string is truncated. The copied string is %NULL-terminated.
254  * If the string is being read by the user process, it is copied
255  * and a newline '\n' is added. It is truncated if the buffer is
256  * not large enough.
257  *
258  * Returns 0 on success.
259  */
260 int proc_dostring(struct ctl_table *table, int write,
261 		  void *buffer, size_t *lenp, loff_t *ppos)
262 {
263 	if (write)
264 		proc_first_pos_non_zero_ignore(ppos, table);
265 
266 	return _proc_do_string(table->data, table->maxlen, write, buffer, lenp,
267 			ppos);
268 }
269 
/*
 * Advance *@buf past leading whitespace and return how many bytes were
 * skipped.
 *
 * NOTE(review): no length is passed in, so the scan presumably ends only
 * at a non-space byte such as a terminating NUL. Callers that work with a
 * reduced byte budget should confirm the returned count cannot exceed it.
 */
static size_t proc_skip_spaces(char **buf)
{
	char *start = *buf;

	*buf = skip_spaces(start);
	return *buf - start;
}
278 
/*
 * Skip over a run of the character @v at the front of *@buf, consuming at
 * most *@size bytes. Both *@buf and *@size are updated to reflect the
 * bytes skipped.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	char *cur = *buf;
	size_t left = *size;

	for (; left && *cur == v; left--)
		cur++;

	*buf = cur;
	*size = left;
}
288 
289 /**
290  * strtoul_lenient - parse an ASCII formatted integer from a buffer and only
291  *                   fail on overflow
292  *
293  * @cp: kernel buffer containing the string to parse
294  * @endp: pointer to store the trailing characters
295  * @base: the base to use
296  * @res: where the parsed integer will be stored
297  *
298  * In case of success 0 is returned and @res will contain the parsed integer,
299  * @endp will hold any trailing characters.
300  * This function will fail the parse on overflow. If there wasn't an overflow
301  * the function will defer the decision what characters count as invalid to the
302  * caller.
303  */
304 static int strtoul_lenient(const char *cp, char **endp, unsigned int base,
305 			   unsigned long *res)
306 {
307 	unsigned long long result;
308 	unsigned int rv;
309 
310 	cp = _parse_integer_fixup_radix(cp, &base);
311 	rv = _parse_integer(cp, base, &result);
312 	if ((rv & KSTRTOX_OVERFLOW) || (result != (unsigned long)result))
313 		return -ERANGE;
314 
315 	cp += rv;
316 
317 	if (endp)
318 		*endp = (char *)cp;
319 
320 	*res = (unsigned long)result;
321 	return 0;
322 }
323 
#define TMPBUFLEN 22
/**
 * proc_get_long - parse one ASCII integer from the front of a buffer
 *
 * @buf: a kernel buffer; advanced past the parsed number on success
 * @size: bytes available in @buf; reduced by the bytes parsed on success
 * @val: receives the magnitude of the number
 * @neg: set to %TRUE when the number had a leading '-'
 * @perm_tr: a vector which contains the allowed trailers
 * @perm_tr_len: size of the perm_tr vector
 * @tr: if non-NULL, receives the character following the number, when
 *	there is one
 *
 * At most TMPBUFLEN - 1 bytes are examined. Returns %0 on success and
 * -EINVAL when the input is empty, does not start with a digit (after an
 * optional sign), overflows, fills the whole scratch buffer, or is
 * followed by a character that is not in @perm_tr.
 */
static int proc_get_long(char **buf, size_t *size,
			  unsigned long *val, bool *neg,
			  const char *perm_tr, unsigned perm_tr_len, char *tr)
{
	char scratch[TMPBUFLEN];
	char *cur = scratch;
	ssize_t n = *size;
	bool has_trailer;

	if (n <= 0)
		return -EINVAL;

	n = n > TMPBUFLEN - 1 ? TMPBUFLEN - 1 : n;
	memcpy(scratch, *buf, n);
	scratch[n] = 0;

	/* A lone '-' is not treated as a sign. */
	*neg = (*cur == '-' && *size > 1);
	if (*neg)
		cur++;

	if (!isdigit(*cur))
		return -EINVAL;

	if (strtoul_lenient(cur, &cur, 0, val))
		return -EINVAL;

	n = cur - scratch;

	/* We don't know if the next char is whitespace thus we may accept
	 * invalid integers (e.g. 1234...a) or two integers instead of one
	 * (e.g. 123...1). So lets not allow such large numbers. */
	if (n == TMPBUFLEN - 1)
		return -EINVAL;

	has_trailer = n < *size;

	if (has_trailer && perm_tr_len && !memchr(perm_tr, *cur, perm_tr_len))
		return -EINVAL;

	if (has_trailer && tr)
		*tr = *cur;

	*buf += n;
	*size -= n;

	return 0;
}
388 
389 /**
390  * proc_put_long - converts an integer to a decimal ASCII formatted string
391  *
392  * @buf: the user buffer
393  * @size: the size of the user buffer
394  * @val: the integer to be converted
395  * @neg: sign of the number, %TRUE for negative
396  *
397  * In case of success @buf and @size are updated with the amount of bytes
398  * written.
399  */
400 static void proc_put_long(void **buf, size_t *size, unsigned long val, bool neg)
401 {
402 	int len;
403 	char tmp[TMPBUFLEN], *p = tmp;
404 
405 	sprintf(p, "%s%lu", neg ? "-" : "", val);
406 	len = strlen(tmp);
407 	if (len > *size)
408 		len = *size;
409 	memcpy(*buf, tmp, len);
410 	*size -= len;
411 	*buf += len;
412 }
413 #undef TMPBUFLEN
414 
/*
 * Store the single character @c at *@buf when at least one byte of space
 * remains, then advance *@buf and decrement *@size. Does nothing when
 * *@size is zero.
 */
static void proc_put_char(void **buf, size_t *size, char c)
{
	char *dst;

	if (!*size)
		return;

	dst = *buf;
	*dst = c;
	*buf = dst + 1;
	(*size)--;
}
426 
/*
 * Conversion callback for bool sysctls. @valp is treated as a pointer to
 * a bool, not an int.
 *
 * On write, any non-zero *@lvalp stores true and zero stores false; the
 * sign in @negp is not looked at. On read, *@lvalp becomes 0 or 1 and
 * *@negp is cleared. Always returns 0.
 */
static int do_proc_dobool_conv(bool *negp, unsigned long *lvalp,
				int *valp,
				int write, void *data)
{
	bool *flag = (bool *)valp;

	if (!write) {
		*lvalp = (unsigned long)*flag;
		*negp = false;
		return 0;
	}

	*flag = (*lvalp != 0);
	return 0;
}
441 
/*
 * Default conversion callback between an int and the sign/magnitude pair
 * (@negp, @lvalp) used by the vector parsers.
 *
 * On write, the magnitude must fit an int for the given sign (up to
 * INT_MAX, or INT_MAX + 1 when negative), otherwise -EINVAL is returned
 * and *@valp is left alone. On read, *@valp is split into sign and
 * magnitude. @data is unused.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	unsigned long limit;
	int cur;

	if (!write) {
		cur = READ_ONCE(*valp);
		*negp = cur < 0;
		*lvalp = *negp ? -(unsigned long)cur : (unsigned long)cur;
		return 0;
	}

	limit = *negp ? (unsigned long) INT_MAX + 1 : (unsigned long) INT_MAX;
	if (*lvalp > limit)
		return -EINVAL;

	if (*negp)
		WRITE_ONCE(*valp, -*lvalp);
	else
		WRITE_ONCE(*valp, *lvalp);

	return 0;
}
468 
/*
 * Default conversion callback between an unsigned int and the unsigned
 * long used by the parser.
 *
 * On write, values above UINT_MAX are rejected with -EINVAL and *@valp is
 * left alone. On read, *@valp is widened into *@lvalp. @data is unused.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
				  unsigned int *valp,
				  int write, void *data)
{
	if (!write) {
		*lvalp = (unsigned long)READ_ONCE(*valp);
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;

	WRITE_ONCE(*valp, *lvalp);
	return 0;
}
483 
/*
 * Characters allowed to follow a parsed number; passed to proc_get_long()
 * as the permitted-trailer set. Not NUL-terminated, so always pair it with
 * sizeof(proc_wspace_sep).
 */
static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
485 
486 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
487 		  int write, void *buffer,
488 		  size_t *lenp, loff_t *ppos,
489 		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
490 			      int write, void *data),
491 		  void *data)
492 {
493 	int *i, vleft, first = 1, err = 0;
494 	size_t left;
495 	char *p;
496 
497 	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
498 		*lenp = 0;
499 		return 0;
500 	}
501 
502 	i = (int *) tbl_data;
503 	vleft = table->maxlen / sizeof(*i);
504 	left = *lenp;
505 
506 	if (!conv)
507 		conv = do_proc_dointvec_conv;
508 
509 	if (write) {
510 		if (proc_first_pos_non_zero_ignore(ppos, table))
511 			goto out;
512 
513 		if (left > PAGE_SIZE - 1)
514 			left = PAGE_SIZE - 1;
515 		p = buffer;
516 	}
517 
518 	for (; left && vleft--; i++, first=0) {
519 		unsigned long lval;
520 		bool neg;
521 
522 		if (write) {
523 			left -= proc_skip_spaces(&p);
524 
525 			if (!left)
526 				break;
527 			err = proc_get_long(&p, &left, &lval, &neg,
528 					     proc_wspace_sep,
529 					     sizeof(proc_wspace_sep), NULL);
530 			if (err)
531 				break;
532 			if (conv(&neg, &lval, i, 1, data)) {
533 				err = -EINVAL;
534 				break;
535 			}
536 		} else {
537 			if (conv(&neg, &lval, i, 0, data)) {
538 				err = -EINVAL;
539 				break;
540 			}
541 			if (!first)
542 				proc_put_char(&buffer, &left, '\t');
543 			proc_put_long(&buffer, &left, lval, neg);
544 		}
545 	}
546 
547 	if (!write && !first && left && !err)
548 		proc_put_char(&buffer, &left, '\n');
549 	if (write && !err && left)
550 		left -= proc_skip_spaces(&p);
551 	if (write && first)
552 		return err ? : -EINVAL;
553 	*lenp -= left;
554 out:
555 	*ppos += *lenp;
556 	return err;
557 }
558 
559 static int do_proc_dointvec(struct ctl_table *table, int write,
560 		  void *buffer, size_t *lenp, loff_t *ppos,
561 		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
562 			      int write, void *data),
563 		  void *data)
564 {
565 	return __do_proc_dointvec(table->data, table, write,
566 			buffer, lenp, ppos, conv, data);
567 }
568 
569 static int do_proc_douintvec_w(unsigned int *tbl_data,
570 			       struct ctl_table *table,
571 			       void *buffer,
572 			       size_t *lenp, loff_t *ppos,
573 			       int (*conv)(unsigned long *lvalp,
574 					   unsigned int *valp,
575 					   int write, void *data),
576 			       void *data)
577 {
578 	unsigned long lval;
579 	int err = 0;
580 	size_t left;
581 	bool neg;
582 	char *p = buffer;
583 
584 	left = *lenp;
585 
586 	if (proc_first_pos_non_zero_ignore(ppos, table))
587 		goto bail_early;
588 
589 	if (left > PAGE_SIZE - 1)
590 		left = PAGE_SIZE - 1;
591 
592 	left -= proc_skip_spaces(&p);
593 	if (!left) {
594 		err = -EINVAL;
595 		goto out_free;
596 	}
597 
598 	err = proc_get_long(&p, &left, &lval, &neg,
599 			     proc_wspace_sep,
600 			     sizeof(proc_wspace_sep), NULL);
601 	if (err || neg) {
602 		err = -EINVAL;
603 		goto out_free;
604 	}
605 
606 	if (conv(&lval, tbl_data, 1, data)) {
607 		err = -EINVAL;
608 		goto out_free;
609 	}
610 
611 	if (!err && left)
612 		left -= proc_skip_spaces(&p);
613 
614 out_free:
615 	if (err)
616 		return -EINVAL;
617 
618 	return 0;
619 
620 	/* This is in keeping with old __do_proc_dointvec() */
621 bail_early:
622 	*ppos += *lenp;
623 	return err;
624 }
625 
626 static int do_proc_douintvec_r(unsigned int *tbl_data, void *buffer,
627 			       size_t *lenp, loff_t *ppos,
628 			       int (*conv)(unsigned long *lvalp,
629 					   unsigned int *valp,
630 					   int write, void *data),
631 			       void *data)
632 {
633 	unsigned long lval;
634 	int err = 0;
635 	size_t left;
636 
637 	left = *lenp;
638 
639 	if (conv(&lval, tbl_data, 0, data)) {
640 		err = -EINVAL;
641 		goto out;
642 	}
643 
644 	proc_put_long(&buffer, &left, lval, false);
645 	if (!left)
646 		goto out;
647 
648 	proc_put_char(&buffer, &left, '\n');
649 
650 out:
651 	*lenp -= left;
652 	*ppos += *lenp;
653 
654 	return err;
655 }
656 
657 static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
658 			       int write, void *buffer,
659 			       size_t *lenp, loff_t *ppos,
660 			       int (*conv)(unsigned long *lvalp,
661 					   unsigned int *valp,
662 					   int write, void *data),
663 			       void *data)
664 {
665 	unsigned int *i, vleft;
666 
667 	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
668 		*lenp = 0;
669 		return 0;
670 	}
671 
672 	i = (unsigned int *) tbl_data;
673 	vleft = table->maxlen / sizeof(*i);
674 
675 	/*
676 	 * Arrays are not supported, keep this simple. *Do not* add
677 	 * support for them.
678 	 */
679 	if (vleft != 1) {
680 		*lenp = 0;
681 		return -EINVAL;
682 	}
683 
684 	if (!conv)
685 		conv = do_proc_douintvec_conv;
686 
687 	if (write)
688 		return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
689 					   conv, data);
690 	return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
691 }
692 
693 int do_proc_douintvec(struct ctl_table *table, int write,
694 		      void *buffer, size_t *lenp, loff_t *ppos,
695 		      int (*conv)(unsigned long *lvalp,
696 				  unsigned int *valp,
697 				  int write, void *data),
698 		      void *data)
699 {
700 	return __do_proc_douintvec(table->data, table, write,
701 				   buffer, lenp, ppos, conv, data);
702 }
703 
704 /**
705  * proc_dobool - read/write a bool
706  * @table: the sysctl table
707  * @write: %TRUE if this is a write to the sysctl file
708  * @buffer: the user buffer
709  * @lenp: the size of the user buffer
710  * @ppos: file position
711  *
712  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
713  * values from/to the user buffer, treated as an ASCII string.
714  *
715  * Returns 0 on success.
716  */
717 int proc_dobool(struct ctl_table *table, int write, void *buffer,
718 		size_t *lenp, loff_t *ppos)
719 {
720 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
721 				do_proc_dobool_conv, NULL);
722 }
723 
724 /**
725  * proc_dointvec - read a vector of integers
726  * @table: the sysctl table
727  * @write: %TRUE if this is a write to the sysctl file
728  * @buffer: the user buffer
729  * @lenp: the size of the user buffer
730  * @ppos: file position
731  *
732  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
733  * values from/to the user buffer, treated as an ASCII string.
734  *
735  * Returns 0 on success.
736  */
737 int proc_dointvec(struct ctl_table *table, int write, void *buffer,
738 		  size_t *lenp, loff_t *ppos)
739 {
740 	return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
741 }
742 
#ifdef CONFIG_COMPACTION
/*
 * Same as proc_dointvec_minmax(), but on kernels built with
 * CONFIG_PREEMPT_RT a successful write that changes the first int of
 * @table->data logs a one-time warning naming the sysctl and the writing
 * task.
 */
static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table,
		int write, void *buffer, size_t *lenp, loff_t *ppos)
{
	bool watch = IS_ENABLED(CONFIG_PREEMPT_RT) && write;
	int *valp = table->data;
	int before = 0;
	int rc;

	/* Only sample the value when a change could be reported. */
	if (watch)
		before = *valp;

	rc = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (!watch || rc)
		return rc;

	if (*valp != before)
		pr_warn_once("sysctl attribute %s changed by %s[%d]\n",
			     table->procname, current->comm,
			     task_pid_nr(current));
	return rc;
}
#endif
763 
764 /**
765  * proc_douintvec - read a vector of unsigned integers
766  * @table: the sysctl table
767  * @write: %TRUE if this is a write to the sysctl file
768  * @buffer: the user buffer
769  * @lenp: the size of the user buffer
770  * @ppos: file position
771  *
772  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
773  * values from/to the user buffer, treated as an ASCII string.
774  *
775  * Returns 0 on success.
776  */
777 int proc_douintvec(struct ctl_table *table, int write, void *buffer,
778 		size_t *lenp, loff_t *ppos)
779 {
780 	return do_proc_douintvec(table, write, buffer, lenp, ppos,
781 				 do_proc_douintvec_conv, NULL);
782 }
783 
784 /*
785  * Taint values can only be increased
786  * This means we can safely use a temporary.
787  */
788 static int proc_taint(struct ctl_table *table, int write,
789 			       void *buffer, size_t *lenp, loff_t *ppos)
790 {
791 	struct ctl_table t;
792 	unsigned long tmptaint = get_taint();
793 	int err;
794 
795 	if (write && !capable(CAP_SYS_ADMIN))
796 		return -EPERM;
797 
798 	t = *table;
799 	t.data = &tmptaint;
800 	err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
801 	if (err < 0)
802 		return err;
803 
804 	if (write) {
805 		int i;
806 
807 		/*
808 		 * If we are relying on panic_on_taint not producing
809 		 * false positives due to userspace input, bail out
810 		 * before setting the requested taint flags.
811 		 */
812 		if (panic_on_taint_nousertaint && (tmptaint & panic_on_taint))
813 			return -EINVAL;
814 
815 		/*
816 		 * Poor man's atomic or. Not worth adding a primitive
817 		 * to everyone's atomic.h for this
818 		 */
819 		for (i = 0; i < TAINT_FLAGS_COUNT; i++)
820 			if ((1UL << i) & tmptaint)
821 				add_taint(i, LOCKDEP_STILL_OK);
822 	}
823 
824 	return err;
825 }
826 
/**
 * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
 * @min: pointer to minimum allowable value; NULL means no lower bound is
 *	checked
 * @max: pointer to maximum allowable value; NULL means no upper bound is
 *	checked
 *
 * The do_proc_dointvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_dointvec_minmax() handler. Both bounds
 * are inclusive.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};
840 
841 static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
842 					int *valp,
843 					int write, void *data)
844 {
845 	int tmp, ret;
846 	struct do_proc_dointvec_minmax_conv_param *param = data;
847 	/*
848 	 * If writing, first do so via a temporary local int so we can
849 	 * bounds-check it before touching *valp.
850 	 */
851 	int *ip = write ? &tmp : valp;
852 
853 	ret = do_proc_dointvec_conv(negp, lvalp, ip, write, data);
854 	if (ret)
855 		return ret;
856 
857 	if (write) {
858 		if ((param->min && *param->min > tmp) ||
859 		    (param->max && *param->max < tmp))
860 			return -EINVAL;
861 		WRITE_ONCE(*valp, tmp);
862 	}
863 
864 	return 0;
865 }
866 
867 /**
868  * proc_dointvec_minmax - read a vector of integers with min/max values
869  * @table: the sysctl table
870  * @write: %TRUE if this is a write to the sysctl file
871  * @buffer: the user buffer
872  * @lenp: the size of the user buffer
873  * @ppos: file position
874  *
875  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
876  * values from/to the user buffer, treated as an ASCII string.
877  *
878  * This routine will ensure the values are within the range specified by
879  * table->extra1 (min) and table->extra2 (max).
880  *
881  * Returns 0 on success or -EINVAL on write when the range check fails.
882  */
883 int proc_dointvec_minmax(struct ctl_table *table, int write,
884 		  void *buffer, size_t *lenp, loff_t *ppos)
885 {
886 	struct do_proc_dointvec_minmax_conv_param param = {
887 		.min = (int *) table->extra1,
888 		.max = (int *) table->extra2,
889 	};
890 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
891 				do_proc_dointvec_minmax_conv, &param);
892 }
893 
/**
 * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
 * @min: pointer to minimum allowable value; NULL means no lower bound is
 *	checked
 * @max: pointer to maximum allowable value; NULL means no upper bound is
 *	checked
 *
 * The do_proc_douintvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_douintvec_minmax() handler. Both bounds
 * are inclusive.
 */
struct do_proc_douintvec_minmax_conv_param {
	unsigned int *min;
	unsigned int *max;
};
907 
908 static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
909 					 unsigned int *valp,
910 					 int write, void *data)
911 {
912 	int ret;
913 	unsigned int tmp;
914 	struct do_proc_douintvec_minmax_conv_param *param = data;
915 	/* write via temporary local uint for bounds-checking */
916 	unsigned int *up = write ? &tmp : valp;
917 
918 	ret = do_proc_douintvec_conv(lvalp, up, write, data);
919 	if (ret)
920 		return ret;
921 
922 	if (write) {
923 		if ((param->min && *param->min > tmp) ||
924 		    (param->max && *param->max < tmp))
925 			return -ERANGE;
926 
927 		WRITE_ONCE(*valp, tmp);
928 	}
929 
930 	return 0;
931 }
932 
933 /**
934  * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
935  * @table: the sysctl table
936  * @write: %TRUE if this is a write to the sysctl file
937  * @buffer: the user buffer
938  * @lenp: the size of the user buffer
939  * @ppos: file position
940  *
941  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
942  * values from/to the user buffer, treated as an ASCII string. Negative
943  * strings are not allowed.
944  *
945  * This routine will ensure the values are within the range specified by
946  * table->extra1 (min) and table->extra2 (max). There is a final sanity
947  * check for UINT_MAX to avoid having to support wrap around uses from
948  * userspace.
949  *
950  * Returns 0 on success or -ERANGE on write when the range check fails.
951  */
952 int proc_douintvec_minmax(struct ctl_table *table, int write,
953 			  void *buffer, size_t *lenp, loff_t *ppos)
954 {
955 	struct do_proc_douintvec_minmax_conv_param param = {
956 		.min = (unsigned int *) table->extra1,
957 		.max = (unsigned int *) table->extra2,
958 	};
959 	return do_proc_douintvec(table, write, buffer, lenp, ppos,
960 				 do_proc_douintvec_minmax_conv, &param);
961 }
962 
963 /**
964  * proc_dou8vec_minmax - read a vector of unsigned chars with min/max values
965  * @table: the sysctl table
966  * @write: %TRUE if this is a write to the sysctl file
967  * @buffer: the user buffer
968  * @lenp: the size of the user buffer
969  * @ppos: file position
970  *
971  * Reads/writes up to table->maxlen/sizeof(u8) unsigned chars
972  * values from/to the user buffer, treated as an ASCII string. Negative
973  * strings are not allowed.
974  *
975  * This routine will ensure the values are within the range specified by
976  * table->extra1 (min) and table->extra2 (max).
977  *
978  * Returns 0 on success or an error on write when the range check fails.
979  */
980 int proc_dou8vec_minmax(struct ctl_table *table, int write,
981 			void *buffer, size_t *lenp, loff_t *ppos)
982 {
983 	struct ctl_table tmp;
984 	unsigned int min = 0, max = 255U, val;
985 	u8 *data = table->data;
986 	struct do_proc_douintvec_minmax_conv_param param = {
987 		.min = &min,
988 		.max = &max,
989 	};
990 	int res;
991 
992 	/* Do not support arrays yet. */
993 	if (table->maxlen != sizeof(u8))
994 		return -EINVAL;
995 
996 	if (table->extra1) {
997 		min = *(unsigned int *) table->extra1;
998 		if (min > 255U)
999 			return -EINVAL;
1000 	}
1001 	if (table->extra2) {
1002 		max = *(unsigned int *) table->extra2;
1003 		if (max > 255U)
1004 			return -EINVAL;
1005 	}
1006 
1007 	tmp = *table;
1008 
1009 	tmp.maxlen = sizeof(val);
1010 	tmp.data = &val;
1011 	val = READ_ONCE(*data);
1012 	res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos,
1013 				do_proc_douintvec_minmax_conv, &param);
1014 	if (res)
1015 		return res;
1016 	if (write)
1017 		WRITE_ONCE(*data, val);
1018 	return 0;
1019 }
1020 EXPORT_SYMBOL_GPL(proc_dou8vec_minmax);
1021 
#ifdef CONFIG_MAGIC_SYSRQ
/*
 * Handler for the sysrq mask. The value shown comes from sysrq_mask();
 * a successfully parsed write is applied with sysrq_toggle_support().
 * The table's own data pointer is not used.
 */
static int sysrq_sysctl_handler(struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	int mask = sysrq_mask();
	int rc;

	rc = __do_proc_dointvec(&mask, table, write, buffer,
				lenp, ppos, NULL, NULL);
	if (!rc && write)
		sysrq_toggle_support(mask);

	return rc;
}
#endif
1041 
1042 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table,
1043 		int write, void *buffer, size_t *lenp, loff_t *ppos,
1044 		unsigned long convmul, unsigned long convdiv)
1045 {
1046 	unsigned long *i, *min, *max;
1047 	int vleft, first = 1, err = 0;
1048 	size_t left;
1049 	char *p;
1050 
1051 	if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
1052 		*lenp = 0;
1053 		return 0;
1054 	}
1055 
1056 	i = data;
1057 	min = table->extra1;
1058 	max = table->extra2;
1059 	vleft = table->maxlen / sizeof(unsigned long);
1060 	left = *lenp;
1061 
1062 	if (write) {
1063 		if (proc_first_pos_non_zero_ignore(ppos, table))
1064 			goto out;
1065 
1066 		if (left > PAGE_SIZE - 1)
1067 			left = PAGE_SIZE - 1;
1068 		p = buffer;
1069 	}
1070 
1071 	for (; left && vleft--; i++, first = 0) {
1072 		unsigned long val;
1073 
1074 		if (write) {
1075 			bool neg;
1076 
1077 			left -= proc_skip_spaces(&p);
1078 			if (!left)
1079 				break;
1080 
1081 			err = proc_get_long(&p, &left, &val, &neg,
1082 					     proc_wspace_sep,
1083 					     sizeof(proc_wspace_sep), NULL);
1084 			if (err || neg) {
1085 				err = -EINVAL;
1086 				break;
1087 			}
1088 
1089 			val = convmul * val / convdiv;
1090 			if ((min && val < *min) || (max && val > *max)) {
1091 				err = -EINVAL;
1092 				break;
1093 			}
1094 			WRITE_ONCE(*i, val);
1095 		} else {
1096 			val = convdiv * READ_ONCE(*i) / convmul;
1097 			if (!first)
1098 				proc_put_char(&buffer, &left, '\t');
1099 			proc_put_long(&buffer, &left, val, false);
1100 		}
1101 	}
1102 
1103 	if (!write && !first && left && !err)
1104 		proc_put_char(&buffer, &left, '\n');
1105 	if (write && !err)
1106 		left -= proc_skip_spaces(&p);
1107 	if (write && first)
1108 		return err ? : -EINVAL;
1109 	*lenp -= left;
1110 out:
1111 	*ppos += *lenp;
1112 	return err;
1113 }
1114 
1115 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
1116 		void *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul,
1117 		unsigned long convdiv)
1118 {
1119 	return __do_proc_doulongvec_minmax(table->data, table, write,
1120 			buffer, lenp, ppos, convmul, convdiv);
1121 }
1122 
1123 /**
1124  * proc_doulongvec_minmax - read a vector of long integers with min/max values
1125  * @table: the sysctl table
1126  * @write: %TRUE if this is a write to the sysctl file
1127  * @buffer: the user buffer
1128  * @lenp: the size of the user buffer
1129  * @ppos: file position
1130  *
1131  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1132  * values from/to the user buffer, treated as an ASCII string.
1133  *
1134  * This routine will ensure the values are within the range specified by
1135  * table->extra1 (min) and table->extra2 (max).
1136  *
1137  * Returns 0 on success.
1138  */
1139 int proc_doulongvec_minmax(struct ctl_table *table, int write,
1140 			   void *buffer, size_t *lenp, loff_t *ppos)
1141 {
1142     return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
1143 }
1144 
1145 /**
1146  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
1147  * @table: the sysctl table
1148  * @write: %TRUE if this is a write to the sysctl file
1149  * @buffer: the user buffer
1150  * @lenp: the size of the user buffer
1151  * @ppos: file position
1152  *
1153  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1154  * values from/to the user buffer, treated as an ASCII string. The values
1155  * are treated as milliseconds, and converted to jiffies when they are stored.
1156  *
1157  * This routine will ensure the values are within the range specified by
1158  * table->extra1 (min) and table->extra2 (max).
1159  *
1160  * Returns 0 on success.
1161  */
1162 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1163 				      void *buffer, size_t *lenp, loff_t *ppos)
1164 {
1165     return do_proc_doulongvec_minmax(table, write, buffer,
1166 				     lenp, ppos, HZ, 1000l);
1167 }
1168 
1169 
1170 static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
1171 					 int *valp,
1172 					 int write, void *data)
1173 {
1174 	if (write) {
1175 		if (*lvalp > INT_MAX / HZ)
1176 			return 1;
1177 		if (*negp)
1178 			WRITE_ONCE(*valp, -*lvalp * HZ);
1179 		else
1180 			WRITE_ONCE(*valp, *lvalp * HZ);
1181 	} else {
1182 		int val = READ_ONCE(*valp);
1183 		unsigned long lval;
1184 		if (val < 0) {
1185 			*negp = true;
1186 			lval = -(unsigned long)val;
1187 		} else {
1188 			*negp = false;
1189 			lval = (unsigned long)val;
1190 		}
1191 		*lvalp = lval / HZ;
1192 	}
1193 	return 0;
1194 }
1195 
1196 static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
1197 						int *valp,
1198 						int write, void *data)
1199 {
1200 	if (write) {
1201 		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
1202 			return 1;
1203 		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
1204 	} else {
1205 		int val = *valp;
1206 		unsigned long lval;
1207 		if (val < 0) {
1208 			*negp = true;
1209 			lval = -(unsigned long)val;
1210 		} else {
1211 			*negp = false;
1212 			lval = (unsigned long)val;
1213 		}
1214 		*lvalp = jiffies_to_clock_t(lval);
1215 	}
1216 	return 0;
1217 }
1218 
/*
 * Conversion callback translating between a user-visible value in
 * milliseconds (sign in *negp, magnitude in *lvalp) and an int holding
 * jiffies at *valp.
 *
 * Write: the value is run through msecs_to_jiffies(); results above
 *        INT_MAX are refused by returning 1, anything else is stored.
 * Read:  the stored int is split into sign and magnitude, and the
 *        magnitude is passed through jiffies_to_msecs().
 *
 * @data is not used.  Returns 0 unless the write is refused.
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	unsigned long jif;

	if (!write) {
		const int stored = READ_ONCE(*valp);
		unsigned long magnitude;

		if (stored < 0) {
			magnitude = -(unsigned long)stored;
			*negp = true;
		} else {
			magnitude = (unsigned long)stored;
			*negp = false;
		}
		*lvalp = jiffies_to_msecs(magnitude);
		return 0;
	}

	jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
	if (jif > INT_MAX)
		return 1;

	WRITE_ONCE(*valp, (int)jif);
	return 0;
}
1243 
1244 static int do_proc_dointvec_ms_jiffies_minmax_conv(bool *negp, unsigned long *lvalp,
1245 						int *valp, int write, void *data)
1246 {
1247 	int tmp, ret;
1248 	struct do_proc_dointvec_minmax_conv_param *param = data;
1249 	/*
1250 	 * If writing, first do so via a temporary local int so we can
1251 	 * bounds-check it before touching *valp.
1252 	 */
1253 	int *ip = write ? &tmp : valp;
1254 
1255 	ret = do_proc_dointvec_ms_jiffies_conv(negp, lvalp, ip, write, data);
1256 	if (ret)
1257 		return ret;
1258 
1259 	if (write) {
1260 		if ((param->min && *param->min > tmp) ||
1261 				(param->max && *param->max < tmp))
1262 			return -EINVAL;
1263 		*valp = tmp;
1264 	}
1265 	return 0;
1266 }
1267 
1268 /**
1269  * proc_dointvec_jiffies - read a vector of integers as seconds
1270  * @table: the sysctl table
1271  * @write: %TRUE if this is a write to the sysctl file
1272  * @buffer: the user buffer
1273  * @lenp: the size of the user buffer
1274  * @ppos: file position
1275  *
1276  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1277  * values from/to the user buffer, treated as an ASCII string.
1278  * The values read are assumed to be in seconds, and are converted into
1279  * jiffies.
1280  *
1281  * Returns 0 on success.
1282  */
1283 int proc_dointvec_jiffies(struct ctl_table *table, int write,
1284 			  void *buffer, size_t *lenp, loff_t *ppos)
1285 {
1286     return do_proc_dointvec(table,write,buffer,lenp,ppos,
1287 		    	    do_proc_dointvec_jiffies_conv,NULL);
1288 }
1289 
1290 int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1291 			  void *buffer, size_t *lenp, loff_t *ppos)
1292 {
1293 	struct do_proc_dointvec_minmax_conv_param param = {
1294 		.min = (int *) table->extra1,
1295 		.max = (int *) table->extra2,
1296 	};
1297 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
1298 			do_proc_dointvec_ms_jiffies_minmax_conv, &param);
1299 }
1300 
1301 /**
1302  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
1303  * @table: the sysctl table
1304  * @write: %TRUE if this is a write to the sysctl file
1305  * @buffer: the user buffer
1306  * @lenp: the size of the user buffer
1307  * @ppos: pointer to the file position
1308  *
1309  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1310  * values from/to the user buffer, treated as an ASCII string.
1311  * The values read are assumed to be in 1/USER_HZ seconds, and
1312  * are converted into jiffies.
1313  *
1314  * Returns 0 on success.
1315  */
1316 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
1317 				 void *buffer, size_t *lenp, loff_t *ppos)
1318 {
1319 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
1320 				do_proc_dointvec_userhz_jiffies_conv, NULL);
1321 }
1322 
1323 /**
1324  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
1325  * @table: the sysctl table
1326  * @write: %TRUE if this is a write to the sysctl file
1327  * @buffer: the user buffer
1328  * @lenp: the size of the user buffer
1329  * @ppos: file position
1330  * @ppos: the current position in the file
1331  *
1332  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1333  * values from/to the user buffer, treated as an ASCII string.
1334  * The values read are assumed to be in 1/1000 seconds, and
1335  * are converted into jiffies.
1336  *
1337  * Returns 0 on success.
1338  */
1339 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, void *buffer,
1340 		size_t *lenp, loff_t *ppos)
1341 {
1342 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
1343 				do_proc_dointvec_ms_jiffies_conv, NULL);
1344 }
1345 
1346 static int proc_do_cad_pid(struct ctl_table *table, int write, void *buffer,
1347 		size_t *lenp, loff_t *ppos)
1348 {
1349 	struct pid *new_pid;
1350 	pid_t tmp;
1351 	int r;
1352 
1353 	tmp = pid_vnr(cad_pid);
1354 
1355 	r = __do_proc_dointvec(&tmp, table, write, buffer,
1356 			       lenp, ppos, NULL, NULL);
1357 	if (r || !write)
1358 		return r;
1359 
1360 	new_pid = find_get_pid(tmp);
1361 	if (!new_pid)
1362 		return -ESRCH;
1363 
1364 	put_pid(xchg(&cad_pid, new_pid));
1365 	return 0;
1366 }
1367 
1368 /**
1369  * proc_do_large_bitmap - read/write from/to a large bitmap
1370  * @table: the sysctl table
1371  * @write: %TRUE if this is a write to the sysctl file
1372  * @buffer: the user buffer
1373  * @lenp: the size of the user buffer
1374  * @ppos: file position
1375  *
1376  * The bitmap is stored at table->data and the bitmap length (in bits)
1377  * in table->maxlen.
1378  *
1379  * We use a range comma separated format (e.g. 1,3-4,10-10) so that
1380  * large bitmaps may be represented in a compact manner. Writing into
1381  * the file will clear the bitmap then update it with the given input.
1382  *
1383  * Returns 0 on success.
1384  */
1385 int proc_do_large_bitmap(struct ctl_table *table, int write,
1386 			 void *buffer, size_t *lenp, loff_t *ppos)
1387 {
1388 	int err = 0;
1389 	size_t left = *lenp;
1390 	unsigned long bitmap_len = table->maxlen;
1391 	unsigned long *bitmap = *(unsigned long **) table->data;
1392 	unsigned long *tmp_bitmap = NULL;
1393 	char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
1394 
1395 	if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
1396 		*lenp = 0;
1397 		return 0;
1398 	}
1399 
1400 	if (write) {
1401 		char *p = buffer;
1402 		size_t skipped = 0;
1403 
1404 		if (left > PAGE_SIZE - 1) {
1405 			left = PAGE_SIZE - 1;
1406 			/* How much of the buffer we'll skip this pass */
1407 			skipped = *lenp - left;
1408 		}
1409 
1410 		tmp_bitmap = bitmap_zalloc(bitmap_len, GFP_KERNEL);
1411 		if (!tmp_bitmap)
1412 			return -ENOMEM;
1413 		proc_skip_char(&p, &left, '\n');
1414 		while (!err && left) {
1415 			unsigned long val_a, val_b;
1416 			bool neg;
1417 			size_t saved_left;
1418 
1419 			/* In case we stop parsing mid-number, we can reset */
1420 			saved_left = left;
1421 			err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
1422 					     sizeof(tr_a), &c);
1423 			/*
1424 			 * If we consumed the entirety of a truncated buffer or
1425 			 * only one char is left (may be a "-"), then stop here,
1426 			 * reset, & come back for more.
1427 			 */
1428 			if ((left <= 1) && skipped) {
1429 				left = saved_left;
1430 				break;
1431 			}
1432 
1433 			if (err)
1434 				break;
1435 			if (val_a >= bitmap_len || neg) {
1436 				err = -EINVAL;
1437 				break;
1438 			}
1439 
1440 			val_b = val_a;
1441 			if (left) {
1442 				p++;
1443 				left--;
1444 			}
1445 
1446 			if (c == '-') {
1447 				err = proc_get_long(&p, &left, &val_b,
1448 						     &neg, tr_b, sizeof(tr_b),
1449 						     &c);
1450 				/*
1451 				 * If we consumed all of a truncated buffer or
1452 				 * then stop here, reset, & come back for more.
1453 				 */
1454 				if (!left && skipped) {
1455 					left = saved_left;
1456 					break;
1457 				}
1458 
1459 				if (err)
1460 					break;
1461 				if (val_b >= bitmap_len || neg ||
1462 				    val_a > val_b) {
1463 					err = -EINVAL;
1464 					break;
1465 				}
1466 				if (left) {
1467 					p++;
1468 					left--;
1469 				}
1470 			}
1471 
1472 			bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
1473 			proc_skip_char(&p, &left, '\n');
1474 		}
1475 		left += skipped;
1476 	} else {
1477 		unsigned long bit_a, bit_b = 0;
1478 		bool first = 1;
1479 
1480 		while (left) {
1481 			bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
1482 			if (bit_a >= bitmap_len)
1483 				break;
1484 			bit_b = find_next_zero_bit(bitmap, bitmap_len,
1485 						   bit_a + 1) - 1;
1486 
1487 			if (!first)
1488 				proc_put_char(&buffer, &left, ',');
1489 			proc_put_long(&buffer, &left, bit_a, false);
1490 			if (bit_a != bit_b) {
1491 				proc_put_char(&buffer, &left, '-');
1492 				proc_put_long(&buffer, &left, bit_b, false);
1493 			}
1494 
1495 			first = 0; bit_b++;
1496 		}
1497 		proc_put_char(&buffer, &left, '\n');
1498 	}
1499 
1500 	if (!err) {
1501 		if (write) {
1502 			if (*ppos)
1503 				bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
1504 			else
1505 				bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
1506 		}
1507 		*lenp -= left;
1508 		*ppos += *lenp;
1509 	}
1510 
1511 	bitmap_free(tmp_bitmap);
1512 	return err;
1513 }
1514 
1515 #else /* CONFIG_PROC_SYSCTL */
1516 
1517 int proc_dostring(struct ctl_table *table, int write,
1518 		  void *buffer, size_t *lenp, loff_t *ppos)
1519 {
1520 	return -ENOSYS;
1521 }
1522 
1523 int proc_dobool(struct ctl_table *table, int write,
1524 		void *buffer, size_t *lenp, loff_t *ppos)
1525 {
1526 	return -ENOSYS;
1527 }
1528 
1529 int proc_dointvec(struct ctl_table *table, int write,
1530 		  void *buffer, size_t *lenp, loff_t *ppos)
1531 {
1532 	return -ENOSYS;
1533 }
1534 
1535 int proc_douintvec(struct ctl_table *table, int write,
1536 		  void *buffer, size_t *lenp, loff_t *ppos)
1537 {
1538 	return -ENOSYS;
1539 }
1540 
1541 int proc_dointvec_minmax(struct ctl_table *table, int write,
1542 		    void *buffer, size_t *lenp, loff_t *ppos)
1543 {
1544 	return -ENOSYS;
1545 }
1546 
1547 int proc_douintvec_minmax(struct ctl_table *table, int write,
1548 			  void *buffer, size_t *lenp, loff_t *ppos)
1549 {
1550 	return -ENOSYS;
1551 }
1552 
1553 int proc_dou8vec_minmax(struct ctl_table *table, int write,
1554 			void *buffer, size_t *lenp, loff_t *ppos)
1555 {
1556 	return -ENOSYS;
1557 }
1558 
1559 int proc_dointvec_jiffies(struct ctl_table *table, int write,
1560 		    void *buffer, size_t *lenp, loff_t *ppos)
1561 {
1562 	return -ENOSYS;
1563 }
1564 
1565 int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1566 				    void *buffer, size_t *lenp, loff_t *ppos)
1567 {
1568 	return -ENOSYS;
1569 }
1570 
1571 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
1572 		    void *buffer, size_t *lenp, loff_t *ppos)
1573 {
1574 	return -ENOSYS;
1575 }
1576 
1577 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
1578 			     void *buffer, size_t *lenp, loff_t *ppos)
1579 {
1580 	return -ENOSYS;
1581 }
1582 
1583 int proc_doulongvec_minmax(struct ctl_table *table, int write,
1584 		    void *buffer, size_t *lenp, loff_t *ppos)
1585 {
1586 	return -ENOSYS;
1587 }
1588 
1589 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1590 				      void *buffer, size_t *lenp, loff_t *ppos)
1591 {
1592 	return -ENOSYS;
1593 }
1594 
1595 int proc_do_large_bitmap(struct ctl_table *table, int write,
1596 			 void *buffer, size_t *lenp, loff_t *ppos)
1597 {
1598 	return -ENOSYS;
1599 }
1600 
1601 #endif /* CONFIG_PROC_SYSCTL */
1602 
1603 #if defined(CONFIG_SYSCTL)
1604 int proc_do_static_key(struct ctl_table *table, int write,
1605 		       void *buffer, size_t *lenp, loff_t *ppos)
1606 {
1607 	struct static_key *key = (struct static_key *)table->data;
1608 	static DEFINE_MUTEX(static_key_mutex);
1609 	int val, ret;
1610 	struct ctl_table tmp = {
1611 		.data   = &val,
1612 		.maxlen = sizeof(val),
1613 		.mode   = table->mode,
1614 		.extra1 = SYSCTL_ZERO,
1615 		.extra2 = SYSCTL_ONE,
1616 	};
1617 
1618 	if (write && !capable(CAP_SYS_ADMIN))
1619 		return -EPERM;
1620 
1621 	mutex_lock(&static_key_mutex);
1622 	val = static_key_enabled(key);
1623 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
1624 	if (write && !ret) {
1625 		if (val)
1626 			static_key_enable(key);
1627 		else
1628 			static_key_disable(key);
1629 	}
1630 	mutex_unlock(&static_key_mutex);
1631 	return ret;
1632 }
1633 
1634 static struct ctl_table kern_table[] = {
1635 #ifdef CONFIG_NUMA_BALANCING
1636 	{
1637 		.procname	= "numa_balancing",
1638 		.data		= NULL, /* filled in by handler */
1639 		.maxlen		= sizeof(unsigned int),
1640 		.mode		= 0644,
1641 		.proc_handler	= sysctl_numa_balancing,
1642 		.extra1		= SYSCTL_ZERO,
1643 		.extra2		= SYSCTL_FOUR,
1644 	},
1645 	{
1646 		.procname	= "numa_balancing_promote_rate_limit_MBps",
1647 		.data		= &sysctl_numa_balancing_promote_rate_limit,
1648 		.maxlen		= sizeof(unsigned int),
1649 		.mode		= 0644,
1650 		.proc_handler	= proc_dointvec_minmax,
1651 		.extra1		= SYSCTL_ZERO,
1652 	},
1653 #endif /* CONFIG_NUMA_BALANCING */
1654 	{
1655 		.procname	= "panic",
1656 		.data		= &panic_timeout,
1657 		.maxlen		= sizeof(int),
1658 		.mode		= 0644,
1659 		.proc_handler	= proc_dointvec,
1660 	},
1661 #ifdef CONFIG_PROC_SYSCTL
1662 	{
1663 		.procname	= "tainted",
1664 		.maxlen 	= sizeof(long),
1665 		.mode		= 0644,
1666 		.proc_handler	= proc_taint,
1667 	},
1668 	{
1669 		.procname	= "sysctl_writes_strict",
1670 		.data		= &sysctl_writes_strict,
1671 		.maxlen		= sizeof(int),
1672 		.mode		= 0644,
1673 		.proc_handler	= proc_dointvec_minmax,
1674 		.extra1		= SYSCTL_NEG_ONE,
1675 		.extra2		= SYSCTL_ONE,
1676 	},
1677 #endif
1678 	{
1679 		.procname	= "print-fatal-signals",
1680 		.data		= &print_fatal_signals,
1681 		.maxlen		= sizeof(int),
1682 		.mode		= 0644,
1683 		.proc_handler	= proc_dointvec,
1684 	},
1685 #ifdef CONFIG_SPARC
1686 	{
1687 		.procname	= "reboot-cmd",
1688 		.data		= reboot_command,
1689 		.maxlen		= 256,
1690 		.mode		= 0644,
1691 		.proc_handler	= proc_dostring,
1692 	},
1693 	{
1694 		.procname	= "stop-a",
1695 		.data		= &stop_a_enabled,
1696 		.maxlen		= sizeof (int),
1697 		.mode		= 0644,
1698 		.proc_handler	= proc_dointvec,
1699 	},
1700 	{
1701 		.procname	= "scons-poweroff",
1702 		.data		= &scons_pwroff,
1703 		.maxlen		= sizeof (int),
1704 		.mode		= 0644,
1705 		.proc_handler	= proc_dointvec,
1706 	},
1707 #endif
1708 #ifdef CONFIG_SPARC64
1709 	{
1710 		.procname	= "tsb-ratio",
1711 		.data		= &sysctl_tsb_ratio,
1712 		.maxlen		= sizeof (int),
1713 		.mode		= 0644,
1714 		.proc_handler	= proc_dointvec,
1715 	},
1716 #endif
1717 #ifdef CONFIG_PARISC
1718 	{
1719 		.procname	= "soft-power",
1720 		.data		= &pwrsw_enabled,
1721 		.maxlen		= sizeof (int),
1722 		.mode		= 0644,
1723 		.proc_handler	= proc_dointvec,
1724 	},
1725 #endif
1726 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
1727 	{
1728 		.procname	= "unaligned-trap",
1729 		.data		= &unaligned_enabled,
1730 		.maxlen		= sizeof (int),
1731 		.mode		= 0644,
1732 		.proc_handler	= proc_dointvec,
1733 	},
1734 #endif
1735 #ifdef CONFIG_STACK_TRACER
1736 	{
1737 		.procname	= "stack_tracer_enabled",
1738 		.data		= &stack_tracer_enabled,
1739 		.maxlen		= sizeof(int),
1740 		.mode		= 0644,
1741 		.proc_handler	= stack_trace_sysctl,
1742 	},
1743 #endif
1744 #ifdef CONFIG_TRACING
1745 	{
1746 		.procname	= "ftrace_dump_on_oops",
1747 		.data		= &ftrace_dump_on_oops,
1748 		.maxlen		= sizeof(int),
1749 		.mode		= 0644,
1750 		.proc_handler	= proc_dointvec,
1751 	},
1752 	{
1753 		.procname	= "traceoff_on_warning",
1754 		.data		= &__disable_trace_on_warning,
1755 		.maxlen		= sizeof(__disable_trace_on_warning),
1756 		.mode		= 0644,
1757 		.proc_handler	= proc_dointvec,
1758 	},
1759 	{
1760 		.procname	= "tracepoint_printk",
1761 		.data		= &tracepoint_printk,
1762 		.maxlen		= sizeof(tracepoint_printk),
1763 		.mode		= 0644,
1764 		.proc_handler	= tracepoint_printk_sysctl,
1765 	},
1766 #endif
1767 #ifdef CONFIG_MODULES
1768 	{
1769 		.procname	= "modprobe",
1770 		.data		= &modprobe_path,
1771 		.maxlen		= KMOD_PATH_LEN,
1772 		.mode		= 0644,
1773 		.proc_handler	= proc_dostring,
1774 	},
1775 	{
1776 		.procname	= "modules_disabled",
1777 		.data		= &modules_disabled,
1778 		.maxlen		= sizeof(int),
1779 		.mode		= 0644,
1780 		/* only handle a transition from default "0" to "1" */
1781 		.proc_handler	= proc_dointvec_minmax,
1782 		.extra1		= SYSCTL_ONE,
1783 		.extra2		= SYSCTL_ONE,
1784 	},
1785 #endif
1786 #ifdef CONFIG_UEVENT_HELPER
1787 	{
1788 		.procname	= "hotplug",
1789 		.data		= &uevent_helper,
1790 		.maxlen		= UEVENT_HELPER_PATH_LEN,
1791 		.mode		= 0644,
1792 		.proc_handler	= proc_dostring,
1793 	},
1794 #endif
1795 #ifdef CONFIG_MAGIC_SYSRQ
1796 	{
1797 		.procname	= "sysrq",
1798 		.data		= NULL,
1799 		.maxlen		= sizeof (int),
1800 		.mode		= 0644,
1801 		.proc_handler	= sysrq_sysctl_handler,
1802 	},
1803 #endif
1804 #ifdef CONFIG_PROC_SYSCTL
1805 	{
1806 		.procname	= "cad_pid",
1807 		.data		= NULL,
1808 		.maxlen		= sizeof (int),
1809 		.mode		= 0600,
1810 		.proc_handler	= proc_do_cad_pid,
1811 	},
1812 #endif
1813 	{
1814 		.procname	= "threads-max",
1815 		.data		= NULL,
1816 		.maxlen		= sizeof(int),
1817 		.mode		= 0644,
1818 		.proc_handler	= sysctl_max_threads,
1819 	},
1820 	{
1821 		.procname	= "usermodehelper",
1822 		.mode		= 0555,
1823 		.child		= usermodehelper_table,
1824 	},
1825 	{
1826 		.procname	= "overflowuid",
1827 		.data		= &overflowuid,
1828 		.maxlen		= sizeof(int),
1829 		.mode		= 0644,
1830 		.proc_handler	= proc_dointvec_minmax,
1831 		.extra1		= SYSCTL_ZERO,
1832 		.extra2		= SYSCTL_MAXOLDUID,
1833 	},
1834 	{
1835 		.procname	= "overflowgid",
1836 		.data		= &overflowgid,
1837 		.maxlen		= sizeof(int),
1838 		.mode		= 0644,
1839 		.proc_handler	= proc_dointvec_minmax,
1840 		.extra1		= SYSCTL_ZERO,
1841 		.extra2		= SYSCTL_MAXOLDUID,
1842 	},
1843 #ifdef CONFIG_S390
1844 	{
1845 		.procname	= "userprocess_debug",
1846 		.data		= &show_unhandled_signals,
1847 		.maxlen		= sizeof(int),
1848 		.mode		= 0644,
1849 		.proc_handler	= proc_dointvec,
1850 	},
1851 #endif
1852 	{
1853 		.procname	= "pid_max",
1854 		.data		= &pid_max,
1855 		.maxlen		= sizeof (int),
1856 		.mode		= 0644,
1857 		.proc_handler	= proc_dointvec_minmax,
1858 		.extra1		= &pid_max_min,
1859 		.extra2		= &pid_max_max,
1860 	},
1861 	{
1862 		.procname	= "panic_on_oops",
1863 		.data		= &panic_on_oops,
1864 		.maxlen		= sizeof(int),
1865 		.mode		= 0644,
1866 		.proc_handler	= proc_dointvec,
1867 	},
1868 	{
1869 		.procname	= "panic_print",
1870 		.data		= &panic_print,
1871 		.maxlen		= sizeof(unsigned long),
1872 		.mode		= 0644,
1873 		.proc_handler	= proc_doulongvec_minmax,
1874 	},
1875 	{
1876 		.procname	= "ngroups_max",
1877 		.data		= (void *)&ngroups_max,
1878 		.maxlen		= sizeof (int),
1879 		.mode		= 0444,
1880 		.proc_handler	= proc_dointvec,
1881 	},
1882 	{
1883 		.procname	= "cap_last_cap",
1884 		.data		= (void *)&cap_last_cap,
1885 		.maxlen		= sizeof(int),
1886 		.mode		= 0444,
1887 		.proc_handler	= proc_dointvec,
1888 	},
1889 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
1890 	{
1891 		.procname       = "unknown_nmi_panic",
1892 		.data           = &unknown_nmi_panic,
1893 		.maxlen         = sizeof (int),
1894 		.mode           = 0644,
1895 		.proc_handler   = proc_dointvec,
1896 	},
1897 #endif
1898 
1899 #if (defined(CONFIG_X86_32) || defined(CONFIG_PARISC)) && \
1900 	defined(CONFIG_DEBUG_STACKOVERFLOW)
1901 	{
1902 		.procname	= "panic_on_stackoverflow",
1903 		.data		= &sysctl_panic_on_stackoverflow,
1904 		.maxlen		= sizeof(int),
1905 		.mode		= 0644,
1906 		.proc_handler	= proc_dointvec,
1907 	},
1908 #endif
1909 #if defined(CONFIG_X86)
1910 	{
1911 		.procname	= "panic_on_unrecovered_nmi",
1912 		.data		= &panic_on_unrecovered_nmi,
1913 		.maxlen		= sizeof(int),
1914 		.mode		= 0644,
1915 		.proc_handler	= proc_dointvec,
1916 	},
1917 	{
1918 		.procname	= "panic_on_io_nmi",
1919 		.data		= &panic_on_io_nmi,
1920 		.maxlen		= sizeof(int),
1921 		.mode		= 0644,
1922 		.proc_handler	= proc_dointvec,
1923 	},
1924 	{
1925 		.procname	= "bootloader_type",
1926 		.data		= &bootloader_type,
1927 		.maxlen		= sizeof (int),
1928 		.mode		= 0444,
1929 		.proc_handler	= proc_dointvec,
1930 	},
1931 	{
1932 		.procname	= "bootloader_version",
1933 		.data		= &bootloader_version,
1934 		.maxlen		= sizeof (int),
1935 		.mode		= 0444,
1936 		.proc_handler	= proc_dointvec,
1937 	},
1938 	{
1939 		.procname	= "io_delay_type",
1940 		.data		= &io_delay_type,
1941 		.maxlen		= sizeof(int),
1942 		.mode		= 0644,
1943 		.proc_handler	= proc_dointvec,
1944 	},
1945 #endif
1946 #if defined(CONFIG_MMU)
1947 	{
1948 		.procname	= "randomize_va_space",
1949 		.data		= &randomize_va_space,
1950 		.maxlen		= sizeof(int),
1951 		.mode		= 0644,
1952 		.proc_handler	= proc_dointvec,
1953 	},
1954 #endif
1955 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
1956 	{
1957 		.procname	= "spin_retry",
1958 		.data		= &spin_retry,
1959 		.maxlen		= sizeof (int),
1960 		.mode		= 0644,
1961 		.proc_handler	= proc_dointvec,
1962 	},
1963 #endif
1964 #if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
1965 	{
1966 		.procname	= "acpi_video_flags",
1967 		.data		= &acpi_realmode_flags,
1968 		.maxlen		= sizeof (unsigned long),
1969 		.mode		= 0644,
1970 		.proc_handler	= proc_doulongvec_minmax,
1971 	},
1972 #endif
1973 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
1974 	{
1975 		.procname	= "ignore-unaligned-usertrap",
1976 		.data		= &no_unaligned_warning,
1977 		.maxlen		= sizeof (int),
1978 		.mode		= 0644,
1979 		.proc_handler	= proc_dointvec,
1980 	},
1981 #endif
1982 #ifdef CONFIG_IA64
1983 	{
1984 		.procname	= "unaligned-dump-stack",
1985 		.data		= &unaligned_dump_stack,
1986 		.maxlen		= sizeof (int),
1987 		.mode		= 0644,
1988 		.proc_handler	= proc_dointvec,
1989 	},
1990 #endif
1991 #ifdef CONFIG_RT_MUTEXES
1992 	{
1993 		.procname	= "max_lock_depth",
1994 		.data		= &max_lock_depth,
1995 		.maxlen		= sizeof(int),
1996 		.mode		= 0644,
1997 		.proc_handler	= proc_dointvec,
1998 	},
1999 #endif
2000 #ifdef CONFIG_KEYS
2001 	{
2002 		.procname	= "keys",
2003 		.mode		= 0555,
2004 		.child		= key_sysctls,
2005 	},
2006 #endif
2007 #ifdef CONFIG_PERF_EVENTS
2008 	/*
2009 	 * User-space scripts rely on the existence of this file
2010 	 * as a feature check for perf_events being enabled.
2011 	 *
2012 	 * So it's an ABI, do not remove!
2013 	 */
2014 	{
2015 		.procname	= "perf_event_paranoid",
2016 		.data		= &sysctl_perf_event_paranoid,
2017 		.maxlen		= sizeof(sysctl_perf_event_paranoid),
2018 		.mode		= 0644,
2019 		.proc_handler	= proc_dointvec,
2020 	},
2021 	{
2022 		.procname	= "perf_event_mlock_kb",
2023 		.data		= &sysctl_perf_event_mlock,
2024 		.maxlen		= sizeof(sysctl_perf_event_mlock),
2025 		.mode		= 0644,
2026 		.proc_handler	= proc_dointvec,
2027 	},
2028 	{
2029 		.procname	= "perf_event_max_sample_rate",
2030 		.data		= &sysctl_perf_event_sample_rate,
2031 		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
2032 		.mode		= 0644,
2033 		.proc_handler	= perf_proc_update_handler,
2034 		.extra1		= SYSCTL_ONE,
2035 	},
2036 	{
2037 		.procname	= "perf_cpu_time_max_percent",
2038 		.data		= &sysctl_perf_cpu_time_max_percent,
2039 		.maxlen		= sizeof(sysctl_perf_cpu_time_max_percent),
2040 		.mode		= 0644,
2041 		.proc_handler	= perf_cpu_time_max_percent_handler,
2042 		.extra1		= SYSCTL_ZERO,
2043 		.extra2		= SYSCTL_ONE_HUNDRED,
2044 	},
2045 	{
2046 		.procname	= "perf_event_max_stack",
2047 		.data		= &sysctl_perf_event_max_stack,
2048 		.maxlen		= sizeof(sysctl_perf_event_max_stack),
2049 		.mode		= 0644,
2050 		.proc_handler	= perf_event_max_stack_handler,
2051 		.extra1		= SYSCTL_ZERO,
2052 		.extra2		= (void *)&six_hundred_forty_kb,
2053 	},
2054 	{
2055 		.procname	= "perf_event_max_contexts_per_stack",
2056 		.data		= &sysctl_perf_event_max_contexts_per_stack,
2057 		.maxlen		= sizeof(sysctl_perf_event_max_contexts_per_stack),
2058 		.mode		= 0644,
2059 		.proc_handler	= perf_event_max_stack_handler,
2060 		.extra1		= SYSCTL_ZERO,
2061 		.extra2		= SYSCTL_ONE_THOUSAND,
2062 	},
2063 #endif
2064 	{
2065 		.procname	= "panic_on_warn",
2066 		.data		= &panic_on_warn,
2067 		.maxlen		= sizeof(int),
2068 		.mode		= 0644,
2069 		.proc_handler	= proc_dointvec_minmax,
2070 		.extra1		= SYSCTL_ZERO,
2071 		.extra2		= SYSCTL_ONE,
2072 	},
2073 #ifdef CONFIG_TREE_RCU
2074 	{
2075 		.procname	= "panic_on_rcu_stall",
2076 		.data		= &sysctl_panic_on_rcu_stall,
2077 		.maxlen		= sizeof(sysctl_panic_on_rcu_stall),
2078 		.mode		= 0644,
2079 		.proc_handler	= proc_dointvec_minmax,
2080 		.extra1		= SYSCTL_ZERO,
2081 		.extra2		= SYSCTL_ONE,
2082 	},
2083 	{
2084 		.procname	= "max_rcu_stall_to_panic",
2085 		.data		= &sysctl_max_rcu_stall_to_panic,
2086 		.maxlen		= sizeof(sysctl_max_rcu_stall_to_panic),
2087 		.mode		= 0644,
2088 		.proc_handler	= proc_dointvec_minmax,
2089 		.extra1		= SYSCTL_ONE,
2090 		.extra2		= SYSCTL_INT_MAX,
2091 	},
2092 #endif
2093 	{ }
2094 };
2095 
2096 static struct ctl_table vm_table[] = {
2097 	{
2098 		.procname	= "overcommit_memory",
2099 		.data		= &sysctl_overcommit_memory,
2100 		.maxlen		= sizeof(sysctl_overcommit_memory),
2101 		.mode		= 0644,
2102 		.proc_handler	= overcommit_policy_handler,
2103 		.extra1		= SYSCTL_ZERO,
2104 		.extra2		= SYSCTL_TWO,
2105 	},
2106 	{
2107 		.procname	= "overcommit_ratio",
2108 		.data		= &sysctl_overcommit_ratio,
2109 		.maxlen		= sizeof(sysctl_overcommit_ratio),
2110 		.mode		= 0644,
2111 		.proc_handler	= overcommit_ratio_handler,
2112 	},
2113 	{
2114 		.procname	= "overcommit_kbytes",
2115 		.data		= &sysctl_overcommit_kbytes,
2116 		.maxlen		= sizeof(sysctl_overcommit_kbytes),
2117 		.mode		= 0644,
2118 		.proc_handler	= overcommit_kbytes_handler,
2119 	},
2120 	{
2121 		.procname	= "page-cluster",
2122 		.data		= &page_cluster,
2123 		.maxlen		= sizeof(int),
2124 		.mode		= 0644,
2125 		.proc_handler	= proc_dointvec_minmax,
2126 		.extra1		= SYSCTL_ZERO,
2127 	},
2128 	{
2129 		.procname	= "dirtytime_expire_seconds",
2130 		.data		= &dirtytime_expire_interval,
2131 		.maxlen		= sizeof(dirtytime_expire_interval),
2132 		.mode		= 0644,
2133 		.proc_handler	= dirtytime_interval_handler,
2134 		.extra1		= SYSCTL_ZERO,
2135 	},
2136 	{
2137 		.procname	= "swappiness",
2138 		.data		= &vm_swappiness,
2139 		.maxlen		= sizeof(vm_swappiness),
2140 		.mode		= 0644,
2141 		.proc_handler	= proc_dointvec_minmax,
2142 		.extra1		= SYSCTL_ZERO,
2143 		.extra2		= SYSCTL_TWO_HUNDRED,
2144 	},
2145 #ifdef CONFIG_NUMA
2146 	{
2147 		.procname	= "numa_stat",
2148 		.data		= &sysctl_vm_numa_stat,
2149 		.maxlen		= sizeof(int),
2150 		.mode		= 0644,
2151 		.proc_handler	= sysctl_vm_numa_stat_handler,
2152 		.extra1		= SYSCTL_ZERO,
2153 		.extra2		= SYSCTL_ONE,
2154 	},
2155 #endif
2156 #ifdef CONFIG_HUGETLB_PAGE
2157 	{
2158 		.procname	= "nr_hugepages",
2159 		.data		= NULL,
2160 		.maxlen		= sizeof(unsigned long),
2161 		.mode		= 0644,
2162 		.proc_handler	= hugetlb_sysctl_handler,
2163 	},
2164 #ifdef CONFIG_NUMA
2165 	{
2166 		.procname       = "nr_hugepages_mempolicy",
2167 		.data           = NULL,
2168 		.maxlen         = sizeof(unsigned long),
2169 		.mode           = 0644,
2170 		.proc_handler   = &hugetlb_mempolicy_sysctl_handler,
2171 	},
2172 #endif
2173 	 {
2174 		.procname	= "hugetlb_shm_group",
2175 		.data		= &sysctl_hugetlb_shm_group,
2176 		.maxlen		= sizeof(gid_t),
2177 		.mode		= 0644,
2178 		.proc_handler	= proc_dointvec,
2179 	 },
2180 	{
2181 		.procname	= "nr_overcommit_hugepages",
2182 		.data		= NULL,
2183 		.maxlen		= sizeof(unsigned long),
2184 		.mode		= 0644,
2185 		.proc_handler	= hugetlb_overcommit_handler,
2186 	},
2187 #endif
2188 	{
2189 		.procname	= "lowmem_reserve_ratio",
2190 		.data		= &sysctl_lowmem_reserve_ratio,
2191 		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
2192 		.mode		= 0644,
2193 		.proc_handler	= lowmem_reserve_ratio_sysctl_handler,
2194 	},
2195 	{
2196 		.procname	= "drop_caches",
2197 		.data		= &sysctl_drop_caches,
2198 		.maxlen		= sizeof(int),
2199 		.mode		= 0200,
2200 		.proc_handler	= drop_caches_sysctl_handler,
2201 		.extra1		= SYSCTL_ONE,
2202 		.extra2		= SYSCTL_FOUR,
2203 	},
2204 #ifdef CONFIG_COMPACTION
2205 	{
2206 		.procname	= "compact_memory",
2207 		.data		= NULL,
2208 		.maxlen		= sizeof(int),
2209 		.mode		= 0200,
2210 		.proc_handler	= sysctl_compaction_handler,
2211 	},
2212 	{
2213 		.procname	= "compaction_proactiveness",
2214 		.data		= &sysctl_compaction_proactiveness,
2215 		.maxlen		= sizeof(sysctl_compaction_proactiveness),
2216 		.mode		= 0644,
2217 		.proc_handler	= compaction_proactiveness_sysctl_handler,
2218 		.extra1		= SYSCTL_ZERO,
2219 		.extra2		= SYSCTL_ONE_HUNDRED,
2220 	},
2221 	{
2222 		.procname	= "extfrag_threshold",
2223 		.data		= &sysctl_extfrag_threshold,
2224 		.maxlen		= sizeof(int),
2225 		.mode		= 0644,
2226 		.proc_handler	= proc_dointvec_minmax,
2227 		.extra1		= SYSCTL_ZERO,
2228 		.extra2		= SYSCTL_ONE_THOUSAND,
2229 	},
2230 	{
2231 		.procname	= "compact_unevictable_allowed",
2232 		.data		= &sysctl_compact_unevictable_allowed,
2233 		.maxlen		= sizeof(int),
2234 		.mode		= 0644,
2235 		.proc_handler	= proc_dointvec_minmax_warn_RT_change,
2236 		.extra1		= SYSCTL_ZERO,
2237 		.extra2		= SYSCTL_ONE,
2238 	},
2239 
2240 #endif /* CONFIG_COMPACTION */
2241 	{
2242 		.procname	= "min_free_kbytes",
2243 		.data		= &min_free_kbytes,
2244 		.maxlen		= sizeof(min_free_kbytes),
2245 		.mode		= 0644,
2246 		.proc_handler	= min_free_kbytes_sysctl_handler,
2247 		.extra1		= SYSCTL_ZERO,
2248 	},
2249 	{
2250 		.procname	= "watermark_boost_factor",
2251 		.data		= &watermark_boost_factor,
2252 		.maxlen		= sizeof(watermark_boost_factor),
2253 		.mode		= 0644,
2254 		.proc_handler	= proc_dointvec_minmax,
2255 		.extra1		= SYSCTL_ZERO,
2256 	},
2257 	{
2258 		.procname	= "watermark_scale_factor",
2259 		.data		= &watermark_scale_factor,
2260 		.maxlen		= sizeof(watermark_scale_factor),
2261 		.mode		= 0644,
2262 		.proc_handler	= watermark_scale_factor_sysctl_handler,
2263 		.extra1		= SYSCTL_ONE,
2264 		.extra2		= SYSCTL_THREE_THOUSAND,
2265 	},
2266 	{
2267 		.procname	= "percpu_pagelist_high_fraction",
2268 		.data		= &percpu_pagelist_high_fraction,
2269 		.maxlen		= sizeof(percpu_pagelist_high_fraction),
2270 		.mode		= 0644,
2271 		.proc_handler	= percpu_pagelist_high_fraction_sysctl_handler,
2272 		.extra1		= SYSCTL_ZERO,
2273 	},
2274 	{
2275 		.procname	= "page_lock_unfairness",
2276 		.data		= &sysctl_page_lock_unfairness,
2277 		.maxlen		= sizeof(sysctl_page_lock_unfairness),
2278 		.mode		= 0644,
2279 		.proc_handler	= proc_dointvec_minmax,
2280 		.extra1		= SYSCTL_ZERO,
2281 	},
2282 #ifdef CONFIG_MMU
2283 	{
2284 		.procname	= "max_map_count",
2285 		.data		= &sysctl_max_map_count,
2286 		.maxlen		= sizeof(sysctl_max_map_count),
2287 		.mode		= 0644,
2288 		.proc_handler	= proc_dointvec_minmax,
2289 		.extra1		= SYSCTL_ZERO,
2290 	},
2291 #else
2292 	{
2293 		.procname	= "nr_trim_pages",
2294 		.data		= &sysctl_nr_trim_pages,
2295 		.maxlen		= sizeof(sysctl_nr_trim_pages),
2296 		.mode		= 0644,
2297 		.proc_handler	= proc_dointvec_minmax,
2298 		.extra1		= SYSCTL_ZERO,
2299 	},
2300 #endif
2301 	{
2302 		.procname	= "vfs_cache_pressure",
2303 		.data		= &sysctl_vfs_cache_pressure,
2304 		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
2305 		.mode		= 0644,
2306 		.proc_handler	= proc_dointvec_minmax,
2307 		.extra1		= SYSCTL_ZERO,
2308 	},
2309 #if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
2310     defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
2311 	{
2312 		.procname	= "legacy_va_layout",
2313 		.data		= &sysctl_legacy_va_layout,
2314 		.maxlen		= sizeof(sysctl_legacy_va_layout),
2315 		.mode		= 0644,
2316 		.proc_handler	= proc_dointvec_minmax,
2317 		.extra1		= SYSCTL_ZERO,
2318 	},
2319 #endif
2320 #ifdef CONFIG_NUMA
2321 	{
2322 		.procname	= "zone_reclaim_mode",
2323 		.data		= &node_reclaim_mode,
2324 		.maxlen		= sizeof(node_reclaim_mode),
2325 		.mode		= 0644,
2326 		.proc_handler	= proc_dointvec_minmax,
2327 		.extra1		= SYSCTL_ZERO,
2328 	},
2329 	{
2330 		.procname	= "min_unmapped_ratio",
2331 		.data		= &sysctl_min_unmapped_ratio,
2332 		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
2333 		.mode		= 0644,
2334 		.proc_handler	= sysctl_min_unmapped_ratio_sysctl_handler,
2335 		.extra1		= SYSCTL_ZERO,
2336 		.extra2		= SYSCTL_ONE_HUNDRED,
2337 	},
2338 	{
2339 		.procname	= "min_slab_ratio",
2340 		.data		= &sysctl_min_slab_ratio,
2341 		.maxlen		= sizeof(sysctl_min_slab_ratio),
2342 		.mode		= 0644,
2343 		.proc_handler	= sysctl_min_slab_ratio_sysctl_handler,
2344 		.extra1		= SYSCTL_ZERO,
2345 		.extra2		= SYSCTL_ONE_HUNDRED,
2346 	},
2347 #endif
2348 #ifdef CONFIG_SMP
2349 	{
2350 		.procname	= "stat_interval",
2351 		.data		= &sysctl_stat_interval,
2352 		.maxlen		= sizeof(sysctl_stat_interval),
2353 		.mode		= 0644,
2354 		.proc_handler	= proc_dointvec_jiffies,
2355 	},
2356 	{
2357 		.procname	= "stat_refresh",
2358 		.data		= NULL,
2359 		.maxlen		= 0,
2360 		.mode		= 0600,
2361 		.proc_handler	= vmstat_refresh,
2362 	},
2363 #endif
2364 #ifdef CONFIG_MMU
2365 	{
2366 		.procname	= "mmap_min_addr",
2367 		.data		= &dac_mmap_min_addr,
2368 		.maxlen		= sizeof(unsigned long),
2369 		.mode		= 0644,
2370 		.proc_handler	= mmap_min_addr_handler,
2371 	},
2372 #endif
2373 #ifdef CONFIG_NUMA
2374 	{
2375 		.procname	= "numa_zonelist_order",
2376 		.data		= &numa_zonelist_order,
2377 		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
2378 		.mode		= 0644,
2379 		.proc_handler	= numa_zonelist_order_handler,
2380 	},
2381 #endif
2382 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
2383    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
2384 	{
2385 		.procname	= "vdso_enabled",
2386 #ifdef CONFIG_X86_32
2387 		.data		= &vdso32_enabled,
2388 		.maxlen		= sizeof(vdso32_enabled),
2389 #else
2390 		.data		= &vdso_enabled,
2391 		.maxlen		= sizeof(vdso_enabled),
2392 #endif
2393 		.mode		= 0644,
2394 		.proc_handler	= proc_dointvec,
2395 		.extra1		= SYSCTL_ZERO,
2396 	},
2397 #endif
2398 #ifdef CONFIG_MEMORY_FAILURE
2399 	{
2400 		.procname	= "memory_failure_early_kill",
2401 		.data		= &sysctl_memory_failure_early_kill,
2402 		.maxlen		= sizeof(sysctl_memory_failure_early_kill),
2403 		.mode		= 0644,
2404 		.proc_handler	= proc_dointvec_minmax,
2405 		.extra1		= SYSCTL_ZERO,
2406 		.extra2		= SYSCTL_ONE,
2407 	},
2408 	{
2409 		.procname	= "memory_failure_recovery",
2410 		.data		= &sysctl_memory_failure_recovery,
2411 		.maxlen		= sizeof(sysctl_memory_failure_recovery),
2412 		.mode		= 0644,
2413 		.proc_handler	= proc_dointvec_minmax,
2414 		.extra1		= SYSCTL_ZERO,
2415 		.extra2		= SYSCTL_ONE,
2416 	},
2417 #endif
2418 	{
2419 		.procname	= "user_reserve_kbytes",
2420 		.data		= &sysctl_user_reserve_kbytes,
2421 		.maxlen		= sizeof(sysctl_user_reserve_kbytes),
2422 		.mode		= 0644,
2423 		.proc_handler	= proc_doulongvec_minmax,
2424 	},
2425 	{
2426 		.procname	= "admin_reserve_kbytes",
2427 		.data		= &sysctl_admin_reserve_kbytes,
2428 		.maxlen		= sizeof(sysctl_admin_reserve_kbytes),
2429 		.mode		= 0644,
2430 		.proc_handler	= proc_doulongvec_minmax,
2431 	},
2432 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
2433 	{
2434 		.procname	= "mmap_rnd_bits",
2435 		.data		= &mmap_rnd_bits,
2436 		.maxlen		= sizeof(mmap_rnd_bits),
2437 		.mode		= 0600,
2438 		.proc_handler	= proc_dointvec_minmax,
2439 		.extra1		= (void *)&mmap_rnd_bits_min,
2440 		.extra2		= (void *)&mmap_rnd_bits_max,
2441 	},
2442 #endif
2443 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
2444 	{
2445 		.procname	= "mmap_rnd_compat_bits",
2446 		.data		= &mmap_rnd_compat_bits,
2447 		.maxlen		= sizeof(mmap_rnd_compat_bits),
2448 		.mode		= 0600,
2449 		.proc_handler	= proc_dointvec_minmax,
2450 		.extra1		= (void *)&mmap_rnd_compat_bits_min,
2451 		.extra2		= (void *)&mmap_rnd_compat_bits_max,
2452 	},
2453 #endif
2454 #ifdef CONFIG_USERFAULTFD
2455 	{
2456 		.procname	= "unprivileged_userfaultfd",
2457 		.data		= &sysctl_unprivileged_userfaultfd,
2458 		.maxlen		= sizeof(sysctl_unprivileged_userfaultfd),
2459 		.mode		= 0644,
2460 		.proc_handler	= proc_dointvec_minmax,
2461 		.extra1		= SYSCTL_ZERO,
2462 		.extra2		= SYSCTL_ONE,
2463 	},
2464 #endif
2465 	{ }
2466 };
2467 
2468 static struct ctl_table debug_table[] = {
2469 #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
2470 	{
2471 		.procname	= "exception-trace",
2472 		.data		= &show_unhandled_signals,
2473 		.maxlen		= sizeof(int),
2474 		.mode		= 0644,
2475 		.proc_handler	= proc_dointvec
2476 	},
2477 #endif
2478 	{ }
2479 };
2480 
/*
 * Table handed to DECLARE_SYSCTL_BASE(dev, dev_table).  It defines no
 * entries of its own here; it holds only the empty trailing entry.
 */
static struct ctl_table dev_table[] = {
	{ }
};
2484 
/*
 * Pair each base name with the table that supplies its entries.  The same
 * four names (kernel, vm, debug, dev) are the ones sysctl_init_bases()
 * passes to register_sysctl_base().
 *
 * NOTE(review): DECLARE_SYSCTL_BASE() is defined outside this file section;
 * presumably each name becomes a top-level sysctl directory - confirm
 * against the macro definition.
 */
DECLARE_SYSCTL_BASE(kernel, kern_table);
DECLARE_SYSCTL_BASE(vm, vm_table);
DECLARE_SYSCTL_BASE(debug, debug_table);
DECLARE_SYSCTL_BASE(dev, dev_table);
2489 
/**
 * sysctl_init_bases - register the kernel, vm, debug and dev sysctl bases
 *
 * Invokes register_sysctl_base() once for each base set up with
 * DECLARE_SYSCTL_BASE(), in the order kernel, vm, debug, dev.  Whatever
 * those invocations evaluate to is not inspected here.
 *
 * Return: always 0.
 */
int __init sysctl_init_bases(void)
{
	/* The arguments are the bare base names used in DECLARE_SYSCTL_BASE(). */
	register_sysctl_base(kernel);
	register_sysctl_base(vm);
	register_sysctl_base(debug);
	register_sysctl_base(dev);

	return 0;
}
2499 #endif /* CONFIG_SYSCTL */
/*
 * All exports of the proc handlers are collected here, after the
 * CONFIG_SYSCTL block, instead of following each symbol's definition.
 * Placing them next to the definitions would mean writing every export
 * twice, so an exception to the usual placement is granted :-)
 *
 * NOTE(review): "twice" presumably refers to each handler having one
 * definition per CONFIG_SYSCTL setting - confirm against the definitions
 * earlier in the file.
 */
EXPORT_SYMBOL(proc_dobool);
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(proc_do_large_bitmap);
2516