xref: /openbmc/linux/kernel/sysctl.c (revision f43e47c090dc7fe32d5410d8740c3a004eb2676f)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * sysctl.c: General linux system control interface
4  *
5  * Begun 24 March 1995, Stephen Tweedie
6  * Added /proc support, Dec 1995
7  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
8  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
9  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
10  * Dynamic registration fixes, Stephen Tweedie.
11  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
12  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
13  *  Horn.
14  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
15  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
16  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
17  *  Wendling.
18  * The list_for_each() macro wasn't appropriate for the sysctl loop.
19  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
20  */
21 
22 #include <linux/module.h>
23 #include <linux/mm.h>
24 #include <linux/swap.h>
25 #include <linux/slab.h>
26 #include <linux/sysctl.h>
27 #include <linux/bitmap.h>
28 #include <linux/signal.h>
29 #include <linux/panic.h>
30 #include <linux/printk.h>
31 #include <linux/proc_fs.h>
32 #include <linux/security.h>
33 #include <linux/ctype.h>
34 #include <linux/kmemleak.h>
35 #include <linux/filter.h>
36 #include <linux/fs.h>
37 #include <linux/init.h>
38 #include <linux/kernel.h>
39 #include <linux/kobject.h>
40 #include <linux/net.h>
41 #include <linux/sysrq.h>
42 #include <linux/highuid.h>
43 #include <linux/writeback.h>
44 #include <linux/ratelimit.h>
45 #include <linux/compaction.h>
46 #include <linux/hugetlb.h>
47 #include <linux/initrd.h>
48 #include <linux/key.h>
49 #include <linux/times.h>
50 #include <linux/limits.h>
51 #include <linux/dcache.h>
52 #include <linux/syscalls.h>
53 #include <linux/vmstat.h>
54 #include <linux/nfs_fs.h>
55 #include <linux/acpi.h>
56 #include <linux/reboot.h>
57 #include <linux/ftrace.h>
58 #include <linux/perf_event.h>
59 #include <linux/oom.h>
60 #include <linux/kmod.h>
61 #include <linux/capability.h>
62 #include <linux/binfmts.h>
63 #include <linux/sched/sysctl.h>
64 #include <linux/mount.h>
65 #include <linux/userfaultfd_k.h>
66 #include <linux/pid.h>
67 
68 #include "../lib/kstrtox.h"
69 
70 #include <linux/uaccess.h>
71 #include <asm/processor.h>
72 
73 #ifdef CONFIG_X86
74 #include <asm/nmi.h>
75 #include <asm/stacktrace.h>
76 #include <asm/io.h>
77 #endif
78 #ifdef CONFIG_SPARC
79 #include <asm/setup.h>
80 #endif
81 #ifdef CONFIG_RT_MUTEXES
82 #include <linux/rtmutex.h>
83 #endif
84 
/*
 * Constant tables shared by various sysctls. Both arrays are exported
 * (sysctl_long_vals to GPL modules only), so entries must keep their
 * position and value.
 */
const int sysctl_vals[] = {
	0, 1, 2, 3, 4,
	100, 200, 1000, 3000,
	INT_MAX, 65535, -1,
};
EXPORT_SYMBOL(sysctl_vals);

const unsigned long sysctl_long_vals[] = {
	0,
	1,
	LONG_MAX,
};
EXPORT_SYMBOL_GPL(sysctl_long_vals);
91 
92 #if defined(CONFIG_SYSCTL)
93 
94 /* Constants used for minimum and maximum */
95 
96 #ifdef CONFIG_PERF_EVENTS
97 static const int six_hundred_forty_kb = 640 * 1024;
98 #endif
99 
100 
101 static const int ngroups_max = NGROUPS_MAX;
102 static const int cap_last_cap = CAP_LAST_CAP;
103 
104 #ifdef CONFIG_PROC_SYSCTL
105 
/**
 * enum sysctl_writes_mode - supported sysctl write modes
 *
 * @SYSCTL_WRITES_LEGACY: every write syscall must carry the complete sysctl
 *	value. Repeated writes on the same file descriptor overwrite the
 *	value regardless of the file position, and no warning is issued when
 *	the initial position is not 0.
 * @SYSCTL_WRITES_WARN: same behaviour as the legacy mode, except that a
 *	warning is issued when the initial file position is not 0.
 * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must happen at
 *	file position 0 and the value must be fully contained in the buffer
 *	handed to the write syscall. String entries honour the file
 *	position, limited to the maximum length of the buffer: anything
 *	past the maximum length is ignored. Multiple writes append to the
 *	buffer.
 *
 * The write mode decides how the current file position influences an
 * update of a sysctl value made through the proc interface.
 */
enum sysctl_writes_mode {
	SYSCTL_WRITES_LEGACY	= -1,
	SYSCTL_WRITES_WARN	= 0,
	SYSCTL_WRITES_STRICT	= 1,
};

/* Write mode currently in effect; strict handling is the initial value. */
static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
132 #endif /* CONFIG_PROC_SYSCTL */
133 
#if defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT) || \
    defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
/* No explicit initializer: as a file-scope object it starts out as 0. */
int sysctl_legacy_va_layout;
#endif
138 
139 #endif /* CONFIG_SYSCTL */
140 
141 /*
142  * /proc/sys support
143  */
144 
145 #ifdef CONFIG_PROC_SYSCTL
146 
147 static int _proc_do_string(char *data, int maxlen, int write,
148 		char *buffer, size_t *lenp, loff_t *ppos)
149 {
150 	size_t len;
151 	char c, *p;
152 
153 	if (!data || !maxlen || !*lenp) {
154 		*lenp = 0;
155 		return 0;
156 	}
157 
158 	if (write) {
159 		if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
160 			/* Only continue writes not past the end of buffer. */
161 			len = strlen(data);
162 			if (len > maxlen - 1)
163 				len = maxlen - 1;
164 
165 			if (*ppos > len)
166 				return 0;
167 			len = *ppos;
168 		} else {
169 			/* Start writing from beginning of buffer. */
170 			len = 0;
171 		}
172 
173 		*ppos += *lenp;
174 		p = buffer;
175 		while ((p - buffer) < *lenp && len < maxlen - 1) {
176 			c = *(p++);
177 			if (c == 0 || c == '\n')
178 				break;
179 			data[len++] = c;
180 		}
181 		data[len] = 0;
182 	} else {
183 		len = strlen(data);
184 		if (len > maxlen)
185 			len = maxlen;
186 
187 		if (*ppos > len) {
188 			*lenp = 0;
189 			return 0;
190 		}
191 
192 		data += *ppos;
193 		len  -= *ppos;
194 
195 		if (len > *lenp)
196 			len = *lenp;
197 		if (len)
198 			memcpy(buffer, data, len);
199 		if (len < *lenp) {
200 			buffer[len] = '\n';
201 			len++;
202 		}
203 		*lenp = len;
204 		*ppos += len;
205 	}
206 	return 0;
207 }
208 
209 static void warn_sysctl_write(struct ctl_table *table)
210 {
211 	pr_warn_once("%s wrote to %s when file position was not 0!\n"
212 		"This will not be supported in the future. To silence this\n"
213 		"warning, set kernel.sysctl_writes_strict = -1\n",
214 		current->comm, table->procname);
215 }
216 
217 /**
218  * proc_first_pos_non_zero_ignore - check if first position is allowed
219  * @ppos: file position
220  * @table: the sysctl table
221  *
222  * Returns true if the first position is non-zero and the sysctl_writes_strict
223  * mode indicates this is not allowed for numeric input types. String proc
224  * handlers can ignore the return value.
225  */
226 static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
227 					   struct ctl_table *table)
228 {
229 	if (!*ppos)
230 		return false;
231 
232 	switch (sysctl_writes_strict) {
233 	case SYSCTL_WRITES_STRICT:
234 		return true;
235 	case SYSCTL_WRITES_WARN:
236 		warn_sysctl_write(table);
237 		return false;
238 	default:
239 		return false;
240 	}
241 }
242 
243 /**
244  * proc_dostring - read a string sysctl
245  * @table: the sysctl table
246  * @write: %TRUE if this is a write to the sysctl file
247  * @buffer: the user buffer
248  * @lenp: the size of the user buffer
249  * @ppos: file position
250  *
251  * Reads/writes a string from/to the user buffer. If the kernel
252  * buffer provided is not large enough to hold the string, the
253  * string is truncated. The copied string is %NULL-terminated.
254  * If the string is being read by the user process, it is copied
255  * and a newline '\n' is added. It is truncated if the buffer is
256  * not large enough.
257  *
258  * Returns 0 on success.
259  */
260 int proc_dostring(struct ctl_table *table, int write,
261 		  void *buffer, size_t *lenp, loff_t *ppos)
262 {
263 	if (write)
264 		proc_first_pos_non_zero_ignore(ppos, table);
265 
266 	return _proc_do_string(table->data, table->maxlen, write, buffer, lenp,
267 			ppos);
268 }
269 
/*
 * Advance *buf past leading whitespace and return how many characters
 * were skipped.
 *
 * NOTE(review): skip_spaces() takes no length, so this presumably relies
 * on the buffer being NUL-terminated and may skip more characters than
 * the caller still counts as remaining - callers must verify.
 */
static size_t proc_skip_spaces(char **buf)
{
	char *start = *buf;

	*buf = skip_spaces(start);
	return *buf - start;
}
278 
/*
 * Skip leading occurrences of @v: advance *buf and decrease *size once
 * per matching character, never consuming more than *size characters.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	char *cur = *buf;
	size_t remaining = *size;

	while (remaining && *cur == v) {
		cur++;
		remaining--;
	}

	*buf = cur;
	*size = remaining;
}
288 
289 /**
290  * strtoul_lenient - parse an ASCII formatted integer from a buffer and only
291  *                   fail on overflow
292  *
293  * @cp: kernel buffer containing the string to parse
294  * @endp: pointer to store the trailing characters
295  * @base: the base to use
296  * @res: where the parsed integer will be stored
297  *
298  * In case of success 0 is returned and @res will contain the parsed integer,
299  * @endp will hold any trailing characters.
300  * This function will fail the parse on overflow. If there wasn't an overflow
301  * the function will defer the decision what characters count as invalid to the
302  * caller.
303  */
304 static int strtoul_lenient(const char *cp, char **endp, unsigned int base,
305 			   unsigned long *res)
306 {
307 	unsigned long long result;
308 	unsigned int rv;
309 
310 	cp = _parse_integer_fixup_radix(cp, &base);
311 	rv = _parse_integer(cp, base, &result);
312 	if ((rv & KSTRTOX_OVERFLOW) || (result != (unsigned long)result))
313 		return -ERANGE;
314 
315 	cp += rv;
316 
317 	if (endp)
318 		*endp = (char *)cp;
319 
320 	*res = (unsigned long)result;
321 	return 0;
322 }
323 
#define TMPBUFLEN 22
/**
 * proc_get_long - reads an ASCII formatted integer from a buffer
 *
 * @buf: a kernel buffer
 * @size: size of the kernel buffer
 * @val: this is where the number will be stored
 * @neg: set to %TRUE if number is negative
 * @perm_tr: a vector which contains the allowed trailers
 * @perm_tr_len: size of the perm_tr vector
 * @tr: pointer to store the trailer character
 *
 * In case of success %0 is returned and @buf and @size are updated with
 * the amount of bytes read. If @tr is non-NULL and a trailing
 * character exists (size is non-zero after returning from this
 * function), @tr is updated with the trailing character.
 *
 * -EINVAL is returned when @size is zero or too large to be a valid
 * length, when no digit is found, when the number overflows or is too
 * long for the local scratch buffer, or when it is followed by a
 * character that is not listed in @perm_tr.
 */
static int proc_get_long(char **buf, size_t *size,
			  unsigned long *val, bool *neg,
			  const char *perm_tr, unsigned perm_tr_len, char *tr)
{
	char *p, tmp[TMPBUFLEN];
	/*
	 * Signed and as wide as size_t: with a plain int, a huge *size
	 * (e.g. a count that wrapped in a caller) could turn into a
	 * negative length that slips past the clamp below and reaches
	 * memcpy() as an enormous size_t.
	 */
	ssize_t len = *size;

	if (len <= 0)
		return -EINVAL;

	if (len > TMPBUFLEN - 1)
		len = TMPBUFLEN - 1;

	memcpy(tmp, *buf, len);

	tmp[len] = 0;
	p = tmp;
	if (*p == '-' && *size > 1) {
		*neg = true;
		p++;
	} else
		*neg = false;
	if (!isdigit(*p))
		return -EINVAL;

	if (strtoul_lenient(p, &p, 0, val))
		return -EINVAL;

	len = p - tmp;

	/* We don't know if the next char is whitespace thus we may accept
	 * invalid integers (e.g. 1234...a) or two integers instead of one
	 * (e.g. 123...1). So lets not allow such large numbers. */
	if (len == TMPBUFLEN - 1)
		return -EINVAL;

	if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
		return -EINVAL;

	if (tr && (len < *size))
		*tr = *p;

	*buf += len;
	*size -= len;

	return 0;
}
389 
390 /**
391  * proc_put_long - converts an integer to a decimal ASCII formatted string
392  *
393  * @buf: the user buffer
394  * @size: the size of the user buffer
395  * @val: the integer to be converted
396  * @neg: sign of the number, %TRUE for negative
397  *
398  * In case of success @buf and @size are updated with the amount of bytes
399  * written.
400  */
401 static void proc_put_long(void **buf, size_t *size, unsigned long val, bool neg)
402 {
403 	int len;
404 	char tmp[TMPBUFLEN], *p = tmp;
405 
406 	sprintf(p, "%s%lu", neg ? "-" : "", val);
407 	len = strlen(tmp);
408 	if (len > *size)
409 		len = *size;
410 	memcpy(*buf, tmp, len);
411 	*size -= len;
412 	*buf += len;
413 }
414 #undef TMPBUFLEN
415 
/*
 * Store @c at *buf and step *buf/*size past it. Does nothing when no
 * space is left (*size == 0).
 */
static void proc_put_char(void **buf, size_t *size, char c)
{
	char *dst;

	if (!*size)
		return;

	dst = *buf;
	*dst = c;

	*buf = dst + 1;
	(*size)--;
}
427 
/*
 * Conversion callback for bool values: @valp is accessed as a bool.
 * A write stores *@lvalp converted to bool; a read reports the bool as
 * a non-negative number. Always returns 0.
 */
static int do_proc_dobool_conv(bool *negp, unsigned long *lvalp,
				int *valp,
				int write, void *data)
{
	bool *flag = (bool *)valp;

	if (write) {
		*flag = *lvalp;
		return 0;
	}

	*lvalp = (unsigned long)(int)*flag;
	*negp = false;
	return 0;
}
442 
/*
 * Conversion callback between an int and the sign/magnitude pair
 * (*@negp, *@lvalp) used by the parser and formatter.
 *
 * On a write, -EINVAL is returned when the magnitude does not fit in an
 * int: more than INT_MAX for positive values, more than INT_MAX + 1 for
 * negative ones. Otherwise returns 0.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	bool negative;
	int val;

	if (!write) {
		val = READ_ONCE(*valp);
		negative = val < 0;

		*negp = negative;
		*lvalp = negative ? -(unsigned long)val : (unsigned long)val;
		return 0;
	}

	negative = *negp;
	/* A negative value may have a magnitude one larger than INT_MAX. */
	if (*lvalp > (unsigned long) INT_MAX + (negative ? 1 : 0))
		return -EINVAL;

	WRITE_ONCE(*valp, negative ? -*lvalp : *lvalp);
	return 0;
}
469 
/*
 * Conversion callback between an unsigned int and an unsigned long.
 * A write of a value above UINT_MAX fails with -EINVAL; otherwise 0 is
 * returned.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
				  unsigned int *valp,
				  int write, void *data)
{
	if (!write) {
		*lvalp = (unsigned long)READ_ONCE(*valp);
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;

	WRITE_ONCE(*valp, *lvalp);
	return 0;
}
484 
/*
 * Characters allowed to follow a parsed number. This is a plain character
 * vector without a NUL terminator; it is used together with its sizeof().
 */
static const char proc_wspace_sep[] = {
	' ',
	'\t',
	'\n',
};
486 
487 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
488 		  int write, void *buffer,
489 		  size_t *lenp, loff_t *ppos,
490 		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
491 			      int write, void *data),
492 		  void *data)
493 {
494 	int *i, vleft, first = 1, err = 0;
495 	size_t left;
496 	char *p;
497 
498 	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
499 		*lenp = 0;
500 		return 0;
501 	}
502 
503 	i = (int *) tbl_data;
504 	vleft = table->maxlen / sizeof(*i);
505 	left = *lenp;
506 
507 	if (!conv)
508 		conv = do_proc_dointvec_conv;
509 
510 	if (write) {
511 		if (proc_first_pos_non_zero_ignore(ppos, table))
512 			goto out;
513 
514 		if (left > PAGE_SIZE - 1)
515 			left = PAGE_SIZE - 1;
516 		p = buffer;
517 	}
518 
519 	for (; left && vleft--; i++, first=0) {
520 		unsigned long lval;
521 		bool neg;
522 
523 		if (write) {
524 			left -= proc_skip_spaces(&p);
525 
526 			if (!left)
527 				break;
528 			err = proc_get_long(&p, &left, &lval, &neg,
529 					     proc_wspace_sep,
530 					     sizeof(proc_wspace_sep), NULL);
531 			if (err)
532 				break;
533 			if (conv(&neg, &lval, i, 1, data)) {
534 				err = -EINVAL;
535 				break;
536 			}
537 		} else {
538 			if (conv(&neg, &lval, i, 0, data)) {
539 				err = -EINVAL;
540 				break;
541 			}
542 			if (!first)
543 				proc_put_char(&buffer, &left, '\t');
544 			proc_put_long(&buffer, &left, lval, neg);
545 		}
546 	}
547 
548 	if (!write && !first && left && !err)
549 		proc_put_char(&buffer, &left, '\n');
550 	if (write && !err && left)
551 		left -= proc_skip_spaces(&p);
552 	if (write && first)
553 		return err ? : -EINVAL;
554 	*lenp -= left;
555 out:
556 	*ppos += *lenp;
557 	return err;
558 }
559 
560 static int do_proc_dointvec(struct ctl_table *table, int write,
561 		  void *buffer, size_t *lenp, loff_t *ppos,
562 		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
563 			      int write, void *data),
564 		  void *data)
565 {
566 	return __do_proc_dointvec(table->data, table, write,
567 			buffer, lenp, ppos, conv, data);
568 }
569 
570 static int do_proc_douintvec_w(unsigned int *tbl_data,
571 			       struct ctl_table *table,
572 			       void *buffer,
573 			       size_t *lenp, loff_t *ppos,
574 			       int (*conv)(unsigned long *lvalp,
575 					   unsigned int *valp,
576 					   int write, void *data),
577 			       void *data)
578 {
579 	unsigned long lval;
580 	int err = 0;
581 	size_t left;
582 	bool neg;
583 	char *p = buffer;
584 
585 	left = *lenp;
586 
587 	if (proc_first_pos_non_zero_ignore(ppos, table))
588 		goto bail_early;
589 
590 	if (left > PAGE_SIZE - 1)
591 		left = PAGE_SIZE - 1;
592 
593 	left -= proc_skip_spaces(&p);
594 	if (!left) {
595 		err = -EINVAL;
596 		goto out_free;
597 	}
598 
599 	err = proc_get_long(&p, &left, &lval, &neg,
600 			     proc_wspace_sep,
601 			     sizeof(proc_wspace_sep), NULL);
602 	if (err || neg) {
603 		err = -EINVAL;
604 		goto out_free;
605 	}
606 
607 	if (conv(&lval, tbl_data, 1, data)) {
608 		err = -EINVAL;
609 		goto out_free;
610 	}
611 
612 	if (!err && left)
613 		left -= proc_skip_spaces(&p);
614 
615 out_free:
616 	if (err)
617 		return -EINVAL;
618 
619 	return 0;
620 
621 	/* This is in keeping with old __do_proc_dointvec() */
622 bail_early:
623 	*ppos += *lenp;
624 	return err;
625 }
626 
627 static int do_proc_douintvec_r(unsigned int *tbl_data, void *buffer,
628 			       size_t *lenp, loff_t *ppos,
629 			       int (*conv)(unsigned long *lvalp,
630 					   unsigned int *valp,
631 					   int write, void *data),
632 			       void *data)
633 {
634 	unsigned long lval;
635 	int err = 0;
636 	size_t left;
637 
638 	left = *lenp;
639 
640 	if (conv(&lval, tbl_data, 0, data)) {
641 		err = -EINVAL;
642 		goto out;
643 	}
644 
645 	proc_put_long(&buffer, &left, lval, false);
646 	if (!left)
647 		goto out;
648 
649 	proc_put_char(&buffer, &left, '\n');
650 
651 out:
652 	*lenp -= left;
653 	*ppos += *lenp;
654 
655 	return err;
656 }
657 
658 static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
659 			       int write, void *buffer,
660 			       size_t *lenp, loff_t *ppos,
661 			       int (*conv)(unsigned long *lvalp,
662 					   unsigned int *valp,
663 					   int write, void *data),
664 			       void *data)
665 {
666 	unsigned int *i, vleft;
667 
668 	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
669 		*lenp = 0;
670 		return 0;
671 	}
672 
673 	i = (unsigned int *) tbl_data;
674 	vleft = table->maxlen / sizeof(*i);
675 
676 	/*
677 	 * Arrays are not supported, keep this simple. *Do not* add
678 	 * support for them.
679 	 */
680 	if (vleft != 1) {
681 		*lenp = 0;
682 		return -EINVAL;
683 	}
684 
685 	if (!conv)
686 		conv = do_proc_douintvec_conv;
687 
688 	if (write)
689 		return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
690 					   conv, data);
691 	return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
692 }
693 
694 int do_proc_douintvec(struct ctl_table *table, int write,
695 		      void *buffer, size_t *lenp, loff_t *ppos,
696 		      int (*conv)(unsigned long *lvalp,
697 				  unsigned int *valp,
698 				  int write, void *data),
699 		      void *data)
700 {
701 	return __do_proc_douintvec(table->data, table, write,
702 				   buffer, lenp, ppos, conv, data);
703 }
704 
705 /**
706  * proc_dobool - read/write a bool
707  * @table: the sysctl table
708  * @write: %TRUE if this is a write to the sysctl file
709  * @buffer: the user buffer
710  * @lenp: the size of the user buffer
711  * @ppos: file position
712  *
713  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
714  * values from/to the user buffer, treated as an ASCII string.
715  *
716  * Returns 0 on success.
717  */
718 int proc_dobool(struct ctl_table *table, int write, void *buffer,
719 		size_t *lenp, loff_t *ppos)
720 {
721 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
722 				do_proc_dobool_conv, NULL);
723 }
724 
725 /**
726  * proc_dointvec - read a vector of integers
727  * @table: the sysctl table
728  * @write: %TRUE if this is a write to the sysctl file
729  * @buffer: the user buffer
730  * @lenp: the size of the user buffer
731  * @ppos: file position
732  *
733  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
734  * values from/to the user buffer, treated as an ASCII string.
735  *
736  * Returns 0 on success.
737  */
738 int proc_dointvec(struct ctl_table *table, int write, void *buffer,
739 		  size_t *lenp, loff_t *ppos)
740 {
741 	return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
742 }
743 
#ifdef CONFIG_COMPACTION
/*
 * proc_dointvec_minmax() wrapper. When CONFIG_PREEMPT_RT is enabled, a
 * successful write that changes the stored value logs a one-time warning
 * naming the sysctl and the writing task. Reads, and all accesses without
 * CONFIG_PREEMPT_RT, go straight to proc_dointvec_minmax().
 */
static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table,
		int write, void *buffer, size_t *lenp, loff_t *ppos)
{
	int before, ret;

	if (!IS_ENABLED(CONFIG_PREEMPT_RT) || !write)
		return proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	before = *(int *)table->data;
	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (!ret && before != *(int *)table->data)
		pr_warn_once("sysctl attribute %s changed by %s[%d]\n",
			     table->procname, current->comm,
			     task_pid_nr(current));
	return ret;
}
#endif
764 
765 /**
766  * proc_douintvec - read a vector of unsigned integers
767  * @table: the sysctl table
768  * @write: %TRUE if this is a write to the sysctl file
769  * @buffer: the user buffer
770  * @lenp: the size of the user buffer
771  * @ppos: file position
772  *
773  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
774  * values from/to the user buffer, treated as an ASCII string.
775  *
776  * Returns 0 on success.
777  */
778 int proc_douintvec(struct ctl_table *table, int write, void *buffer,
779 		size_t *lenp, loff_t *ppos)
780 {
781 	return do_proc_douintvec(table, write, buffer, lenp, ppos,
782 				 do_proc_douintvec_conv, NULL);
783 }
784 
785 /*
786  * Taint values can only be increased
787  * This means we can safely use a temporary.
788  */
789 static int proc_taint(struct ctl_table *table, int write,
790 			       void *buffer, size_t *lenp, loff_t *ppos)
791 {
792 	struct ctl_table t;
793 	unsigned long tmptaint = get_taint();
794 	int err;
795 
796 	if (write && !capable(CAP_SYS_ADMIN))
797 		return -EPERM;
798 
799 	t = *table;
800 	t.data = &tmptaint;
801 	err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
802 	if (err < 0)
803 		return err;
804 
805 	if (write) {
806 		int i;
807 
808 		/*
809 		 * If we are relying on panic_on_taint not producing
810 		 * false positives due to userspace input, bail out
811 		 * before setting the requested taint flags.
812 		 */
813 		if (panic_on_taint_nousertaint && (tmptaint & panic_on_taint))
814 			return -EINVAL;
815 
816 		/*
817 		 * Poor man's atomic or. Not worth adding a primitive
818 		 * to everyone's atomic.h for this
819 		 */
820 		for (i = 0; i < TAINT_FLAGS_COUNT; i++)
821 			if ((1UL << i) & tmptaint)
822 				add_taint(i, LOCKDEP_STILL_OK);
823 	}
824 
825 	return err;
826 }
827 
/**
 * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
 * @min: pointer to minimum allowable value, or NULL for no lower bound
 * @max: pointer to maximum allowable value, or NULL for no upper bound
 *
 * Carries the bounds that the proc_dointvec_minmax() handler applies to
 * values written to a sysctl parameter.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min, *max;
};
841 
842 static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
843 					int *valp,
844 					int write, void *data)
845 {
846 	int tmp, ret;
847 	struct do_proc_dointvec_minmax_conv_param *param = data;
848 	/*
849 	 * If writing, first do so via a temporary local int so we can
850 	 * bounds-check it before touching *valp.
851 	 */
852 	int *ip = write ? &tmp : valp;
853 
854 	ret = do_proc_dointvec_conv(negp, lvalp, ip, write, data);
855 	if (ret)
856 		return ret;
857 
858 	if (write) {
859 		if ((param->min && *param->min > tmp) ||
860 		    (param->max && *param->max < tmp))
861 			return -EINVAL;
862 		WRITE_ONCE(*valp, tmp);
863 	}
864 
865 	return 0;
866 }
867 
868 /**
869  * proc_dointvec_minmax - read a vector of integers with min/max values
870  * @table: the sysctl table
871  * @write: %TRUE if this is a write to the sysctl file
872  * @buffer: the user buffer
873  * @lenp: the size of the user buffer
874  * @ppos: file position
875  *
876  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
877  * values from/to the user buffer, treated as an ASCII string.
878  *
879  * This routine will ensure the values are within the range specified by
880  * table->extra1 (min) and table->extra2 (max).
881  *
882  * Returns 0 on success or -EINVAL on write when the range check fails.
883  */
884 int proc_dointvec_minmax(struct ctl_table *table, int write,
885 		  void *buffer, size_t *lenp, loff_t *ppos)
886 {
887 	struct do_proc_dointvec_minmax_conv_param param = {
888 		.min = (int *) table->extra1,
889 		.max = (int *) table->extra2,
890 	};
891 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
892 				do_proc_dointvec_minmax_conv, &param);
893 }
894 
/**
 * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
 * @min: pointer to minimum allowable value, or NULL for no lower bound
 * @max: pointer to maximum allowable value, or NULL for no upper bound
 *
 * Carries the bounds that the proc_douintvec_minmax() handler applies to
 * values written to a sysctl parameter.
 */
struct do_proc_douintvec_minmax_conv_param {
	unsigned int *min, *max;
};
908 
909 static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
910 					 unsigned int *valp,
911 					 int write, void *data)
912 {
913 	int ret;
914 	unsigned int tmp;
915 	struct do_proc_douintvec_minmax_conv_param *param = data;
916 	/* write via temporary local uint for bounds-checking */
917 	unsigned int *up = write ? &tmp : valp;
918 
919 	ret = do_proc_douintvec_conv(lvalp, up, write, data);
920 	if (ret)
921 		return ret;
922 
923 	if (write) {
924 		if ((param->min && *param->min > tmp) ||
925 		    (param->max && *param->max < tmp))
926 			return -ERANGE;
927 
928 		WRITE_ONCE(*valp, tmp);
929 	}
930 
931 	return 0;
932 }
933 
934 /**
935  * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
936  * @table: the sysctl table
937  * @write: %TRUE if this is a write to the sysctl file
938  * @buffer: the user buffer
939  * @lenp: the size of the user buffer
940  * @ppos: file position
941  *
942  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
943  * values from/to the user buffer, treated as an ASCII string. Negative
944  * strings are not allowed.
945  *
946  * This routine will ensure the values are within the range specified by
947  * table->extra1 (min) and table->extra2 (max). There is a final sanity
948  * check for UINT_MAX to avoid having to support wrap around uses from
949  * userspace.
950  *
951  * Returns 0 on success or -ERANGE on write when the range check fails.
952  */
953 int proc_douintvec_minmax(struct ctl_table *table, int write,
954 			  void *buffer, size_t *lenp, loff_t *ppos)
955 {
956 	struct do_proc_douintvec_minmax_conv_param param = {
957 		.min = (unsigned int *) table->extra1,
958 		.max = (unsigned int *) table->extra2,
959 	};
960 	return do_proc_douintvec(table, write, buffer, lenp, ppos,
961 				 do_proc_douintvec_minmax_conv, &param);
962 }
963 
964 /**
965  * proc_dou8vec_minmax - read a vector of unsigned chars with min/max values
966  * @table: the sysctl table
967  * @write: %TRUE if this is a write to the sysctl file
968  * @buffer: the user buffer
969  * @lenp: the size of the user buffer
970  * @ppos: file position
971  *
972  * Reads/writes up to table->maxlen/sizeof(u8) unsigned chars
973  * values from/to the user buffer, treated as an ASCII string. Negative
974  * strings are not allowed.
975  *
976  * This routine will ensure the values are within the range specified by
977  * table->extra1 (min) and table->extra2 (max).
978  *
979  * Returns 0 on success or an error on write when the range check fails.
980  */
981 int proc_dou8vec_minmax(struct ctl_table *table, int write,
982 			void *buffer, size_t *lenp, loff_t *ppos)
983 {
984 	struct ctl_table tmp;
985 	unsigned int min = 0, max = 255U, val;
986 	u8 *data = table->data;
987 	struct do_proc_douintvec_minmax_conv_param param = {
988 		.min = &min,
989 		.max = &max,
990 	};
991 	int res;
992 
993 	/* Do not support arrays yet. */
994 	if (table->maxlen != sizeof(u8))
995 		return -EINVAL;
996 
997 	if (table->extra1) {
998 		min = *(unsigned int *) table->extra1;
999 		if (min > 255U)
1000 			return -EINVAL;
1001 	}
1002 	if (table->extra2) {
1003 		max = *(unsigned int *) table->extra2;
1004 		if (max > 255U)
1005 			return -EINVAL;
1006 	}
1007 
1008 	tmp = *table;
1009 
1010 	tmp.maxlen = sizeof(val);
1011 	tmp.data = &val;
1012 	val = READ_ONCE(*data);
1013 	res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos,
1014 				do_proc_douintvec_minmax_conv, &param);
1015 	if (res)
1016 		return res;
1017 	if (write)
1018 		WRITE_ONCE(*data, val);
1019 	return 0;
1020 }
1021 EXPORT_SYMBOL_GPL(proc_dou8vec_minmax);
1022 
#ifdef CONFIG_MAGIC_SYSRQ
/*
 * Handler for the sysrq mask. The value is read from sysrq_mask() into a
 * local and parsed/formatted as an int vector; after a successful write
 * the new value is passed to sysrq_toggle_support().
 */
static int sysrq_sysctl_handler(struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	int mask = sysrq_mask();
	int ret;

	ret = __do_proc_dointvec(&mask, table, write, buffer,
				 lenp, ppos, NULL, NULL);
	if (ret || !write)
		return ret;

	/* Only reached for a successful write. */
	sysrq_toggle_support(mask);

	return 0;
}
#endif
1042 
1043 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table,
1044 		int write, void *buffer, size_t *lenp, loff_t *ppos,
1045 		unsigned long convmul, unsigned long convdiv)
1046 {
1047 	unsigned long *i, *min, *max;
1048 	int vleft, first = 1, err = 0;
1049 	size_t left;
1050 	char *p;
1051 
1052 	if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
1053 		*lenp = 0;
1054 		return 0;
1055 	}
1056 
1057 	i = data;
1058 	min = table->extra1;
1059 	max = table->extra2;
1060 	vleft = table->maxlen / sizeof(unsigned long);
1061 	left = *lenp;
1062 
1063 	if (write) {
1064 		if (proc_first_pos_non_zero_ignore(ppos, table))
1065 			goto out;
1066 
1067 		if (left > PAGE_SIZE - 1)
1068 			left = PAGE_SIZE - 1;
1069 		p = buffer;
1070 	}
1071 
1072 	for (; left && vleft--; i++, first = 0) {
1073 		unsigned long val;
1074 
1075 		if (write) {
1076 			bool neg;
1077 
1078 			left -= proc_skip_spaces(&p);
1079 			if (!left)
1080 				break;
1081 
1082 			err = proc_get_long(&p, &left, &val, &neg,
1083 					     proc_wspace_sep,
1084 					     sizeof(proc_wspace_sep), NULL);
1085 			if (err || neg) {
1086 				err = -EINVAL;
1087 				break;
1088 			}
1089 
1090 			val = convmul * val / convdiv;
1091 			if ((min && val < *min) || (max && val > *max)) {
1092 				err = -EINVAL;
1093 				break;
1094 			}
1095 			WRITE_ONCE(*i, val);
1096 		} else {
1097 			val = convdiv * READ_ONCE(*i) / convmul;
1098 			if (!first)
1099 				proc_put_char(&buffer, &left, '\t');
1100 			proc_put_long(&buffer, &left, val, false);
1101 		}
1102 	}
1103 
1104 	if (!write && !first && left && !err)
1105 		proc_put_char(&buffer, &left, '\n');
1106 	if (write && !err)
1107 		left -= proc_skip_spaces(&p);
1108 	if (write && first)
1109 		return err ? : -EINVAL;
1110 	*lenp -= left;
1111 out:
1112 	*ppos += *lenp;
1113 	return err;
1114 }
1115 
1116 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
1117 		void *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul,
1118 		unsigned long convdiv)
1119 {
1120 	return __do_proc_doulongvec_minmax(table->data, table, write,
1121 			buffer, lenp, ppos, convmul, convdiv);
1122 }
1123 
1124 /**
1125  * proc_doulongvec_minmax - read a vector of long integers with min/max values
1126  * @table: the sysctl table
1127  * @write: %TRUE if this is a write to the sysctl file
1128  * @buffer: the user buffer
1129  * @lenp: the size of the user buffer
1130  * @ppos: file position
1131  *
1132  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1133  * values from/to the user buffer, treated as an ASCII string.
1134  *
1135  * This routine will ensure the values are within the range specified by
1136  * table->extra1 (min) and table->extra2 (max).
1137  *
1138  * Returns 0 on success.
1139  */
1140 int proc_doulongvec_minmax(struct ctl_table *table, int write,
1141 			   void *buffer, size_t *lenp, loff_t *ppos)
1142 {
1143     return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
1144 }
1145 
1146 /**
1147  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
1148  * @table: the sysctl table
1149  * @write: %TRUE if this is a write to the sysctl file
1150  * @buffer: the user buffer
1151  * @lenp: the size of the user buffer
1152  * @ppos: file position
1153  *
1154  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1155  * values from/to the user buffer, treated as an ASCII string. The values
1156  * are treated as milliseconds, and converted to jiffies when they are stored.
1157  *
1158  * This routine will ensure the values are within the range specified by
1159  * table->extra1 (min) and table->extra2 (max).
1160  *
1161  * Returns 0 on success.
1162  */
1163 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1164 				      void *buffer, size_t *lenp, loff_t *ppos)
1165 {
1166     return do_proc_doulongvec_minmax(table, write, buffer,
1167 				     lenp, ppos, HZ, 1000l);
1168 }
1169 
1170 
1171 static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
1172 					 int *valp,
1173 					 int write, void *data)
1174 {
1175 	if (write) {
1176 		if (*lvalp > INT_MAX / HZ)
1177 			return 1;
1178 		if (*negp)
1179 			WRITE_ONCE(*valp, -*lvalp * HZ);
1180 		else
1181 			WRITE_ONCE(*valp, *lvalp * HZ);
1182 	} else {
1183 		int val = READ_ONCE(*valp);
1184 		unsigned long lval;
1185 		if (val < 0) {
1186 			*negp = true;
1187 			lval = -(unsigned long)val;
1188 		} else {
1189 			*negp = false;
1190 			lval = (unsigned long)val;
1191 		}
1192 		*lvalp = lval / HZ;
1193 	}
1194 	return 0;
1195 }
1196 
1197 static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
1198 						int *valp,
1199 						int write, void *data)
1200 {
1201 	if (write) {
1202 		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
1203 			return 1;
1204 		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
1205 	} else {
1206 		int val = *valp;
1207 		unsigned long lval;
1208 		if (val < 0) {
1209 			*negp = true;
1210 			lval = -(unsigned long)val;
1211 		} else {
1212 			*negp = false;
1213 			lval = (unsigned long)val;
1214 		}
1215 		*lvalp = jiffies_to_clock_t(lval);
1216 	}
1217 	return 0;
1218 }
1219 
/*
 * Conversion callback for proc_dointvec_ms_jiffies().
 *
 * Write: the parsed value *lvalp (sign in *negp) is passed through
 * msecs_to_jiffies(); returns 1 if the result exceeds INT_MAX, otherwise
 * the result is stored in *valp.
 * Read: *valp is split into sign (*negp) and magnitude, and the magnitude
 * is passed through jiffies_to_msecs() into *lvalp.
 *
 * @data is unused.
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	unsigned long jif;
	int stored;

	if (write) {
		jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
		if (jif > INT_MAX)
			return 1;
		WRITE_ONCE(*valp, (int)jif);
		return 0;
	}

	stored = READ_ONCE(*valp);
	*negp = stored < 0;
	/* negate in unsigned arithmetic so INT_MIN is handled */
	jif = *negp ? -(unsigned long)stored : (unsigned long)stored;
	*lvalp = jiffies_to_msecs(jif);
	return 0;
}
1244 
1245 static int do_proc_dointvec_ms_jiffies_minmax_conv(bool *negp, unsigned long *lvalp,
1246 						int *valp, int write, void *data)
1247 {
1248 	int tmp, ret;
1249 	struct do_proc_dointvec_minmax_conv_param *param = data;
1250 	/*
1251 	 * If writing, first do so via a temporary local int so we can
1252 	 * bounds-check it before touching *valp.
1253 	 */
1254 	int *ip = write ? &tmp : valp;
1255 
1256 	ret = do_proc_dointvec_ms_jiffies_conv(negp, lvalp, ip, write, data);
1257 	if (ret)
1258 		return ret;
1259 
1260 	if (write) {
1261 		if ((param->min && *param->min > tmp) ||
1262 				(param->max && *param->max < tmp))
1263 			return -EINVAL;
1264 		*valp = tmp;
1265 	}
1266 	return 0;
1267 }
1268 
1269 /**
1270  * proc_dointvec_jiffies - read a vector of integers as seconds
1271  * @table: the sysctl table
1272  * @write: %TRUE if this is a write to the sysctl file
1273  * @buffer: the user buffer
1274  * @lenp: the size of the user buffer
1275  * @ppos: file position
1276  *
1277  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1278  * values from/to the user buffer, treated as an ASCII string.
1279  * The values read are assumed to be in seconds, and are converted into
1280  * jiffies.
1281  *
1282  * Returns 0 on success.
1283  */
1284 int proc_dointvec_jiffies(struct ctl_table *table, int write,
1285 			  void *buffer, size_t *lenp, loff_t *ppos)
1286 {
1287     return do_proc_dointvec(table,write,buffer,lenp,ppos,
1288 		    	    do_proc_dointvec_jiffies_conv,NULL);
1289 }
1290 
1291 int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1292 			  void *buffer, size_t *lenp, loff_t *ppos)
1293 {
1294 	struct do_proc_dointvec_minmax_conv_param param = {
1295 		.min = (int *) table->extra1,
1296 		.max = (int *) table->extra2,
1297 	};
1298 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
1299 			do_proc_dointvec_ms_jiffies_minmax_conv, &param);
1300 }
1301 
1302 /**
1303  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
1304  * @table: the sysctl table
1305  * @write: %TRUE if this is a write to the sysctl file
1306  * @buffer: the user buffer
1307  * @lenp: the size of the user buffer
1308  * @ppos: pointer to the file position
1309  *
1310  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1311  * values from/to the user buffer, treated as an ASCII string.
1312  * The values read are assumed to be in 1/USER_HZ seconds, and
1313  * are converted into jiffies.
1314  *
1315  * Returns 0 on success.
1316  */
1317 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
1318 				 void *buffer, size_t *lenp, loff_t *ppos)
1319 {
1320 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
1321 				do_proc_dointvec_userhz_jiffies_conv, NULL);
1322 }
1323 
1324 /**
1325  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
1326  * @table: the sysctl table
1327  * @write: %TRUE if this is a write to the sysctl file
1328  * @buffer: the user buffer
1329  * @lenp: the size of the user buffer
1330  * @ppos: file position
1331  * @ppos: the current position in the file
1332  *
1333  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1334  * values from/to the user buffer, treated as an ASCII string.
1335  * The values read are assumed to be in 1/1000 seconds, and
1336  * are converted into jiffies.
1337  *
1338  * Returns 0 on success.
1339  */
1340 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, void *buffer,
1341 		size_t *lenp, loff_t *ppos)
1342 {
1343 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
1344 				do_proc_dointvec_ms_jiffies_conv, NULL);
1345 }
1346 
1347 static int proc_do_cad_pid(struct ctl_table *table, int write, void *buffer,
1348 		size_t *lenp, loff_t *ppos)
1349 {
1350 	struct pid *new_pid;
1351 	pid_t tmp;
1352 	int r;
1353 
1354 	tmp = pid_vnr(cad_pid);
1355 
1356 	r = __do_proc_dointvec(&tmp, table, write, buffer,
1357 			       lenp, ppos, NULL, NULL);
1358 	if (r || !write)
1359 		return r;
1360 
1361 	new_pid = find_get_pid(tmp);
1362 	if (!new_pid)
1363 		return -ESRCH;
1364 
1365 	put_pid(xchg(&cad_pid, new_pid));
1366 	return 0;
1367 }
1368 
/**
 * proc_do_large_bitmap - read/write from/to a large bitmap
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * The bitmap is stored at table->data and the bitmap length (in bits)
 * in table->maxlen.
 *
 * We use a range comma separated format (e.g. 1,3-4,10-10) so that
 * large bitmaps may be represented in a compact manner. Writing into
 * the file at position 0 replaces the bitmap with the given input; a
 * write at a non-zero position is ORed into the existing bitmap. The
 * bitmap is only modified if the whole write parsed without error.
 *
 * At most PAGE_SIZE - 1 bytes are parsed per write call.
 *
 * Returns 0 on success, -ENOMEM if the temporary bitmap cannot be
 * allocated, -EINVAL for a bit number outside the bitmap, a negative
 * number or a descending range, or the error from proc_get_long().
 */
int proc_do_large_bitmap(struct ctl_table *table, int write,
			 void *buffer, size_t *lenp, loff_t *ppos)
{
	int err = 0;
	size_t left = *lenp;
	unsigned long bitmap_len = table->maxlen;
	/* table->data holds a pointer to the bitmap pointer */
	unsigned long *bitmap = *(unsigned long **) table->data;
	unsigned long *tmp_bitmap = NULL;
	/*
	 * Characters accepted after the first number of an entry (tr_a) and
	 * after the second number of a range (tr_b).
	 */
	char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;

	/* Nothing to do; this also ends a read continued at a non-zero offset */
	if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
		*lenp = 0;
		return 0;
	}

	if (write) {
		char *p = buffer;
		size_t skipped = 0;

		if (left > PAGE_SIZE - 1) {
			left = PAGE_SIZE - 1;
			/* How much of the buffer we'll skip this pass */
			skipped = *lenp - left;
		}

		/*
		 * Collect the new bits in a scratch bitmap so that the real
		 * bitmap is only touched once parsing has succeeded.
		 */
		tmp_bitmap = bitmap_zalloc(bitmap_len, GFP_KERNEL);
		if (!tmp_bitmap)
			return -ENOMEM;
		/* presumably drops newlines ahead of the first entry */
		proc_skip_char(&p, &left, '\n');
		while (!err && left) {
			unsigned long val_a, val_b;
			bool neg;
			size_t saved_left;

			/* In case we stop parsing mid-number, we can reset */
			saved_left = left;
			err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
					     sizeof(tr_a), &c);
			/*
			 * If we consumed the entirety of a truncated buffer or
			 * only one char is left (may be a "-"), then stop here,
			 * reset, & come back for more.
			 */
			if ((left <= 1) && skipped) {
				left = saved_left;
				break;
			}

			if (err)
				break;
			if (val_a >= bitmap_len || neg) {
				err = -EINVAL;
				break;
			}

			/* a single number is the range val_a-val_a */
			val_b = val_a;
			/*
			 * NOTE(review): appears to step over the trailing
			 * character reported in c - confirm against
			 * proc_get_long().
			 */
			if (left) {
				p++;
				left--;
			}

			if (c == '-') {
				err = proc_get_long(&p, &left, &val_b,
						     &neg, tr_b, sizeof(tr_b),
						     &c);
				/*
				 * If we consumed all of a truncated buffer
				 * then stop here, reset, & come back for more.
				 */
				if (!left && skipped) {
					left = saved_left;
					break;
				}

				if (err)
					break;
				if (val_b >= bitmap_len || neg ||
				    val_a > val_b) {
					err = -EINVAL;
					break;
				}
				if (left) {
					p++;
					left--;
				}
			}

			/* mark the inclusive range val_a..val_b */
			bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
			proc_skip_char(&p, &left, '\n');
		}
		/* bytes left out of this pass count as not consumed */
		left += skipped;
	} else {
		unsigned long bit_a, bit_b = 0;
		bool first = 1;

		while (left) {
			/* bit_a: next set bit at or after bit_b */
			bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
			if (bit_a >= bitmap_len)
				break;
			/* bit_b: last bit of the run of set bits starting at bit_a */
			bit_b = find_next_zero_bit(bitmap, bitmap_len,
						   bit_a + 1) - 1;

			if (!first)
				proc_put_char(&buffer, &left, ',');
			proc_put_long(&buffer, &left, bit_a, false);
			/* runs longer than one bit are printed as "a-b" */
			if (bit_a != bit_b) {
				proc_put_char(&buffer, &left, '-');
				proc_put_long(&buffer, &left, bit_b, false);
			}

			first = 0; bit_b++;
		}
		proc_put_char(&buffer, &left, '\n');
	}

	if (!err) {
		if (write) {
			/* continuation writes add bits, a write at 0 replaces */
			if (*ppos)
				bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
			else
				bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
		}
		/* report the bytes actually used and advance the position */
		*lenp -= left;
		*ppos += *lenp;
	}

	/* tmp_bitmap is still NULL here on the read path */
	bitmap_free(tmp_bitmap);
	return err;
}
1515 
1516 #else /* CONFIG_PROC_SYSCTL */
1517 
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dostring(struct ctl_table *table, int write,
		  void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1523 
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dobool(struct ctl_table *table, int write,
		void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1529 
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dointvec(struct ctl_table *table, int write,
		  void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1535 
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_douintvec(struct ctl_table *table, int write,
		  void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1541 
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dointvec_minmax(struct ctl_table *table, int write,
		    void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1547 
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_douintvec_minmax(struct ctl_table *table, int write,
			  void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1553 
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dou8vec_minmax(struct ctl_table *table, int write,
			void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1559 
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dointvec_jiffies(struct ctl_table *table, int write,
		    void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1565 
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write,
				    void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1571 
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
		    void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1577 
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1583 
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_doulongvec_minmax(struct ctl_table *table, int write,
		    void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1589 
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
				      void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1595 
/* Stub for builds without CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_do_large_bitmap(struct ctl_table *table, int write,
			 void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1601 
1602 #endif /* CONFIG_PROC_SYSCTL */
1603 
1604 #if defined(CONFIG_SYSCTL)
1605 int proc_do_static_key(struct ctl_table *table, int write,
1606 		       void *buffer, size_t *lenp, loff_t *ppos)
1607 {
1608 	struct static_key *key = (struct static_key *)table->data;
1609 	static DEFINE_MUTEX(static_key_mutex);
1610 	int val, ret;
1611 	struct ctl_table tmp = {
1612 		.data   = &val,
1613 		.maxlen = sizeof(val),
1614 		.mode   = table->mode,
1615 		.extra1 = SYSCTL_ZERO,
1616 		.extra2 = SYSCTL_ONE,
1617 	};
1618 
1619 	if (write && !capable(CAP_SYS_ADMIN))
1620 		return -EPERM;
1621 
1622 	mutex_lock(&static_key_mutex);
1623 	val = static_key_enabled(key);
1624 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
1625 	if (write && !ret) {
1626 		if (val)
1627 			static_key_enable(key);
1628 		else
1629 			static_key_disable(key);
1630 	}
1631 	mutex_unlock(&static_key_mutex);
1632 	return ret;
1633 }
1634 
/*
 * Table of sysctl entries. Each entry gives the file name (.procname), the
 * variable it exposes (.data) with its size (.maxlen), the file mode and the
 * handler that parses/formats the value. .extra1/.extra2 are handed to the
 * handler; the *_minmax handlers treat them as lower/upper bounds. Many
 * entries exist only when the matching CONFIG_* option is set. The table
 * ends with an empty entry.
 */
static struct ctl_table kern_table[] = {
#ifdef CONFIG_NUMA_BALANCING
	{
		.procname	= "numa_balancing",
		.data		= NULL, /* filled in by handler */
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= sysctl_numa_balancing,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_FOUR,
	},
	{
		.procname	= "numa_balancing_promote_rate_limit_MBps",
		.data		= &sysctl_numa_balancing_promote_rate_limit,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
	},
#endif /* CONFIG_NUMA_BALANCING */
	{
		.procname	= "panic",
		.data		= &panic_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#ifdef CONFIG_PROC_SYSCTL
	/* no .data here: the value is supplied by the proc_taint handler */
	{
		.procname	= "tainted",
		.maxlen 	= sizeof(long),
		.mode		= 0644,
		.proc_handler	= proc_taint,
	},
	{
		.procname	= "sysctl_writes_strict",
		.data		= &sysctl_writes_strict,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_NEG_ONE,
		.extra2		= SYSCTL_ONE,
	},
#endif
	{
		.procname	= "print-fatal-signals",
		.data		= &print_fatal_signals,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#ifdef CONFIG_SPARC
	{
		.procname	= "reboot-cmd",
		.data		= reboot_command,
		.maxlen		= 256,
		.mode		= 0644,
		.proc_handler	= proc_dostring,
	},
	{
		.procname	= "stop-a",
		.data		= &stop_a_enabled,
		.maxlen		= sizeof (int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "scons-poweroff",
		.data		= &scons_pwroff,
		.maxlen		= sizeof (int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#ifdef CONFIG_SPARC64
	{
		.procname	= "tsb-ratio",
		.data		= &sysctl_tsb_ratio,
		.maxlen		= sizeof (int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#ifdef CONFIG_PARISC
	{
		.procname	= "soft-power",
		.data		= &pwrsw_enabled,
		.maxlen		= sizeof (int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
	{
		.procname	= "unaligned-trap",
		.data		= &unaligned_enabled,
		.maxlen		= sizeof (int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#ifdef CONFIG_STACK_TRACER
	{
		.procname	= "stack_tracer_enabled",
		.data		= &stack_tracer_enabled,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= stack_trace_sysctl,
	},
#endif
#ifdef CONFIG_TRACING
	{
		.procname	= "ftrace_dump_on_oops",
		.data		= &ftrace_dump_on_oops,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "traceoff_on_warning",
		.data		= &__disable_trace_on_warning,
		.maxlen		= sizeof(__disable_trace_on_warning),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "tracepoint_printk",
		.data		= &tracepoint_printk,
		.maxlen		= sizeof(tracepoint_printk),
		.mode		= 0644,
		.proc_handler	= tracepoint_printk_sysctl,
	},
#endif
#ifdef CONFIG_MODULES
	{
		.procname	= "modprobe",
		.data		= &modprobe_path,
		.maxlen		= KMOD_PATH_LEN,
		.mode		= 0644,
		.proc_handler	= proc_dostring,
	},
	{
		.procname	= "modules_disabled",
		.data		= &modules_disabled,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		/* only handle a transition from default "0" to "1" */
		.proc_handler	= proc_dointvec_minmax,
		/* min == max == 1, so 1 is the only value a write may store */
		.extra1		= SYSCTL_ONE,
		.extra2		= SYSCTL_ONE,
	},
#endif
#ifdef CONFIG_UEVENT_HELPER
	{
		.procname	= "hotplug",
		.data		= &uevent_helper,
		.maxlen		= UEVENT_HELPER_PATH_LEN,
		.mode		= 0644,
		.proc_handler	= proc_dostring,
	},
#endif
#ifdef CONFIG_MAGIC_SYSRQ
	{
		.procname	= "sysrq",
		.data		= NULL,
		.maxlen		= sizeof (int),
		.mode		= 0644,
		.proc_handler	= sysrq_sysctl_handler,
	},
#endif
#ifdef CONFIG_PROC_SYSCTL
	/* .data is NULL: proc_do_cad_pid() operates on cad_pid directly */
	{
		.procname	= "cad_pid",
		.data		= NULL,
		.maxlen		= sizeof (int),
		.mode		= 0600,
		.proc_handler	= proc_do_cad_pid,
	},
#endif
	{
		.procname	= "threads-max",
		.data		= NULL,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= sysctl_max_threads,
	},
	/* no handler: this entry points at a nested table via .child */
	{
		.procname	= "usermodehelper",
		.mode		= 0555,
		.child		= usermodehelper_table,
	},
	{
		.procname	= "overflowuid",
		.data		= &overflowuid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_MAXOLDUID,
	},
	{
		.procname	= "overflowgid",
		.data		= &overflowgid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_MAXOLDUID,
	},
#ifdef CONFIG_S390
	{
		.procname	= "userprocess_debug",
		.data		= &show_unhandled_signals,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
	{
		.procname	= "pid_max",
		.data		= &pid_max,
		.maxlen		= sizeof (int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &pid_max_min,
		.extra2		= &pid_max_max,
	},
	{
		.procname	= "panic_on_oops",
		.data		= &panic_on_oops,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "panic_print",
		.data		= &panic_print,
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	/* mode 0444: the next two entries are not writable */
	{
		.procname	= "ngroups_max",
		.data		= (void *)&ngroups_max,
		.maxlen		= sizeof (int),
		.mode		= 0444,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "cap_last_cap",
		.data		= (void *)&cap_last_cap,
		.maxlen		= sizeof(int),
		.mode		= 0444,
		.proc_handler	= proc_dointvec,
	},
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
	{
		.procname       = "unknown_nmi_panic",
		.data           = &unknown_nmi_panic,
		.maxlen         = sizeof (int),
		.mode           = 0644,
		.proc_handler   = proc_dointvec,
	},
#endif

#if (defined(CONFIG_X86_32) || defined(CONFIG_PARISC)) && \
	defined(CONFIG_DEBUG_STACKOVERFLOW)
	{
		.procname	= "panic_on_stackoverflow",
		.data		= &sysctl_panic_on_stackoverflow,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#if defined(CONFIG_X86)
	{
		.procname	= "panic_on_unrecovered_nmi",
		.data		= &panic_on_unrecovered_nmi,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "panic_on_io_nmi",
		.data		= &panic_on_io_nmi,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "bootloader_type",
		.data		= &bootloader_type,
		.maxlen		= sizeof (int),
		.mode		= 0444,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "bootloader_version",
		.data		= &bootloader_version,
		.maxlen		= sizeof (int),
		.mode		= 0444,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "io_delay_type",
		.data		= &io_delay_type,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#if defined(CONFIG_MMU)
	{
		.procname	= "randomize_va_space",
		.data		= &randomize_va_space,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#if defined(CONFIG_S390) && defined(CONFIG_SMP)
	{
		.procname	= "spin_retry",
		.data		= &spin_retry,
		.maxlen		= sizeof (int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
	{
		.procname	= "acpi_video_flags",
		.data		= &acpi_realmode_flags,
		.maxlen		= sizeof (unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
#endif
#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
	{
		.procname	= "ignore-unaligned-usertrap",
		.data		= &no_unaligned_warning,
		.maxlen		= sizeof (int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#ifdef CONFIG_IA64
	{
		.procname	= "unaligned-dump-stack",
		.data		= &unaligned_dump_stack,
		.maxlen		= sizeof (int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#ifdef CONFIG_RT_MUTEXES
	{
		.procname	= "max_lock_depth",
		.data		= &max_lock_depth,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#ifdef CONFIG_KEYS
	/* nested table, same shape as the "usermodehelper" entry */
	{
		.procname	= "keys",
		.mode		= 0555,
		.child		= key_sysctls,
	},
#endif
#ifdef CONFIG_PERF_EVENTS
	/*
	 * User-space scripts rely on the existence of this file
	 * as a feature check for perf_events being enabled.
	 *
	 * So it's an ABI, do not remove!
	 */
	{
		.procname	= "perf_event_paranoid",
		.data		= &sysctl_perf_event_paranoid,
		.maxlen		= sizeof(sysctl_perf_event_paranoid),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "perf_event_mlock_kb",
		.data		= &sysctl_perf_event_mlock,
		.maxlen		= sizeof(sysctl_perf_event_mlock),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "perf_event_max_sample_rate",
		.data		= &sysctl_perf_event_sample_rate,
		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
		.mode		= 0644,
		.proc_handler	= perf_proc_update_handler,
		.extra1		= SYSCTL_ONE,
	},
	{
		.procname	= "perf_cpu_time_max_percent",
		.data		= &sysctl_perf_cpu_time_max_percent,
		.maxlen		= sizeof(sysctl_perf_cpu_time_max_percent),
		.mode		= 0644,
		.proc_handler	= perf_cpu_time_max_percent_handler,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE_HUNDRED,
	},
	{
		.procname	= "perf_event_max_stack",
		.data		= &sysctl_perf_event_max_stack,
		.maxlen		= sizeof(sysctl_perf_event_max_stack),
		.mode		= 0644,
		.proc_handler	= perf_event_max_stack_handler,
		.extra1		= SYSCTL_ZERO,
		.extra2		= (void *)&six_hundred_forty_kb,
	},
	{
		.procname	= "perf_event_max_contexts_per_stack",
		.data		= &sysctl_perf_event_max_contexts_per_stack,
		.maxlen		= sizeof(sysctl_perf_event_max_contexts_per_stack),
		.mode		= 0644,
		.proc_handler	= perf_event_max_stack_handler,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE_THOUSAND,
	},
#endif
	{
		.procname	= "panic_on_warn",
		.data		= &panic_on_warn,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
#ifdef CONFIG_TREE_RCU
	{
		.procname	= "panic_on_rcu_stall",
		.data		= &sysctl_panic_on_rcu_stall,
		.maxlen		= sizeof(sysctl_panic_on_rcu_stall),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
	{
		.procname	= "max_rcu_stall_to_panic",
		.data		= &sysctl_max_rcu_stall_to_panic,
		.maxlen		= sizeof(sysctl_max_rcu_stall_to_panic),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ONE,
		.extra2		= SYSCTL_INT_MAX,
	},
#endif
	{ }
};
2096 
2097 static struct ctl_table vm_table[] = {
2098 	{
2099 		.procname	= "overcommit_memory",
2100 		.data		= &sysctl_overcommit_memory,
2101 		.maxlen		= sizeof(sysctl_overcommit_memory),
2102 		.mode		= 0644,
2103 		.proc_handler	= overcommit_policy_handler,
2104 		.extra1		= SYSCTL_ZERO,
2105 		.extra2		= SYSCTL_TWO,
2106 	},
2107 	{
2108 		.procname	= "overcommit_ratio",
2109 		.data		= &sysctl_overcommit_ratio,
2110 		.maxlen		= sizeof(sysctl_overcommit_ratio),
2111 		.mode		= 0644,
2112 		.proc_handler	= overcommit_ratio_handler,
2113 	},
2114 	{
2115 		.procname	= "overcommit_kbytes",
2116 		.data		= &sysctl_overcommit_kbytes,
2117 		.maxlen		= sizeof(sysctl_overcommit_kbytes),
2118 		.mode		= 0644,
2119 		.proc_handler	= overcommit_kbytes_handler,
2120 	},
2121 	{
2122 		.procname	= "page-cluster",
2123 		.data		= &page_cluster,
2124 		.maxlen		= sizeof(int),
2125 		.mode		= 0644,
2126 		.proc_handler	= proc_dointvec_minmax,
2127 		.extra1		= SYSCTL_ZERO,
2128 	},
2129 	{
2130 		.procname	= "dirtytime_expire_seconds",
2131 		.data		= &dirtytime_expire_interval,
2132 		.maxlen		= sizeof(dirtytime_expire_interval),
2133 		.mode		= 0644,
2134 		.proc_handler	= dirtytime_interval_handler,
2135 		.extra1		= SYSCTL_ZERO,
2136 	},
2137 	{
2138 		.procname	= "swappiness",
2139 		.data		= &vm_swappiness,
2140 		.maxlen		= sizeof(vm_swappiness),
2141 		.mode		= 0644,
2142 		.proc_handler	= proc_dointvec_minmax,
2143 		.extra1		= SYSCTL_ZERO,
2144 		.extra2		= SYSCTL_TWO_HUNDRED,
2145 	},
2146 #ifdef CONFIG_NUMA
2147 	{
2148 		.procname	= "numa_stat",
2149 		.data		= &sysctl_vm_numa_stat,
2150 		.maxlen		= sizeof(int),
2151 		.mode		= 0644,
2152 		.proc_handler	= sysctl_vm_numa_stat_handler,
2153 		.extra1		= SYSCTL_ZERO,
2154 		.extra2		= SYSCTL_ONE,
2155 	},
2156 #endif
2157 #ifdef CONFIG_HUGETLB_PAGE
2158 	{
2159 		.procname	= "nr_hugepages",
2160 		.data		= NULL,
2161 		.maxlen		= sizeof(unsigned long),
2162 		.mode		= 0644,
2163 		.proc_handler	= hugetlb_sysctl_handler,
2164 	},
2165 #ifdef CONFIG_NUMA
2166 	{
2167 		.procname       = "nr_hugepages_mempolicy",
2168 		.data           = NULL,
2169 		.maxlen         = sizeof(unsigned long),
2170 		.mode           = 0644,
2171 		.proc_handler   = &hugetlb_mempolicy_sysctl_handler,
2172 	},
2173 #endif
2174 	 {
2175 		.procname	= "hugetlb_shm_group",
2176 		.data		= &sysctl_hugetlb_shm_group,
2177 		.maxlen		= sizeof(gid_t),
2178 		.mode		= 0644,
2179 		.proc_handler	= proc_dointvec,
2180 	 },
2181 	{
2182 		.procname	= "nr_overcommit_hugepages",
2183 		.data		= NULL,
2184 		.maxlen		= sizeof(unsigned long),
2185 		.mode		= 0644,
2186 		.proc_handler	= hugetlb_overcommit_handler,
2187 	},
2188 #endif
2189 	{
2190 		.procname	= "lowmem_reserve_ratio",
2191 		.data		= &sysctl_lowmem_reserve_ratio,
2192 		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
2193 		.mode		= 0644,
2194 		.proc_handler	= lowmem_reserve_ratio_sysctl_handler,
2195 	},
2196 	{
2197 		.procname	= "drop_caches",
2198 		.data		= &sysctl_drop_caches,
2199 		.maxlen		= sizeof(int),
2200 		.mode		= 0200,
2201 		.proc_handler	= drop_caches_sysctl_handler,
2202 		.extra1		= SYSCTL_ONE,
2203 		.extra2		= SYSCTL_FOUR,
2204 	},
2205 #ifdef CONFIG_COMPACTION
2206 	{
2207 		.procname	= "compact_memory",
2208 		.data		= NULL,
2209 		.maxlen		= sizeof(int),
2210 		.mode		= 0200,
2211 		.proc_handler	= sysctl_compaction_handler,
2212 	},
2213 	{
2214 		.procname	= "compaction_proactiveness",
2215 		.data		= &sysctl_compaction_proactiveness,
2216 		.maxlen		= sizeof(sysctl_compaction_proactiveness),
2217 		.mode		= 0644,
2218 		.proc_handler	= compaction_proactiveness_sysctl_handler,
2219 		.extra1		= SYSCTL_ZERO,
2220 		.extra2		= SYSCTL_ONE_HUNDRED,
2221 	},
2222 	{
2223 		.procname	= "extfrag_threshold",
2224 		.data		= &sysctl_extfrag_threshold,
2225 		.maxlen		= sizeof(int),
2226 		.mode		= 0644,
2227 		.proc_handler	= proc_dointvec_minmax,
2228 		.extra1		= SYSCTL_ZERO,
2229 		.extra2		= SYSCTL_ONE_THOUSAND,
2230 	},
2231 	{
2232 		.procname	= "compact_unevictable_allowed",
2233 		.data		= &sysctl_compact_unevictable_allowed,
2234 		.maxlen		= sizeof(int),
2235 		.mode		= 0644,
2236 		.proc_handler	= proc_dointvec_minmax_warn_RT_change,
2237 		.extra1		= SYSCTL_ZERO,
2238 		.extra2		= SYSCTL_ONE,
2239 	},
2240 
2241 #endif /* CONFIG_COMPACTION */
2242 	{
2243 		.procname	= "min_free_kbytes",
2244 		.data		= &min_free_kbytes,
2245 		.maxlen		= sizeof(min_free_kbytes),
2246 		.mode		= 0644,
2247 		.proc_handler	= min_free_kbytes_sysctl_handler,
2248 		.extra1		= SYSCTL_ZERO,
2249 	},
2250 	{
2251 		.procname	= "watermark_boost_factor",
2252 		.data		= &watermark_boost_factor,
2253 		.maxlen		= sizeof(watermark_boost_factor),
2254 		.mode		= 0644,
2255 		.proc_handler	= proc_dointvec_minmax,
2256 		.extra1		= SYSCTL_ZERO,
2257 	},
2258 	{
2259 		.procname	= "watermark_scale_factor",
2260 		.data		= &watermark_scale_factor,
2261 		.maxlen		= sizeof(watermark_scale_factor),
2262 		.mode		= 0644,
2263 		.proc_handler	= watermark_scale_factor_sysctl_handler,
2264 		.extra1		= SYSCTL_ONE,
2265 		.extra2		= SYSCTL_THREE_THOUSAND,
2266 	},
2267 	{
2268 		.procname	= "percpu_pagelist_high_fraction",
2269 		.data		= &percpu_pagelist_high_fraction,
2270 		.maxlen		= sizeof(percpu_pagelist_high_fraction),
2271 		.mode		= 0644,
2272 		.proc_handler	= percpu_pagelist_high_fraction_sysctl_handler,
2273 		.extra1		= SYSCTL_ZERO,
2274 	},
2275 	{
2276 		.procname	= "page_lock_unfairness",
2277 		.data		= &sysctl_page_lock_unfairness,
2278 		.maxlen		= sizeof(sysctl_page_lock_unfairness),
2279 		.mode		= 0644,
2280 		.proc_handler	= proc_dointvec_minmax,
2281 		.extra1		= SYSCTL_ZERO,
2282 	},
2283 #ifdef CONFIG_MMU
2284 	{
2285 		.procname	= "max_map_count",
2286 		.data		= &sysctl_max_map_count,
2287 		.maxlen		= sizeof(sysctl_max_map_count),
2288 		.mode		= 0644,
2289 		.proc_handler	= proc_dointvec_minmax,
2290 		.extra1		= SYSCTL_ZERO,
2291 	},
2292 #else
2293 	{
2294 		.procname	= "nr_trim_pages",
2295 		.data		= &sysctl_nr_trim_pages,
2296 		.maxlen		= sizeof(sysctl_nr_trim_pages),
2297 		.mode		= 0644,
2298 		.proc_handler	= proc_dointvec_minmax,
2299 		.extra1		= SYSCTL_ZERO,
2300 	},
2301 #endif
2302 	{
2303 		.procname	= "vfs_cache_pressure",
2304 		.data		= &sysctl_vfs_cache_pressure,
2305 		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
2306 		.mode		= 0644,
2307 		.proc_handler	= proc_dointvec_minmax,
2308 		.extra1		= SYSCTL_ZERO,
2309 	},
2310 #if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
2311     defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
2312 	{
2313 		.procname	= "legacy_va_layout",
2314 		.data		= &sysctl_legacy_va_layout,
2315 		.maxlen		= sizeof(sysctl_legacy_va_layout),
2316 		.mode		= 0644,
2317 		.proc_handler	= proc_dointvec_minmax,
2318 		.extra1		= SYSCTL_ZERO,
2319 	},
2320 #endif
2321 #ifdef CONFIG_NUMA
2322 	{
2323 		.procname	= "zone_reclaim_mode",
2324 		.data		= &node_reclaim_mode,
2325 		.maxlen		= sizeof(node_reclaim_mode),
2326 		.mode		= 0644,
2327 		.proc_handler	= proc_dointvec_minmax,
2328 		.extra1		= SYSCTL_ZERO,
2329 	},
2330 	{
2331 		.procname	= "min_unmapped_ratio",
2332 		.data		= &sysctl_min_unmapped_ratio,
2333 		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
2334 		.mode		= 0644,
2335 		.proc_handler	= sysctl_min_unmapped_ratio_sysctl_handler,
2336 		.extra1		= SYSCTL_ZERO,
2337 		.extra2		= SYSCTL_ONE_HUNDRED,
2338 	},
2339 	{
2340 		.procname	= "min_slab_ratio",
2341 		.data		= &sysctl_min_slab_ratio,
2342 		.maxlen		= sizeof(sysctl_min_slab_ratio),
2343 		.mode		= 0644,
2344 		.proc_handler	= sysctl_min_slab_ratio_sysctl_handler,
2345 		.extra1		= SYSCTL_ZERO,
2346 		.extra2		= SYSCTL_ONE_HUNDRED,
2347 	},
2348 #endif
2349 #ifdef CONFIG_SMP
2350 	{
2351 		.procname	= "stat_interval",
2352 		.data		= &sysctl_stat_interval,
2353 		.maxlen		= sizeof(sysctl_stat_interval),
2354 		.mode		= 0644,
2355 		.proc_handler	= proc_dointvec_jiffies,
2356 	},
2357 	{
2358 		.procname	= "stat_refresh",
2359 		.data		= NULL,
2360 		.maxlen		= 0,
2361 		.mode		= 0600,
2362 		.proc_handler	= vmstat_refresh,
2363 	},
2364 #endif
2365 #ifdef CONFIG_MMU
2366 	{
2367 		.procname	= "mmap_min_addr",
2368 		.data		= &dac_mmap_min_addr,
2369 		.maxlen		= sizeof(unsigned long),
2370 		.mode		= 0644,
2371 		.proc_handler	= mmap_min_addr_handler,
2372 	},
2373 #endif
2374 #ifdef CONFIG_NUMA
2375 	{
2376 		.procname	= "numa_zonelist_order",
2377 		.data		= &numa_zonelist_order,
2378 		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
2379 		.mode		= 0644,
2380 		.proc_handler	= numa_zonelist_order_handler,
2381 	},
2382 #endif
2383 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
2384    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
2385 	{
2386 		.procname	= "vdso_enabled",
2387 #ifdef CONFIG_X86_32
2388 		.data		= &vdso32_enabled,
2389 		.maxlen		= sizeof(vdso32_enabled),
2390 #else
2391 		.data		= &vdso_enabled,
2392 		.maxlen		= sizeof(vdso_enabled),
2393 #endif
2394 		.mode		= 0644,
2395 		.proc_handler	= proc_dointvec,
2396 		.extra1		= SYSCTL_ZERO,
2397 	},
2398 #endif
2399 #ifdef CONFIG_MEMORY_FAILURE
2400 	{
2401 		.procname	= "memory_failure_early_kill",
2402 		.data		= &sysctl_memory_failure_early_kill,
2403 		.maxlen		= sizeof(sysctl_memory_failure_early_kill),
2404 		.mode		= 0644,
2405 		.proc_handler	= proc_dointvec_minmax,
2406 		.extra1		= SYSCTL_ZERO,
2407 		.extra2		= SYSCTL_ONE,
2408 	},
2409 	{
2410 		.procname	= "memory_failure_recovery",
2411 		.data		= &sysctl_memory_failure_recovery,
2412 		.maxlen		= sizeof(sysctl_memory_failure_recovery),
2413 		.mode		= 0644,
2414 		.proc_handler	= proc_dointvec_minmax,
2415 		.extra1		= SYSCTL_ZERO,
2416 		.extra2		= SYSCTL_ONE,
2417 	},
2418 #endif
2419 	{
2420 		.procname	= "user_reserve_kbytes",
2421 		.data		= &sysctl_user_reserve_kbytes,
2422 		.maxlen		= sizeof(sysctl_user_reserve_kbytes),
2423 		.mode		= 0644,
2424 		.proc_handler	= proc_doulongvec_minmax,
2425 	},
2426 	{
2427 		.procname	= "admin_reserve_kbytes",
2428 		.data		= &sysctl_admin_reserve_kbytes,
2429 		.maxlen		= sizeof(sysctl_admin_reserve_kbytes),
2430 		.mode		= 0644,
2431 		.proc_handler	= proc_doulongvec_minmax,
2432 	},
2433 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
2434 	{
2435 		.procname	= "mmap_rnd_bits",
2436 		.data		= &mmap_rnd_bits,
2437 		.maxlen		= sizeof(mmap_rnd_bits),
2438 		.mode		= 0600,
2439 		.proc_handler	= proc_dointvec_minmax,
2440 		.extra1		= (void *)&mmap_rnd_bits_min,
2441 		.extra2		= (void *)&mmap_rnd_bits_max,
2442 	},
2443 #endif
2444 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
2445 	{
2446 		.procname	= "mmap_rnd_compat_bits",
2447 		.data		= &mmap_rnd_compat_bits,
2448 		.maxlen		= sizeof(mmap_rnd_compat_bits),
2449 		.mode		= 0600,
2450 		.proc_handler	= proc_dointvec_minmax,
2451 		.extra1		= (void *)&mmap_rnd_compat_bits_min,
2452 		.extra2		= (void *)&mmap_rnd_compat_bits_max,
2453 	},
2454 #endif
2455 #ifdef CONFIG_USERFAULTFD
2456 	{
2457 		.procname	= "unprivileged_userfaultfd",
2458 		.data		= &sysctl_unprivileged_userfaultfd,
2459 		.maxlen		= sizeof(sysctl_unprivileged_userfaultfd),
2460 		.mode		= 0644,
2461 		.proc_handler	= proc_dointvec_minmax,
2462 		.extra1		= SYSCTL_ZERO,
2463 		.extra2		= SYSCTL_ONE,
2464 	},
2465 #endif
2466 	{ }
2467 };
2468 
2469 static struct ctl_table debug_table[] = {
2470 #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
2471 	{
2472 		.procname	= "exception-trace",
2473 		.data		= &show_unhandled_signals,
2474 		.maxlen		= sizeof(int),
2475 		.mode		= 0644,
2476 		.proc_handler	= proc_dointvec
2477 	},
2478 #endif
2479 	{ }
2480 };
2481 
2482 static struct ctl_table dev_table[] = {
2483 	{ }
2484 };
2485 
/*
 * Pair each base name with its ctl_table.  The names used as the first
 * argument (kernel, vm, debug, dev) are the ones sysctl_init_bases()
 * passes to register_sysctl_base().
 */
DECLARE_SYSCTL_BASE(kernel, kern_table);
DECLARE_SYSCTL_BASE(vm, vm_table);
DECLARE_SYSCTL_BASE(debug, debug_table);
DECLARE_SYSCTL_BASE(dev, dev_table);
2490 
2491 int __init sysctl_init_bases(void)
2492 {
2493 	register_sysctl_base(kernel);
2494 	register_sysctl_base(vm);
2495 	register_sysctl_base(debug);
2496 	register_sysctl_base(dev);
2497 
2498 	return 0;
2499 }
2500 #endif /* CONFIG_SYSCTL */
/*
 * The proc handler exports are gathered here rather than placed after each
 * symbol definition, where every one of them would have to be written
 * twice.  An exception to the usual placement is granted :-)
 */

/* proc_dobool, proc_dostring and proc_do_large_bitmap */
EXPORT_SYMBOL(proc_dobool);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_do_large_bitmap);

/* proc_dointvec family */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);

/* proc_douintvec family; note the minmax variant is exported GPL-only */
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL_GPL(proc_douintvec_minmax);

/* proc_doulongvec family */
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
2517