xref: /openbmc/linux/kernel/sysctl.c (revision 1ac731c529cd4d6adbce134754b51ff7d822b145)
1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   * sysctl.c: General linux system control interface
4   *
5   * Begun 24 March 1995, Stephen Tweedie
6   * Added /proc support, Dec 1995
7   * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
8   * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
9   * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
10   * Dynamic registration fixes, Stephen Tweedie.
11   * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
12   * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
13   *  Horn.
14   * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
15   * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
16   * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
17   *  Wendling.
18   * The list_for_each() macro wasn't appropriate for the sysctl loop.
19   *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
20   */
21  
22  #include <linux/module.h>
23  #include <linux/mm.h>
24  #include <linux/swap.h>
25  #include <linux/slab.h>
26  #include <linux/sysctl.h>
27  #include <linux/bitmap.h>
28  #include <linux/signal.h>
29  #include <linux/panic.h>
30  #include <linux/printk.h>
31  #include <linux/proc_fs.h>
32  #include <linux/security.h>
33  #include <linux/ctype.h>
34  #include <linux/kmemleak.h>
35  #include <linux/filter.h>
36  #include <linux/fs.h>
37  #include <linux/init.h>
38  #include <linux/kernel.h>
39  #include <linux/kobject.h>
40  #include <linux/net.h>
41  #include <linux/sysrq.h>
42  #include <linux/highuid.h>
43  #include <linux/writeback.h>
44  #include <linux/ratelimit.h>
45  #include <linux/hugetlb.h>
46  #include <linux/initrd.h>
47  #include <linux/key.h>
48  #include <linux/times.h>
49  #include <linux/limits.h>
50  #include <linux/dcache.h>
51  #include <linux/syscalls.h>
52  #include <linux/vmstat.h>
53  #include <linux/nfs_fs.h>
54  #include <linux/acpi.h>
55  #include <linux/reboot.h>
56  #include <linux/ftrace.h>
57  #include <linux/perf_event.h>
58  #include <linux/oom.h>
59  #include <linux/kmod.h>
60  #include <linux/capability.h>
61  #include <linux/binfmts.h>
62  #include <linux/sched/sysctl.h>
63  #include <linux/mount.h>
64  #include <linux/userfaultfd_k.h>
65  #include <linux/pid.h>
66  
67  #include "../lib/kstrtox.h"
68  
69  #include <linux/uaccess.h>
70  #include <asm/processor.h>
71  
72  #ifdef CONFIG_X86
73  #include <asm/nmi.h>
74  #include <asm/stacktrace.h>
75  #include <asm/io.h>
76  #endif
77  #ifdef CONFIG_SPARC
78  #include <asm/setup.h>
79  #endif
80  #ifdef CONFIG_RT_MUTEXES
81  #include <linux/rtmutex.h>
82  #endif
83  
/*
 * Shared constants to be used in various sysctls.
 *
 * NOTE(review): users presumably refer to individual entries by their array
 * position (for instance as min/max pointers in ctl_table entries), so the
 * order of the initializers should be treated as fixed - confirm against
 * <linux/sysctl.h> before reordering or inserting values.
 */
const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 };
EXPORT_SYMBOL(sysctl_vals);

/* Counterpart of sysctl_vals holding unsigned long constants. */
const unsigned long sysctl_long_vals[] = { 0, 1, LONG_MAX };
EXPORT_SYMBOL_GPL(sysctl_long_vals);
90  
91  #if defined(CONFIG_SYSCTL)
92  
/* Constants used for minimum and maximum */

#ifdef CONFIG_PERF_EVENTS
/* 640 KiB expressed in bytes; only built when CONFIG_PERF_EVENTS is set. */
static const int six_hundred_forty_kb = 640 * 1024;
#endif


/*
 * Addressable copies of NGROUPS_MAX and CAP_LAST_CAP.
 * NOTE(review): presumably referenced by sysctl table entries defined later
 * in this file - confirm.
 */
static const int ngroups_max = NGROUPS_MAX;
static const int cap_last_cap = CAP_LAST_CAP;
102  
103  #ifdef CONFIG_PROC_SYSCTL
104  
/**
 * enum sysctl_writes_mode - supported sysctl write modes
 *
 * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value
 *	to be written, and multiple writes on the same sysctl file descriptor
 *	will rewrite the sysctl value, regardless of file position. No warning
 *	is issued when the initial position is not 0.
 * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is
 *	not 0.
 * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
 *	file position 0 and the value must be fully contained in the buffer
 *	sent to the write syscall. If dealing with strings respect the file
 *	position, but restrict this to the max length of the buffer, anything
 *	past the max length will be ignored. Multiple writes will append
 *	to the buffer.
 *
 * These write modes control how current file position affects the behavior of
 * updating sysctl values through the proc interface on each write.
 */
enum sysctl_writes_mode {
	SYSCTL_WRITES_LEGACY		= -1,
	SYSCTL_WRITES_WARN		= 0,
	SYSCTL_WRITES_STRICT		= 1,
};

/*
 * Write mode currently in effect; starts out as strict. The warning printed
 * by warn_sysctl_write() names it "kernel.sysctl_writes_strict".
 */
static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
131  #endif /* CONFIG_PROC_SYSCTL */
132  
#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
    defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
/*
 * Zero-initialised flag, only built when one of the mmap layout options above
 * is defined.
 * NOTE(review): presumably the backing store of a "legacy_va_layout" sysctl
 * that is consumed by the mmap layout code - confirm.
 */
int sysctl_legacy_va_layout;
#endif
137  
138  #endif /* CONFIG_SYSCTL */
139  
140  /*
141   * /proc/sys support
142   */
143  
144  #ifdef CONFIG_PROC_SYSCTL
145  
/*
 * _proc_do_string - move a string between a sysctl backing store and @buffer
 * @data: the kernel string backing the sysctl
 * @maxlen: capacity of @data in bytes, including the terminating NUL
 * @write: non-zero to copy from @buffer into @data, zero for the reverse
 * @buffer: the buffer to read from / write to
 * @lenp: in: size of @buffer; out (read only): number of bytes produced
 * @ppos: file position, advanced on every transfer
 *
 * Write: in strict mode the text is stored at offset *@ppos, provided that
 * offset is not beyond the end of the current string; in the other modes
 * the string is always rewritten from the start. Copying stops at the first
 * NUL or newline in @buffer, or when @data is full, and @data is always
 * NUL-terminated afterwards.
 *
 * Read: the string is copied starting at offset *@ppos, followed by a
 * newline if there is room left in @buffer.
 *
 * Always returns 0.
 */
static int _proc_do_string(char *data, int maxlen, int write,
		char *buffer, size_t *lenp, loff_t *ppos)
{
	size_t dst = 0;
	size_t taken;

	if (!data || !maxlen || !*lenp) {
		*lenp = 0;
		return 0;
	}

	if (!write) {
		size_t avail = strlen(data);

		if (avail > maxlen)
			avail = maxlen;

		/* Positioned beyond the string: nothing left to hand out. */
		if (*ppos > avail) {
			*lenp = 0;
			return 0;
		}

		avail -= *ppos;
		if (avail > *lenp)
			avail = *lenp;
		if (avail)
			memcpy(buffer, data + *ppos, avail);
		/* Finish the line when the caller's buffer still has room. */
		if (avail < *lenp)
			buffer[avail++] = '\n';

		*lenp = avail;
		*ppos += avail;
		return 0;
	}

	if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
		size_t cur = strlen(data);

		/* Only continue writes not past the end of buffer. */
		if (cur > maxlen - 1)
			cur = maxlen - 1;
		if (*ppos > cur)
			return 0;
		dst = *ppos;
	}
	/* Otherwise dst stays 0: start writing from beginning of buffer. */

	*ppos += *lenp;
	for (taken = 0; taken < *lenp && dst < maxlen - 1; taken++) {
		char ch = buffer[taken];

		if (ch == 0 || ch == '\n')
			break;
		data[dst++] = ch;
	}
	data[dst] = 0;

	return 0;
}
207  
warn_sysctl_write(struct ctl_table * table)208  static void warn_sysctl_write(struct ctl_table *table)
209  {
210  	pr_warn_once("%s wrote to %s when file position was not 0!\n"
211  		"This will not be supported in the future. To silence this\n"
212  		"warning, set kernel.sysctl_writes_strict = -1\n",
213  		current->comm, table->procname);
214  }
215  
216  /**
217   * proc_first_pos_non_zero_ignore - check if first position is allowed
218   * @ppos: file position
219   * @table: the sysctl table
220   *
221   * Returns true if the first position is non-zero and the sysctl_writes_strict
222   * mode indicates this is not allowed for numeric input types. String proc
223   * handlers can ignore the return value.
224   */
proc_first_pos_non_zero_ignore(loff_t * ppos,struct ctl_table * table)225  static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
226  					   struct ctl_table *table)
227  {
228  	if (!*ppos)
229  		return false;
230  
231  	switch (sysctl_writes_strict) {
232  	case SYSCTL_WRITES_STRICT:
233  		return true;
234  	case SYSCTL_WRITES_WARN:
235  		warn_sysctl_write(table);
236  		return false;
237  	default:
238  		return false;
239  	}
240  }
241  
242  /**
243   * proc_dostring - read a string sysctl
244   * @table: the sysctl table
245   * @write: %TRUE if this is a write to the sysctl file
246   * @buffer: the user buffer
247   * @lenp: the size of the user buffer
248   * @ppos: file position
249   *
250   * Reads/writes a string from/to the user buffer. If the kernel
251   * buffer provided is not large enough to hold the string, the
252   * string is truncated. The copied string is %NULL-terminated.
253   * If the string is being read by the user process, it is copied
254   * and a newline '\n' is added. It is truncated if the buffer is
255   * not large enough.
256   *
257   * Returns 0 on success.
258   */
proc_dostring(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)259  int proc_dostring(struct ctl_table *table, int write,
260  		  void *buffer, size_t *lenp, loff_t *ppos)
261  {
262  	if (write)
263  		proc_first_pos_non_zero_ignore(ppos, table);
264  
265  	return _proc_do_string(table->data, table->maxlen, write, buffer, lenp,
266  			ppos);
267  }
268  
/*
 * Advance *buf past leading whitespace, looking at no more than *size
 * bytes, and reduce *size by the number of bytes skipped.
 */
static void proc_skip_spaces(char **buf, size_t *size)
{
	char *cur = *buf;
	size_t remaining = *size;

	for (; remaining && isspace(*cur); remaining--)
		cur++;

	*buf = cur;
	*size = remaining;
}
278  
/*
 * Advance *buf past a leading run of the character @v, looking at no more
 * than *size bytes, and reduce *size by the number of bytes skipped.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	char *cur = *buf;
	size_t remaining = *size;

	while (remaining && *cur == v) {
		cur++;
		remaining--;
	}

	*buf = cur;
	*size = remaining;
}
288  
289  /**
290   * strtoul_lenient - parse an ASCII formatted integer from a buffer and only
291   *                   fail on overflow
292   *
293   * @cp: kernel buffer containing the string to parse
294   * @endp: pointer to store the trailing characters
295   * @base: the base to use
296   * @res: where the parsed integer will be stored
297   *
298   * In case of success 0 is returned and @res will contain the parsed integer,
299   * @endp will hold any trailing characters.
300   * This function will fail the parse on overflow. If there wasn't an overflow
301   * the function will defer the decision what characters count as invalid to the
302   * caller.
303   */
strtoul_lenient(const char * cp,char ** endp,unsigned int base,unsigned long * res)304  static int strtoul_lenient(const char *cp, char **endp, unsigned int base,
305  			   unsigned long *res)
306  {
307  	unsigned long long result;
308  	unsigned int rv;
309  
310  	cp = _parse_integer_fixup_radix(cp, &base);
311  	rv = _parse_integer(cp, base, &result);
312  	if ((rv & KSTRTOX_OVERFLOW) || (result != (unsigned long)result))
313  		return -ERANGE;
314  
315  	cp += rv;
316  
317  	if (endp)
318  		*endp = (char *)cp;
319  
320  	*res = (unsigned long)result;
321  	return 0;
322  }
323  
#define TMPBUFLEN 22
/**
 * proc_get_long - reads an ASCII formatted integer from a user buffer
 *
 * @buf: a kernel buffer
 * @size: size of the kernel buffer
 * @val: this is where the number will be stored
 * @neg: set to %TRUE if number is negative
 * @perm_tr: a vector which contains the allowed trailers
 * @perm_tr_len: size of the perm_tr vector
 * @tr: pointer to store the trailer character
 *
 * At most TMPBUFLEN - 1 bytes of @buf are examined. The number may start
 * with a '-' and has to begin with a digit after that.
 *
 * In case of success %0 is returned and @buf and @size are updated with
 * the amount of bytes read. If @tr is non-NULL and a trailing
 * character exists (size is non-zero after returning from this
 * function), @tr is updated with the trailing character.
 *
 * Returns -EINVAL if @size is 0, if no number could be parsed, if the
 * number fills the whole scratch buffer, or if it is followed by a
 * character that is not in @perm_tr (only checked when @perm_tr_len is
 * non-zero).
 */
static int proc_get_long(char **buf, size_t *size,
			  unsigned long *val, bool *neg,
			  const char *perm_tr, unsigned perm_tr_len, char *tr)
{
	char scratch[TMPBUFLEN];
	char *cur = scratch;
	ssize_t n = *size;
	bool negative;

	if (n <= 0)
		return -EINVAL;
	if (n > TMPBUFLEN - 1)
		n = TMPBUFLEN - 1;

	/* Work on a NUL-terminated private copy of the input window. */
	memcpy(scratch, *buf, n);
	scratch[n] = 0;

	/* A lone '-' is not a sign: something has to follow it. */
	negative = (*cur == '-' && *size > 1);
	*neg = negative;
	if (negative)
		cur++;

	if (!isdigit(*cur))
		return -EINVAL;

	if (strtoul_lenient(cur, &cur, 0, val))
		return -EINVAL;

	n = cur - scratch;

	/*
	 * We don't know if the next char is whitespace thus we may accept
	 * invalid integers (e.g. 1234...a) or two integers instead of one
	 * (e.g. 123...1). So let's not allow such large numbers.
	 */
	if (n == TMPBUFLEN - 1)
		return -EINVAL;

	if (n < *size) {
		/* Something follows the number: check it, then report it. */
		if (perm_tr_len && !memchr(perm_tr, *cur, perm_tr_len))
			return -EINVAL;
		if (tr)
			*tr = *cur;
	}

	*buf += n;
	*size -= n;

	return 0;
}
388  
389  /**
390   * proc_put_long - converts an integer to a decimal ASCII formatted string
391   *
392   * @buf: the user buffer
393   * @size: the size of the user buffer
394   * @val: the integer to be converted
395   * @neg: sign of the number, %TRUE for negative
396   *
397   * In case of success @buf and @size are updated with the amount of bytes
398   * written.
399   */
proc_put_long(void ** buf,size_t * size,unsigned long val,bool neg)400  static void proc_put_long(void **buf, size_t *size, unsigned long val, bool neg)
401  {
402  	int len;
403  	char tmp[TMPBUFLEN], *p = tmp;
404  
405  	sprintf(p, "%s%lu", neg ? "-" : "", val);
406  	len = strlen(tmp);
407  	if (len > *size)
408  		len = *size;
409  	memcpy(*buf, tmp, len);
410  	*size -= len;
411  	*buf += len;
412  }
413  #undef TMPBUFLEN
414  
/*
 * Store @c at *buf and step *buf forward by one byte, consuming one byte of
 * *size. Does nothing when *size is 0.
 */
static void proc_put_char(void **buf, size_t *size, char c)
{
	char *out;

	if (!*size)
		return;

	out = *buf;
	*out++ = c;
	*buf = out;
	(*size)--;
}
426  
/*
 * Default conversion between the sign/magnitude pair (@negp, @lvalp) used
 * by the text parser and the int stored at @valp.
 *
 * Write: stores the value in *valp, or returns -EINVAL if it does not fit
 * in an int. Read: splits *valp into sign and magnitude. @data is unused.
 * Returns 0 on success.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (!write) {
		int cur = READ_ONCE(*valp);
		bool negative = cur < 0;

		*negp = negative;
		/* Negating in unsigned arithmetic is safe even for INT_MIN. */
		*lvalp = negative ? -(unsigned long)cur : (unsigned long)cur;
		return 0;
	}

	if (*negp) {
		/* The magnitude of INT_MIN is INT_MAX + 1. */
		if (*lvalp > (unsigned long) INT_MAX + 1)
			return -EINVAL;
		WRITE_ONCE(*valp, -*lvalp);
	} else {
		if (*lvalp > (unsigned long) INT_MAX)
			return -EINVAL;
		WRITE_ONCE(*valp, *lvalp);
	}

	return 0;
}
453  
/*
 * Default conversion between the parsed unsigned long at @lvalp and the
 * unsigned int stored at @valp.
 *
 * Write: stores *lvalp in *valp, or returns -EINVAL if it exceeds UINT_MAX.
 * Read: copies *valp to *lvalp. @data is unused. Returns 0 on success.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
				  unsigned int *valp,
				  int write, void *data)
{
	if (!write) {
		unsigned int cur = READ_ONCE(*valp);

		*lvalp = (unsigned long)cur;
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;

	WRITE_ONCE(*valp, *lvalp);
	return 0;
}
468  
/*
 * Characters accepted directly after a number by the vector parsers below.
 * This is a plain character array without a terminating NUL; it is passed
 * around together with sizeof(proc_wspace_sep).
 */
static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
470  
/*
 * __do_proc_dointvec - read or write a vector of ints as ASCII text
 * @tbl_data: the int array backing the sysctl
 * @table: the sysctl table; table->maxlen gives the size of @tbl_data
 * @write: non-zero for a write to the sysctl
 * @buffer: the buffer to parse (write) or to fill (read)
 * @lenp: in: size of @buffer; out: number of bytes consumed or produced
 * @ppos: file position
 * @conv: conversion callback, do_proc_dointvec_conv() if NULL
 * @data: opaque argument handed to @conv
 *
 * Write: parses whitespace separated integers, at most PAGE_SIZE - 1 bytes
 * of input, and stores them one after another through @conv. Values stored
 * before a failure stay stored. A write that stores no value at all fails
 * with -EINVAL (or the parse error).
 *
 * Read: emits the values separated by tabs and followed by a newline. Reads
 * at a non-zero file position return no data.
 *
 * Returns 0 on success or a negative error code.
 */
static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
		  int write, void *buffer,
		  size_t *lenp, loff_t *ppos,
		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
			      int write, void *data),
		  void *data)
{
	int *slot = tbl_data;
	int slots_left, ret = 0;
	bool first = true;
	size_t remaining;
	char *cursor;

	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
		*lenp = 0;
		return 0;
	}

	slots_left = table->maxlen / sizeof(*slot);
	remaining = *lenp;

	if (!conv)
		conv = do_proc_dointvec_conv;

	if (write) {
		/* Pretend the whole write was consumed, but store nothing. */
		if (proc_first_pos_non_zero_ignore(ppos, table))
			goto advance;

		if (remaining > PAGE_SIZE - 1)
			remaining = PAGE_SIZE - 1;
		cursor = buffer;
	}

	while (remaining && slots_left--) {
		unsigned long lval;
		bool neg;

		if (write) {
			proc_skip_spaces(&cursor, &remaining);
			if (!remaining)
				break;

			ret = proc_get_long(&cursor, &remaining, &lval, &neg,
					    proc_wspace_sep,
					    sizeof(proc_wspace_sep), NULL);
			if (ret)
				break;

			if (conv(&neg, &lval, slot, 1, data)) {
				ret = -EINVAL;
				break;
			}
		} else {
			if (conv(&neg, &lval, slot, 0, data)) {
				ret = -EINVAL;
				break;
			}

			if (!first)
				proc_put_char(&buffer, &remaining, '\t');
			proc_put_long(&buffer, &remaining, lval, neg);
		}

		/* Only reached when this element was handled completely. */
		slot++;
		first = false;
	}

	if (write) {
		/* Trailing whitespace counts as consumed input. */
		if (!ret && remaining)
			proc_skip_spaces(&cursor, &remaining);
		/* Not a single value was stored: *lenp and *ppos stay put. */
		if (first)
			return ret ? ret : -EINVAL;
	} else if (!first && remaining && !ret) {
		proc_put_char(&buffer, &remaining, '\n');
	}

	*lenp -= remaining;
advance:
	*ppos += *lenp;
	return ret;
}
543  
/*
 * Run __do_proc_dointvec() on the storage the table itself points to
 * (table->data). All other arguments are passed through unchanged.
 */
static int do_proc_dointvec(struct ctl_table *table, int write,
		  void *buffer, size_t *lenp, loff_t *ppos,
		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
			      int write, void *data),
		  void *data)
{
	void *backing = table->data;

	return __do_proc_dointvec(backing, table, write, buffer, lenp, ppos,
				  conv, data);
}
553  
/*
 * do_proc_douintvec_w - write path for a single unsigned int sysctl
 * @tbl_data: the unsigned int backing the sysctl
 * @table: the sysctl table
 * @buffer: the text to parse
 * @lenp: size of @buffer; not modified here
 * @ppos: file position
 * @conv: conversion callback used to store the parsed value
 * @data: opaque argument handed to @conv
 *
 * Parses one non-negative number from at most PAGE_SIZE - 1 bytes of
 * @buffer and stores it through @conv. A write that has to be ignored
 * because of its file position only advances *@ppos and returns 0.
 *
 * Returns 0 on success, -EINVAL if the input is empty, malformed, negative
 * or rejected by @conv.
 */
static int do_proc_douintvec_w(unsigned int *tbl_data,
			       struct ctl_table *table,
			       void *buffer,
			       size_t *lenp, loff_t *ppos,
			       int (*conv)(unsigned long *lvalp,
					   unsigned int *valp,
					   int write, void *data),
			       void *data)
{
	size_t remaining = *lenp;
	char *cursor = buffer;
	unsigned long parsed;
	bool negative;

	if (proc_first_pos_non_zero_ignore(ppos, table)) {
		/* This is in keeping with old __do_proc_dointvec() */
		*ppos += *lenp;
		return 0;
	}

	if (remaining > PAGE_SIZE - 1)
		remaining = PAGE_SIZE - 1;

	proc_skip_spaces(&cursor, &remaining);
	if (!remaining)
		return -EINVAL;

	if (proc_get_long(&cursor, &remaining, &parsed, &negative,
			  proc_wspace_sep,
			  sizeof(proc_wspace_sep), NULL))
		return -EINVAL;
	if (negative)
		return -EINVAL;

	if (conv(&parsed, tbl_data, 1, data))
		return -EINVAL;

	/* Step over trailing whitespace; the outcome is not used further. */
	if (remaining)
		proc_skip_spaces(&cursor, &remaining);

	return 0;
}
610  
/*
 * do_proc_douintvec_r - read path for a single unsigned int sysctl
 * @tbl_data: the unsigned int backing the sysctl
 * @buffer: the buffer to fill
 * @lenp: in: size of @buffer; out: number of bytes produced
 * @ppos: file position, advanced by the number of bytes produced
 * @conv: conversion callback used to fetch the value
 * @data: opaque argument handed to @conv
 *
 * Emits the value as decimal text, followed by a newline if there is room.
 *
 * Returns 0 on success, -EINVAL if @conv fails; in the latter case no bytes
 * are produced.
 */
static int do_proc_douintvec_r(unsigned int *tbl_data, void *buffer,
			       size_t *lenp, loff_t *ppos,
			       int (*conv)(unsigned long *lvalp,
					   unsigned int *valp,
					   int write, void *data),
			       void *data)
{
	size_t room = *lenp;
	unsigned long value;
	int ret = 0;

	if (conv(&value, tbl_data, 0, data)) {
		ret = -EINVAL;
	} else {
		proc_put_long(&buffer, &room, value, false);
		if (room)
			proc_put_char(&buffer, &room, '\n');
	}

	*lenp -= room;
	*ppos += *lenp;

	return ret;
}
641  
/*
 * __do_proc_douintvec - read or write one unsigned int as ASCII text
 * @tbl_data: the unsigned int backing the sysctl
 * @table: the sysctl table; table->maxlen must describe exactly one value
 * @write: non-zero for a write to the sysctl
 * @buffer: the buffer to parse (write) or to fill (read)
 * @lenp: size of @buffer; updated by the read path
 * @ppos: file position
 * @conv: conversion callback, do_proc_douintvec_conv() if NULL
 * @data: opaque argument handed to @conv
 *
 * Returns 0 on success, -EINVAL for tables describing anything but a single
 * unsigned int, or the result of the read/write helper.
 */
static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
			       int write, void *buffer,
			       size_t *lenp, loff_t *ppos,
			       int (*conv)(unsigned long *lvalp,
					   unsigned int *valp,
					   int write, void *data),
			       void *data)
{
	unsigned int *value = tbl_data;
	unsigned int count;

	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
		*lenp = 0;
		return 0;
	}

	/*
	 * Arrays are not supported, keep this simple. *Do not* add
	 * support for them.
	 */
	count = table->maxlen / sizeof(*value);
	if (count != 1) {
		*lenp = 0;
		return -EINVAL;
	}

	if (!conv)
		conv = do_proc_douintvec_conv;

	return write ?
		do_proc_douintvec_w(value, table, buffer, lenp, ppos,
				    conv, data) :
		do_proc_douintvec_r(value, buffer, lenp, ppos, conv, data);
}
677  
/*
 * Run __do_proc_douintvec() on the storage the table itself points to
 * (table->data). All other arguments are passed through unchanged.
 */
int do_proc_douintvec(struct ctl_table *table, int write,
		      void *buffer, size_t *lenp, loff_t *ppos,
		      int (*conv)(unsigned long *lvalp,
				  unsigned int *valp,
				  int write, void *data),
		      void *data)
{
	void *backing = table->data;

	return __do_proc_douintvec(backing, table, write, buffer, lenp, ppos,
				   conv, data);
}
688  
689  /**
690   * proc_dobool - read/write a bool
691   * @table: the sysctl table
692   * @write: %TRUE if this is a write to the sysctl file
693   * @buffer: the user buffer
694   * @lenp: the size of the user buffer
695   * @ppos: file position
696   *
697   * Reads/writes one integer value from/to the user buffer,
698   * treated as an ASCII string.
699   *
700   * table->data must point to a bool variable and table->maxlen must
701   * be sizeof(bool).
702   *
703   * Returns 0 on success.
704   */
proc_dobool(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)705  int proc_dobool(struct ctl_table *table, int write, void *buffer,
706  		size_t *lenp, loff_t *ppos)
707  {
708  	struct ctl_table tmp;
709  	bool *data = table->data;
710  	int res, val;
711  
712  	/* Do not support arrays yet. */
713  	if (table->maxlen != sizeof(bool))
714  		return -EINVAL;
715  
716  	tmp = *table;
717  	tmp.maxlen = sizeof(val);
718  	tmp.data = &val;
719  
720  	val = READ_ONCE(*data);
721  	res = proc_dointvec(&tmp, write, buffer, lenp, ppos);
722  	if (res)
723  		return res;
724  	if (write)
725  		WRITE_ONCE(*data, val);
726  	return 0;
727  }
728  
729  /**
730   * proc_dointvec - read a vector of integers
731   * @table: the sysctl table
732   * @write: %TRUE if this is a write to the sysctl file
733   * @buffer: the user buffer
734   * @lenp: the size of the user buffer
735   * @ppos: file position
736   *
737   * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
738   * values from/to the user buffer, treated as an ASCII string.
739   *
740   * Returns 0 on success.
741   */
proc_dointvec(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)742  int proc_dointvec(struct ctl_table *table, int write, void *buffer,
743  		  size_t *lenp, loff_t *ppos)
744  {
745  	return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
746  }
747  
748  /**
749   * proc_douintvec - read a vector of unsigned integers
750   * @table: the sysctl table
751   * @write: %TRUE if this is a write to the sysctl file
752   * @buffer: the user buffer
753   * @lenp: the size of the user buffer
754   * @ppos: file position
755   *
756   * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
757   * values from/to the user buffer, treated as an ASCII string.
758   *
759   * Returns 0 on success.
760   */
proc_douintvec(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)761  int proc_douintvec(struct ctl_table *table, int write, void *buffer,
762  		size_t *lenp, loff_t *ppos)
763  {
764  	return do_proc_douintvec(table, write, buffer, lenp, ppos,
765  				 do_proc_douintvec_conv, NULL);
766  }
767  
768  /*
769   * Taint values can only be increased
770   * This means we can safely use a temporary.
771   */
proc_taint(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)772  static int proc_taint(struct ctl_table *table, int write,
773  			       void *buffer, size_t *lenp, loff_t *ppos)
774  {
775  	struct ctl_table t;
776  	unsigned long tmptaint = get_taint();
777  	int err;
778  
779  	if (write && !capable(CAP_SYS_ADMIN))
780  		return -EPERM;
781  
782  	t = *table;
783  	t.data = &tmptaint;
784  	err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
785  	if (err < 0)
786  		return err;
787  
788  	if (write) {
789  		int i;
790  
791  		/*
792  		 * If we are relying on panic_on_taint not producing
793  		 * false positives due to userspace input, bail out
794  		 * before setting the requested taint flags.
795  		 */
796  		if (panic_on_taint_nousertaint && (tmptaint & panic_on_taint))
797  			return -EINVAL;
798  
799  		/*
800  		 * Poor man's atomic or. Not worth adding a primitive
801  		 * to everyone's atomic.h for this
802  		 */
803  		for (i = 0; i < TAINT_FLAGS_COUNT; i++)
804  			if ((1UL << i) & tmptaint)
805  				add_taint(i, LOCKDEP_STILL_OK);
806  	}
807  
808  	return err;
809  }
810  
/**
 * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
 * @min: pointer to minimum allowable value, or NULL for no lower bound
 * @max: pointer to maximum allowable value, or NULL for no upper bound
 *
 * The do_proc_dointvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_dointvec_minmax() handler. Both bounds are
 * inclusive and are only applied to values being written.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};
824  
do_proc_dointvec_minmax_conv(bool * negp,unsigned long * lvalp,int * valp,int write,void * data)825  static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
826  					int *valp,
827  					int write, void *data)
828  {
829  	int tmp, ret;
830  	struct do_proc_dointvec_minmax_conv_param *param = data;
831  	/*
832  	 * If writing, first do so via a temporary local int so we can
833  	 * bounds-check it before touching *valp.
834  	 */
835  	int *ip = write ? &tmp : valp;
836  
837  	ret = do_proc_dointvec_conv(negp, lvalp, ip, write, data);
838  	if (ret)
839  		return ret;
840  
841  	if (write) {
842  		if ((param->min && *param->min > tmp) ||
843  		    (param->max && *param->max < tmp))
844  			return -EINVAL;
845  		WRITE_ONCE(*valp, tmp);
846  	}
847  
848  	return 0;
849  }
850  
851  /**
852   * proc_dointvec_minmax - read a vector of integers with min/max values
853   * @table: the sysctl table
854   * @write: %TRUE if this is a write to the sysctl file
855   * @buffer: the user buffer
856   * @lenp: the size of the user buffer
857   * @ppos: file position
858   *
859   * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
860   * values from/to the user buffer, treated as an ASCII string.
861   *
862   * This routine will ensure the values are within the range specified by
863   * table->extra1 (min) and table->extra2 (max).
864   *
865   * Returns 0 on success or -EINVAL on write when the range check fails.
866   */
proc_dointvec_minmax(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)867  int proc_dointvec_minmax(struct ctl_table *table, int write,
868  		  void *buffer, size_t *lenp, loff_t *ppos)
869  {
870  	struct do_proc_dointvec_minmax_conv_param param = {
871  		.min = (int *) table->extra1,
872  		.max = (int *) table->extra2,
873  	};
874  	return do_proc_dointvec(table, write, buffer, lenp, ppos,
875  				do_proc_dointvec_minmax_conv, &param);
876  }
877  
/**
 * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
 * @min: pointer to minimum allowable value, or NULL for no lower bound
 * @max: pointer to maximum allowable value, or NULL for no upper bound
 *
 * The do_proc_douintvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_douintvec_minmax() handler. Both bounds are
 * inclusive and are only applied to values being written.
 */
struct do_proc_douintvec_minmax_conv_param {
	unsigned int *min;
	unsigned int *max;
};
891  
do_proc_douintvec_minmax_conv(unsigned long * lvalp,unsigned int * valp,int write,void * data)892  static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
893  					 unsigned int *valp,
894  					 int write, void *data)
895  {
896  	int ret;
897  	unsigned int tmp;
898  	struct do_proc_douintvec_minmax_conv_param *param = data;
899  	/* write via temporary local uint for bounds-checking */
900  	unsigned int *up = write ? &tmp : valp;
901  
902  	ret = do_proc_douintvec_conv(lvalp, up, write, data);
903  	if (ret)
904  		return ret;
905  
906  	if (write) {
907  		if ((param->min && *param->min > tmp) ||
908  		    (param->max && *param->max < tmp))
909  			return -ERANGE;
910  
911  		WRITE_ONCE(*valp, tmp);
912  	}
913  
914  	return 0;
915  }
916  
917  /**
918   * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
919   * @table: the sysctl table
920   * @write: %TRUE if this is a write to the sysctl file
921   * @buffer: the user buffer
922   * @lenp: the size of the user buffer
923   * @ppos: file position
924   *
925   * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
926   * values from/to the user buffer, treated as an ASCII string. Negative
927   * strings are not allowed.
928   *
929   * This routine will ensure the values are within the range specified by
930   * table->extra1 (min) and table->extra2 (max). There is a final sanity
931   * check for UINT_MAX to avoid having to support wrap around uses from
932   * userspace.
933   *
934   * Returns 0 on success or -ERANGE on write when the range check fails.
935   */
proc_douintvec_minmax(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)936  int proc_douintvec_minmax(struct ctl_table *table, int write,
937  			  void *buffer, size_t *lenp, loff_t *ppos)
938  {
939  	struct do_proc_douintvec_minmax_conv_param param = {
940  		.min = (unsigned int *) table->extra1,
941  		.max = (unsigned int *) table->extra2,
942  	};
943  	return do_proc_douintvec(table, write, buffer, lenp, ppos,
944  				 do_proc_douintvec_minmax_conv, &param);
945  }
946  
947  /**
948   * proc_dou8vec_minmax - read a vector of unsigned chars with min/max values
949   * @table: the sysctl table
950   * @write: %TRUE if this is a write to the sysctl file
951   * @buffer: the user buffer
952   * @lenp: the size of the user buffer
953   * @ppos: file position
954   *
955   * Reads/writes up to table->maxlen/sizeof(u8) unsigned chars
956   * values from/to the user buffer, treated as an ASCII string. Negative
957   * strings are not allowed.
958   *
959   * This routine will ensure the values are within the range specified by
960   * table->extra1 (min) and table->extra2 (max).
961   *
962   * Returns 0 on success or an error on write when the range check fails.
963   */
proc_dou8vec_minmax(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)964  int proc_dou8vec_minmax(struct ctl_table *table, int write,
965  			void *buffer, size_t *lenp, loff_t *ppos)
966  {
967  	struct ctl_table tmp;
968  	unsigned int min = 0, max = 255U, val;
969  	u8 *data = table->data;
970  	struct do_proc_douintvec_minmax_conv_param param = {
971  		.min = &min,
972  		.max = &max,
973  	};
974  	int res;
975  
976  	/* Do not support arrays yet. */
977  	if (table->maxlen != sizeof(u8))
978  		return -EINVAL;
979  
980  	if (table->extra1) {
981  		min = *(unsigned int *) table->extra1;
982  		if (min > 255U)
983  			return -EINVAL;
984  	}
985  	if (table->extra2) {
986  		max = *(unsigned int *) table->extra2;
987  		if (max > 255U)
988  			return -EINVAL;
989  	}
990  
991  	tmp = *table;
992  
993  	tmp.maxlen = sizeof(val);
994  	tmp.data = &val;
995  	val = READ_ONCE(*data);
996  	res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos,
997  				do_proc_douintvec_minmax_conv, &param);
998  	if (res)
999  		return res;
1000  	if (write)
1001  		WRITE_ONCE(*data, val);
1002  	return 0;
1003  }
1004  EXPORT_SYMBOL_GPL(proc_dou8vec_minmax);
1005  
1006  #ifdef CONFIG_MAGIC_SYSRQ
/*
 * Handler for the "sysrq" entry: reads report the value of sysrq_mask(),
 * successful writes pass the parsed value to sysrq_toggle_support().
 */
static int sysrq_sysctl_handler(struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	/* Operate on a local snapshot; the table entry carries no data pointer. */
	int mask = sysrq_mask();
	int err;

	err = __do_proc_dointvec(&mask, table, write, buffer, lenp, ppos,
				 NULL, NULL);
	if (err)
		return err;

	if (write)
		sysrq_toggle_support(mask);

	return 0;
}
1024  #endif
1025  
/*
 * Common worker for the unsigned long vector handlers.
 *
 * @data points at the array of unsigned longs; table->maxlen gives its size
 * in bytes and table->extra1 / table->extra2 optionally point at the minimum
 * and maximum allowed stored value.
 *
 * On write, each parsed number is scaled by convmul / convdiv before it is
 * range checked and stored; negative numbers, parse errors and out-of-range
 * values fail with -EINVAL, as does a write that stores nothing at all. At
 * most PAGE_SIZE - 1 bytes of the buffer are looked at.
 *
 * On read, each stored value is scaled by convdiv / convmul and emitted
 * tab-separated, with a trailing newline when there is room left.
 *
 * *lenp is updated to the number of bytes consumed/produced and *ppos is
 * advanced by that amount.
 */
static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table,
		int write, void *buffer, size_t *lenp, loff_t *ppos,
		unsigned long convmul, unsigned long convdiv)
{
	unsigned long *slot, *lo, *hi;
	int remaining, first = 1, err = 0;
	size_t left;
	char *p;

	if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
		*lenp = 0;
		return 0;
	}

	slot = data;
	lo = table->extra1;
	hi = table->extra2;
	remaining = table->maxlen / sizeof(unsigned long);
	left = *lenp;

	if (write) {
		if (proc_first_pos_non_zero_ignore(ppos, table))
			goto out;

		if (left > PAGE_SIZE - 1)
			left = PAGE_SIZE - 1;
		p = buffer;
	}

	while (left && remaining--) {
		unsigned long val;

		if (!write) {
			val = convdiv * READ_ONCE(*slot) / convmul;
			if (!first)
				proc_put_char(&buffer, &left, '\t');
			proc_put_long(&buffer, &left, val, false);
		} else {
			bool neg;

			proc_skip_spaces(&p, &left);
			if (!left)
				break;

			err = proc_get_long(&p, &left, &val, &neg,
					    proc_wspace_sep,
					    sizeof(proc_wspace_sep), NULL);
			if (err || neg) {
				err = -EINVAL;
				break;
			}

			val = convmul * val / convdiv;
			if ((lo && val < *lo) || (hi && val > *hi)) {
				err = -EINVAL;
				break;
			}
			WRITE_ONCE(*slot, val);
		}

		/* Only reached when this element was fully handled. */
		slot++;
		first = 0;
	}

	if (write) {
		if (!err)
			proc_skip_spaces(&p, &left);
		/* Nothing was stored: report the parse error, or -EINVAL. */
		if (first)
			return err ? err : -EINVAL;
	} else if (!first && left && !err) {
		proc_put_char(&buffer, &left, '\n');
	}

	*lenp -= left;
out:
	*ppos += *lenp;
	return err;
}
1098  
/* Run the unsigned long vector worker on the table's own data pointer. */
static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
		void *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul,
		unsigned long convdiv)
{
	void *storage = table->data;

	return __do_proc_doulongvec_minmax(storage, table, write, buffer,
					   lenp, ppos, convmul, convdiv);
}
1106  
1107  /**
1108   * proc_doulongvec_minmax - read a vector of long integers with min/max values
1109   * @table: the sysctl table
1110   * @write: %TRUE if this is a write to the sysctl file
1111   * @buffer: the user buffer
1112   * @lenp: the size of the user buffer
1113   * @ppos: file position
1114   *
1115   * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1116   * values from/to the user buffer, treated as an ASCII string.
1117   *
1118   * This routine will ensure the values are within the range specified by
1119   * table->extra1 (min) and table->extra2 (max).
1120   *
1121   * Returns 0 on success.
1122   */
proc_doulongvec_minmax(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)1123  int proc_doulongvec_minmax(struct ctl_table *table, int write,
1124  			   void *buffer, size_t *lenp, loff_t *ppos)
1125  {
1126      return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
1127  }
1128  
1129  /**
1130   * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
1131   * @table: the sysctl table
1132   * @write: %TRUE if this is a write to the sysctl file
1133   * @buffer: the user buffer
1134   * @lenp: the size of the user buffer
1135   * @ppos: file position
1136   *
1137   * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1138   * values from/to the user buffer, treated as an ASCII string. The values
1139   * are treated as milliseconds, and converted to jiffies when they are stored.
1140   *
1141   * This routine will ensure the values are within the range specified by
1142   * table->extra1 (min) and table->extra2 (max).
1143   *
1144   * Returns 0 on success.
1145   */
proc_doulongvec_ms_jiffies_minmax(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)1146  int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1147  				      void *buffer, size_t *lenp, loff_t *ppos)
1148  {
1149      return do_proc_doulongvec_minmax(table, write, buffer,
1150  				     lenp, ppos, HZ, 1000l);
1151  }
1152  
1153  
do_proc_dointvec_jiffies_conv(bool * negp,unsigned long * lvalp,int * valp,int write,void * data)1154  static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
1155  					 int *valp,
1156  					 int write, void *data)
1157  {
1158  	if (write) {
1159  		if (*lvalp > INT_MAX / HZ)
1160  			return 1;
1161  		if (*negp)
1162  			WRITE_ONCE(*valp, -*lvalp * HZ);
1163  		else
1164  			WRITE_ONCE(*valp, *lvalp * HZ);
1165  	} else {
1166  		int val = READ_ONCE(*valp);
1167  		unsigned long lval;
1168  		if (val < 0) {
1169  			*negp = true;
1170  			lval = -(unsigned long)val;
1171  		} else {
1172  			*negp = false;
1173  			lval = (unsigned long)val;
1174  		}
1175  		*lvalp = lval / HZ;
1176  	}
1177  	return 0;
1178  }
1179  
do_proc_dointvec_userhz_jiffies_conv(bool * negp,unsigned long * lvalp,int * valp,int write,void * data)1180  static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
1181  						int *valp,
1182  						int write, void *data)
1183  {
1184  	if (write) {
1185  		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
1186  			return 1;
1187  		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
1188  	} else {
1189  		int val = *valp;
1190  		unsigned long lval;
1191  		if (val < 0) {
1192  			*negp = true;
1193  			lval = -(unsigned long)val;
1194  		} else {
1195  			*negp = false;
1196  			lval = (unsigned long)val;
1197  		}
1198  		*lvalp = jiffies_to_clock_t(lval);
1199  	}
1200  	return 0;
1201  }
1202  
/*
 * Converter between milliseconds (user side: sign in *negp, magnitude in
 * *lvalp) and jiffies (kernel side: *valp).
 *
 * Returns 1 on write when the magnitude does not fit msecs_to_jiffies()'s
 * unsigned int argument or when the converted value exceeds INT_MAX,
 * 0 otherwise.
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		unsigned long jif;

		/*
		 * msecs_to_jiffies() takes an unsigned int. Refuse magnitudes
		 * that do not fit rather than letting them be truncated into
		 * an unrelated, smaller timeout that would then be accepted.
		 */
		if (*lvalp > UINT_MAX)
			return 1;

		jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
		if (jif > INT_MAX)
			return 1;
		WRITE_ONCE(*valp, (int)jif);
	} else {
		int val = READ_ONCE(*valp);
		unsigned long lval;

		if (val < 0) {
			*negp = true;
			/* Negate in unsigned arithmetic to obtain the magnitude. */
			lval = -(unsigned long)val;
		} else {
			*negp = false;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_msecs(lval);
	}
	return 0;
}
1227  
do_proc_dointvec_ms_jiffies_minmax_conv(bool * negp,unsigned long * lvalp,int * valp,int write,void * data)1228  static int do_proc_dointvec_ms_jiffies_minmax_conv(bool *negp, unsigned long *lvalp,
1229  						int *valp, int write, void *data)
1230  {
1231  	int tmp, ret;
1232  	struct do_proc_dointvec_minmax_conv_param *param = data;
1233  	/*
1234  	 * If writing, first do so via a temporary local int so we can
1235  	 * bounds-check it before touching *valp.
1236  	 */
1237  	int *ip = write ? &tmp : valp;
1238  
1239  	ret = do_proc_dointvec_ms_jiffies_conv(negp, lvalp, ip, write, data);
1240  	if (ret)
1241  		return ret;
1242  
1243  	if (write) {
1244  		if ((param->min && *param->min > tmp) ||
1245  				(param->max && *param->max < tmp))
1246  			return -EINVAL;
1247  		*valp = tmp;
1248  	}
1249  	return 0;
1250  }
1251  
1252  /**
1253   * proc_dointvec_jiffies - read a vector of integers as seconds
1254   * @table: the sysctl table
1255   * @write: %TRUE if this is a write to the sysctl file
1256   * @buffer: the user buffer
1257   * @lenp: the size of the user buffer
1258   * @ppos: file position
1259   *
1260   * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1261   * values from/to the user buffer, treated as an ASCII string.
1262   * The values read are assumed to be in seconds, and are converted into
1263   * jiffies.
1264   *
1265   * Returns 0 on success.
1266   */
proc_dointvec_jiffies(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)1267  int proc_dointvec_jiffies(struct ctl_table *table, int write,
1268  			  void *buffer, size_t *lenp, loff_t *ppos)
1269  {
1270      return do_proc_dointvec(table,write,buffer,lenp,ppos,
1271  		    	    do_proc_dointvec_jiffies_conv,NULL);
1272  }
1273  
/**
 * proc_dointvec_ms_jiffies_minmax - read a vector of integers as milliseconds with min/max values
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * User space sees milliseconds; the stored values are jiffies. On write,
 * the value is checked against table->extra1 (min) and table->extra2 (max)
 * after it has been converted to jiffies.
 */
int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write,
			  void *buffer, size_t *lenp, loff_t *ppos)
{
	struct do_proc_dointvec_minmax_conv_param range = {
		.min = table->extra1,
		.max = table->extra2,
	};

	return do_proc_dointvec(table, write, buffer, lenp, ppos,
				do_proc_dointvec_ms_jiffies_minmax_conv,
				&range);
}
1284  
1285  /**
1286   * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
1287   * @table: the sysctl table
1288   * @write: %TRUE if this is a write to the sysctl file
1289   * @buffer: the user buffer
1290   * @lenp: the size of the user buffer
1291   * @ppos: pointer to the file position
1292   *
1293   * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1294   * values from/to the user buffer, treated as an ASCII string.
1295   * The values read are assumed to be in 1/USER_HZ seconds, and
1296   * are converted into jiffies.
1297   *
1298   * Returns 0 on success.
1299   */
proc_dointvec_userhz_jiffies(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)1300  int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
1301  				 void *buffer, size_t *lenp, loff_t *ppos)
1302  {
1303  	return do_proc_dointvec(table, write, buffer, lenp, ppos,
1304  				do_proc_dointvec_userhz_jiffies_conv, NULL);
1305  }
1306  
1307  /**
1308   * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
1309   * @table: the sysctl table
1310   * @write: %TRUE if this is a write to the sysctl file
1311   * @buffer: the user buffer
1312   * @lenp: the size of the user buffer
1313   * @ppos: file position
1314   * @ppos: the current position in the file
1315   *
1316   * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1317   * values from/to the user buffer, treated as an ASCII string.
1318   * The values read are assumed to be in 1/1000 seconds, and
1319   * are converted into jiffies.
1320   *
1321   * Returns 0 on success.
1322   */
proc_dointvec_ms_jiffies(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)1323  int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, void *buffer,
1324  		size_t *lenp, loff_t *ppos)
1325  {
1326  	return do_proc_dointvec(table, write, buffer, lenp, ppos,
1327  				do_proc_dointvec_ms_jiffies_conv, NULL);
1328  }
1329  
/*
 * Handler for the "cad_pid" entry: reads report pid_vnr(cad_pid); a
 * successful write looks the new number up with find_get_pid() and swaps
 * it in, dropping the reference to the previous pid. Returns -ESRCH when
 * the lookup fails.
 */
static int proc_do_cad_pid(struct ctl_table *table, int write, void *buffer,
		size_t *lenp, loff_t *ppos)
{
	pid_t nr = pid_vnr(cad_pid);
	struct pid *replacement;
	int err;

	err = __do_proc_dointvec(&nr, table, write, buffer, lenp, ppos,
				 NULL, NULL);
	if (err || !write)
		return err;

	replacement = find_get_pid(nr);
	if (!replacement)
		return -ESRCH;

	/* xchg() hands back the old pid so its reference can be released. */
	put_pid(xchg(&cad_pid, replacement));
	return 0;
}
1351  
1352  /**
1353   * proc_do_large_bitmap - read/write from/to a large bitmap
1354   * @table: the sysctl table
1355   * @write: %TRUE if this is a write to the sysctl file
1356   * @buffer: the user buffer
1357   * @lenp: the size of the user buffer
1358   * @ppos: file position
1359   *
1360   * The bitmap is stored at table->data and the bitmap length (in bits)
1361   * in table->maxlen.
1362   *
1363   * We use a range comma separated format (e.g. 1,3-4,10-10) so that
1364   * large bitmaps may be represented in a compact manner. Writing into
1365   * the file will clear the bitmap then update it with the given input.
1366   *
1367   * Returns 0 on success.
1368   */
proc_do_large_bitmap(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)1369  int proc_do_large_bitmap(struct ctl_table *table, int write,
1370  			 void *buffer, size_t *lenp, loff_t *ppos)
1371  {
1372  	int err = 0;
1373  	size_t left = *lenp;
1374  	unsigned long bitmap_len = table->maxlen;
1375  	unsigned long *bitmap = *(unsigned long **) table->data;
1376  	unsigned long *tmp_bitmap = NULL;
1377  	char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
1378  
1379  	if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
1380  		*lenp = 0;
1381  		return 0;
1382  	}
1383  
1384  	if (write) {
1385  		char *p = buffer;
1386  		size_t skipped = 0;
1387  
1388  		if (left > PAGE_SIZE - 1) {
1389  			left = PAGE_SIZE - 1;
1390  			/* How much of the buffer we'll skip this pass */
1391  			skipped = *lenp - left;
1392  		}
1393  
1394  		tmp_bitmap = bitmap_zalloc(bitmap_len, GFP_KERNEL);
1395  		if (!tmp_bitmap)
1396  			return -ENOMEM;
1397  		proc_skip_char(&p, &left, '\n');
1398  		while (!err && left) {
1399  			unsigned long val_a, val_b;
1400  			bool neg;
1401  			size_t saved_left;
1402  
1403  			/* In case we stop parsing mid-number, we can reset */
1404  			saved_left = left;
1405  			err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
1406  					     sizeof(tr_a), &c);
1407  			/*
1408  			 * If we consumed the entirety of a truncated buffer or
1409  			 * only one char is left (may be a "-"), then stop here,
1410  			 * reset, & come back for more.
1411  			 */
1412  			if ((left <= 1) && skipped) {
1413  				left = saved_left;
1414  				break;
1415  			}
1416  
1417  			if (err)
1418  				break;
1419  			if (val_a >= bitmap_len || neg) {
1420  				err = -EINVAL;
1421  				break;
1422  			}
1423  
1424  			val_b = val_a;
1425  			if (left) {
1426  				p++;
1427  				left--;
1428  			}
1429  
1430  			if (c == '-') {
1431  				err = proc_get_long(&p, &left, &val_b,
1432  						     &neg, tr_b, sizeof(tr_b),
1433  						     &c);
1434  				/*
1435  				 * If we consumed all of a truncated buffer or
1436  				 * then stop here, reset, & come back for more.
1437  				 */
1438  				if (!left && skipped) {
1439  					left = saved_left;
1440  					break;
1441  				}
1442  
1443  				if (err)
1444  					break;
1445  				if (val_b >= bitmap_len || neg ||
1446  				    val_a > val_b) {
1447  					err = -EINVAL;
1448  					break;
1449  				}
1450  				if (left) {
1451  					p++;
1452  					left--;
1453  				}
1454  			}
1455  
1456  			bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
1457  			proc_skip_char(&p, &left, '\n');
1458  		}
1459  		left += skipped;
1460  	} else {
1461  		unsigned long bit_a, bit_b = 0;
1462  		bool first = 1;
1463  
1464  		while (left) {
1465  			bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
1466  			if (bit_a >= bitmap_len)
1467  				break;
1468  			bit_b = find_next_zero_bit(bitmap, bitmap_len,
1469  						   bit_a + 1) - 1;
1470  
1471  			if (!first)
1472  				proc_put_char(&buffer, &left, ',');
1473  			proc_put_long(&buffer, &left, bit_a, false);
1474  			if (bit_a != bit_b) {
1475  				proc_put_char(&buffer, &left, '-');
1476  				proc_put_long(&buffer, &left, bit_b, false);
1477  			}
1478  
1479  			first = 0; bit_b++;
1480  		}
1481  		proc_put_char(&buffer, &left, '\n');
1482  	}
1483  
1484  	if (!err) {
1485  		if (write) {
1486  			if (*ppos)
1487  				bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
1488  			else
1489  				bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
1490  		}
1491  		*lenp -= left;
1492  		*ppos += *lenp;
1493  	}
1494  
1495  	bitmap_free(tmp_bitmap);
1496  	return err;
1497  }
1498  
1499  #else /* CONFIG_PROC_SYSCTL */
1500  
/*
 * CONFIG_PROC_SYSCTL is not set: each handler in this group is a stub
 * that does nothing but return -ENOSYS.
 */
int proc_dostring(struct ctl_table *table, int write, void *buffer,
		  size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_dobool(struct ctl_table *table, int write, void *buffer,
		size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_dointvec(struct ctl_table *table, int write, void *buffer,
		  size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_douintvec(struct ctl_table *table, int write, void *buffer,
		   size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_dointvec_minmax(struct ctl_table *table, int write, void *buffer,
			 size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_douintvec_minmax(struct ctl_table *table, int write, void *buffer,
			  size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_dou8vec_minmax(struct ctl_table *table, int write, void *buffer,
			size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_dointvec_jiffies(struct ctl_table *table, int write, void *buffer,
			  size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write,
				    void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
				 void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_doulongvec_minmax(struct ctl_table *table, int write, void *buffer,
			   size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
				      void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_do_large_bitmap(struct ctl_table *table, int write, void *buffer,
			 size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1584  
1585  #endif /* CONFIG_PROC_SYSCTL */
1586  
1587  #if defined(CONFIG_SYSCTL)
/*
 * Expose a static key (table->data) as a 0/1 integer.
 *
 * The current state is copied into a local int, which is then run through
 * proc_dointvec_minmax() with limits 0..1 via a temporary table; after a
 * successful write the key is enabled or disabled to match. Writes require
 * CAP_SYS_ADMIN (-EPERM otherwise). A function-local mutex is held across
 * the whole sequence.
 */
int proc_do_static_key(struct ctl_table *table, int write,
		       void *buffer, size_t *lenp, loff_t *ppos)
{
	static DEFINE_MUTEX(static_key_mutex);
	struct static_key *key = table->data;
	int enabled, err;
	struct ctl_table proxy = {
		.data   = &enabled,
		.maxlen = sizeof(enabled),
		.mode   = table->mode,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	};

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&static_key_mutex);

	enabled = static_key_enabled(key);
	err = proc_dointvec_minmax(&proxy, write, buffer, lenp, ppos);
	if (!err && write) {
		if (enabled)
			static_key_enable(key);
		else
			static_key_disable(key);
	}

	mutex_unlock(&static_key_mutex);
	return err;
}
1617  
1618  static struct ctl_table kern_table[] = {
1619  	{
1620  		.procname	= "panic",
1621  		.data		= &panic_timeout,
1622  		.maxlen		= sizeof(int),
1623  		.mode		= 0644,
1624  		.proc_handler	= proc_dointvec,
1625  	},
1626  #ifdef CONFIG_PROC_SYSCTL
1627  	{
1628  		.procname	= "tainted",
1629  		.maxlen 	= sizeof(long),
1630  		.mode		= 0644,
1631  		.proc_handler	= proc_taint,
1632  	},
1633  	{
1634  		.procname	= "sysctl_writes_strict",
1635  		.data		= &sysctl_writes_strict,
1636  		.maxlen		= sizeof(int),
1637  		.mode		= 0644,
1638  		.proc_handler	= proc_dointvec_minmax,
1639  		.extra1		= SYSCTL_NEG_ONE,
1640  		.extra2		= SYSCTL_ONE,
1641  	},
1642  #endif
1643  	{
1644  		.procname	= "print-fatal-signals",
1645  		.data		= &print_fatal_signals,
1646  		.maxlen		= sizeof(int),
1647  		.mode		= 0644,
1648  		.proc_handler	= proc_dointvec,
1649  	},
1650  #ifdef CONFIG_SPARC
1651  	{
1652  		.procname	= "reboot-cmd",
1653  		.data		= reboot_command,
1654  		.maxlen		= 256,
1655  		.mode		= 0644,
1656  		.proc_handler	= proc_dostring,
1657  	},
1658  	{
1659  		.procname	= "stop-a",
1660  		.data		= &stop_a_enabled,
1661  		.maxlen		= sizeof (int),
1662  		.mode		= 0644,
1663  		.proc_handler	= proc_dointvec,
1664  	},
1665  	{
1666  		.procname	= "scons-poweroff",
1667  		.data		= &scons_pwroff,
1668  		.maxlen		= sizeof (int),
1669  		.mode		= 0644,
1670  		.proc_handler	= proc_dointvec,
1671  	},
1672  #endif
1673  #ifdef CONFIG_SPARC64
1674  	{
1675  		.procname	= "tsb-ratio",
1676  		.data		= &sysctl_tsb_ratio,
1677  		.maxlen		= sizeof (int),
1678  		.mode		= 0644,
1679  		.proc_handler	= proc_dointvec,
1680  	},
1681  #endif
1682  #ifdef CONFIG_PARISC
1683  	{
1684  		.procname	= "soft-power",
1685  		.data		= &pwrsw_enabled,
1686  		.maxlen		= sizeof (int),
1687  		.mode		= 0644,
1688  		.proc_handler	= proc_dointvec,
1689  	},
1690  #endif
1691  #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
1692  	{
1693  		.procname	= "unaligned-trap",
1694  		.data		= &unaligned_enabled,
1695  		.maxlen		= sizeof (int),
1696  		.mode		= 0644,
1697  		.proc_handler	= proc_dointvec,
1698  	},
1699  #endif
1700  #ifdef CONFIG_STACK_TRACER
1701  	{
1702  		.procname	= "stack_tracer_enabled",
1703  		.data		= &stack_tracer_enabled,
1704  		.maxlen		= sizeof(int),
1705  		.mode		= 0644,
1706  		.proc_handler	= stack_trace_sysctl,
1707  	},
1708  #endif
1709  #ifdef CONFIG_TRACING
1710  	{
1711  		.procname	= "ftrace_dump_on_oops",
1712  		.data		= &ftrace_dump_on_oops,
1713  		.maxlen		= sizeof(int),
1714  		.mode		= 0644,
1715  		.proc_handler	= proc_dointvec,
1716  	},
1717  	{
1718  		.procname	= "traceoff_on_warning",
1719  		.data		= &__disable_trace_on_warning,
1720  		.maxlen		= sizeof(__disable_trace_on_warning),
1721  		.mode		= 0644,
1722  		.proc_handler	= proc_dointvec,
1723  	},
1724  	{
1725  		.procname	= "tracepoint_printk",
1726  		.data		= &tracepoint_printk,
1727  		.maxlen		= sizeof(tracepoint_printk),
1728  		.mode		= 0644,
1729  		.proc_handler	= tracepoint_printk_sysctl,
1730  	},
1731  #endif
1732  #ifdef CONFIG_MODULES
1733  	{
1734  		.procname	= "modprobe",
1735  		.data		= &modprobe_path,
1736  		.maxlen		= KMOD_PATH_LEN,
1737  		.mode		= 0644,
1738  		.proc_handler	= proc_dostring,
1739  	},
1740  	{
1741  		.procname	= "modules_disabled",
1742  		.data		= &modules_disabled,
1743  		.maxlen		= sizeof(int),
1744  		.mode		= 0644,
1745  		/* only handle a transition from default "0" to "1" */
1746  		.proc_handler	= proc_dointvec_minmax,
1747  		.extra1		= SYSCTL_ONE,
1748  		.extra2		= SYSCTL_ONE,
1749  	},
1750  #endif
1751  #ifdef CONFIG_UEVENT_HELPER
1752  	{
1753  		.procname	= "hotplug",
1754  		.data		= &uevent_helper,
1755  		.maxlen		= UEVENT_HELPER_PATH_LEN,
1756  		.mode		= 0644,
1757  		.proc_handler	= proc_dostring,
1758  	},
1759  #endif
1760  #ifdef CONFIG_MAGIC_SYSRQ
1761  	{
1762  		.procname	= "sysrq",
1763  		.data		= NULL,
1764  		.maxlen		= sizeof (int),
1765  		.mode		= 0644,
1766  		.proc_handler	= sysrq_sysctl_handler,
1767  	},
1768  #endif
1769  #ifdef CONFIG_PROC_SYSCTL
1770  	{
1771  		.procname	= "cad_pid",
1772  		.data		= NULL,
1773  		.maxlen		= sizeof (int),
1774  		.mode		= 0600,
1775  		.proc_handler	= proc_do_cad_pid,
1776  	},
1777  #endif
1778  	{
1779  		.procname	= "threads-max",
1780  		.data		= NULL,
1781  		.maxlen		= sizeof(int),
1782  		.mode		= 0644,
1783  		.proc_handler	= sysctl_max_threads,
1784  	},
1785  	{
1786  		.procname	= "overflowuid",
1787  		.data		= &overflowuid,
1788  		.maxlen		= sizeof(int),
1789  		.mode		= 0644,
1790  		.proc_handler	= proc_dointvec_minmax,
1791  		.extra1		= SYSCTL_ZERO,
1792  		.extra2		= SYSCTL_MAXOLDUID,
1793  	},
1794  	{
1795  		.procname	= "overflowgid",
1796  		.data		= &overflowgid,
1797  		.maxlen		= sizeof(int),
1798  		.mode		= 0644,
1799  		.proc_handler	= proc_dointvec_minmax,
1800  		.extra1		= SYSCTL_ZERO,
1801  		.extra2		= SYSCTL_MAXOLDUID,
1802  	},
1803  #ifdef CONFIG_S390
1804  	{
1805  		.procname	= "userprocess_debug",
1806  		.data		= &show_unhandled_signals,
1807  		.maxlen		= sizeof(int),
1808  		.mode		= 0644,
1809  		.proc_handler	= proc_dointvec,
1810  	},
1811  #endif
1812  	{
1813  		.procname	= "pid_max",
1814  		.data		= &pid_max,
1815  		.maxlen		= sizeof (int),
1816  		.mode		= 0644,
1817  		.proc_handler	= proc_dointvec_minmax,
1818  		.extra1		= &pid_max_min,
1819  		.extra2		= &pid_max_max,
1820  	},
1821  	{
1822  		.procname	= "panic_on_oops",
1823  		.data		= &panic_on_oops,
1824  		.maxlen		= sizeof(int),
1825  		.mode		= 0644,
1826  		.proc_handler	= proc_dointvec,
1827  	},
1828  	{
1829  		.procname	= "panic_print",
1830  		.data		= &panic_print,
1831  		.maxlen		= sizeof(unsigned long),
1832  		.mode		= 0644,
1833  		.proc_handler	= proc_doulongvec_minmax,
1834  	},
1835  	{
1836  		.procname	= "ngroups_max",
1837  		.data		= (void *)&ngroups_max,
1838  		.maxlen		= sizeof (int),
1839  		.mode		= 0444,
1840  		.proc_handler	= proc_dointvec,
1841  	},
1842  	{
1843  		.procname	= "cap_last_cap",
1844  		.data		= (void *)&cap_last_cap,
1845  		.maxlen		= sizeof(int),
1846  		.mode		= 0444,
1847  		.proc_handler	= proc_dointvec,
1848  	},
1849  #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
1850  	{
1851  		.procname       = "unknown_nmi_panic",
1852  		.data           = &unknown_nmi_panic,
1853  		.maxlen         = sizeof (int),
1854  		.mode           = 0644,
1855  		.proc_handler   = proc_dointvec,
1856  	},
1857  #endif
1858  
1859  #if (defined(CONFIG_X86_32) || defined(CONFIG_PARISC)) && \
1860  	defined(CONFIG_DEBUG_STACKOVERFLOW)
1861  	{
1862  		.procname	= "panic_on_stackoverflow",
1863  		.data		= &sysctl_panic_on_stackoverflow,
1864  		.maxlen		= sizeof(int),
1865  		.mode		= 0644,
1866  		.proc_handler	= proc_dointvec,
1867  	},
1868  #endif
1869  #if defined(CONFIG_X86)
1870  	{
1871  		.procname	= "panic_on_unrecovered_nmi",
1872  		.data		= &panic_on_unrecovered_nmi,
1873  		.maxlen		= sizeof(int),
1874  		.mode		= 0644,
1875  		.proc_handler	= proc_dointvec,
1876  	},
1877  	{
1878  		.procname	= "panic_on_io_nmi",
1879  		.data		= &panic_on_io_nmi,
1880  		.maxlen		= sizeof(int),
1881  		.mode		= 0644,
1882  		.proc_handler	= proc_dointvec,
1883  	},
1884  	{
1885  		.procname	= "bootloader_type",
1886  		.data		= &bootloader_type,
1887  		.maxlen		= sizeof (int),
1888  		.mode		= 0444,
1889  		.proc_handler	= proc_dointvec,
1890  	},
1891  	{
1892  		.procname	= "bootloader_version",
1893  		.data		= &bootloader_version,
1894  		.maxlen		= sizeof (int),
1895  		.mode		= 0444,
1896  		.proc_handler	= proc_dointvec,
1897  	},
1898  	{
1899  		.procname	= "io_delay_type",
1900  		.data		= &io_delay_type,
1901  		.maxlen		= sizeof(int),
1902  		.mode		= 0644,
1903  		.proc_handler	= proc_dointvec,
1904  	},
1905  #endif
1906  #if defined(CONFIG_MMU)
1907  	{
1908  		.procname	= "randomize_va_space",
1909  		.data		= &randomize_va_space,
1910  		.maxlen		= sizeof(int),
1911  		.mode		= 0644,
1912  		.proc_handler	= proc_dointvec,
1913  	},
1914  #endif
1915  #if defined(CONFIG_S390) && defined(CONFIG_SMP)
1916  	{
1917  		.procname	= "spin_retry",
1918  		.data		= &spin_retry,
1919  		.maxlen		= sizeof (int),
1920  		.mode		= 0644,
1921  		.proc_handler	= proc_dointvec,
1922  	},
1923  #endif
1924  #if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
1925  	{
1926  		.procname	= "acpi_video_flags",
1927  		.data		= &acpi_realmode_flags,
1928  		.maxlen		= sizeof (unsigned long),
1929  		.mode		= 0644,
1930  		.proc_handler	= proc_doulongvec_minmax,
1931  	},
1932  #endif
1933  #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
1934  	{
1935  		.procname	= "ignore-unaligned-usertrap",
1936  		.data		= &no_unaligned_warning,
1937  		.maxlen		= sizeof (int),
1938  		.mode		= 0644,
1939  		.proc_handler	= proc_dointvec,
1940  	},
1941  #endif
1942  #ifdef CONFIG_IA64
1943  	{
1944  		.procname	= "unaligned-dump-stack",
1945  		.data		= &unaligned_dump_stack,
1946  		.maxlen		= sizeof (int),
1947  		.mode		= 0644,
1948  		.proc_handler	= proc_dointvec,
1949  	},
1950  #endif
1951  #ifdef CONFIG_RT_MUTEXES
1952  	{
1953  		.procname	= "max_lock_depth",
1954  		.data		= &max_lock_depth,
1955  		.maxlen		= sizeof(int),
1956  		.mode		= 0644,
1957  		.proc_handler	= proc_dointvec,
1958  	},
1959  #endif
1960  #ifdef CONFIG_PERF_EVENTS
1961  	/*
1962  	 * User-space scripts rely on the existence of this file
1963  	 * as a feature check for perf_events being enabled.
1964  	 *
1965  	 * So it's an ABI, do not remove!
1966  	 */
1967  	{
1968  		.procname	= "perf_event_paranoid",
1969  		.data		= &sysctl_perf_event_paranoid,
1970  		.maxlen		= sizeof(sysctl_perf_event_paranoid),
1971  		.mode		= 0644,
1972  		.proc_handler	= proc_dointvec,
1973  	},
1974  	{
1975  		.procname	= "perf_event_mlock_kb",
1976  		.data		= &sysctl_perf_event_mlock,
1977  		.maxlen		= sizeof(sysctl_perf_event_mlock),
1978  		.mode		= 0644,
1979  		.proc_handler	= proc_dointvec,
1980  	},
1981  	{
1982  		.procname	= "perf_event_max_sample_rate",
1983  		.data		= &sysctl_perf_event_sample_rate,
1984  		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
1985  		.mode		= 0644,
1986  		.proc_handler	= perf_proc_update_handler,
1987  		.extra1		= SYSCTL_ONE,
1988  	},
1989  	{
1990  		.procname	= "perf_cpu_time_max_percent",
1991  		.data		= &sysctl_perf_cpu_time_max_percent,
1992  		.maxlen		= sizeof(sysctl_perf_cpu_time_max_percent),
1993  		.mode		= 0644,
1994  		.proc_handler	= perf_cpu_time_max_percent_handler,
1995  		.extra1		= SYSCTL_ZERO,
1996  		.extra2		= SYSCTL_ONE_HUNDRED,
1997  	},
1998  	{
1999  		.procname	= "perf_event_max_stack",
2000  		.data		= &sysctl_perf_event_max_stack,
2001  		.maxlen		= sizeof(sysctl_perf_event_max_stack),
2002  		.mode		= 0644,
2003  		.proc_handler	= perf_event_max_stack_handler,
2004  		.extra1		= SYSCTL_ZERO,
2005  		.extra2		= (void *)&six_hundred_forty_kb,
2006  	},
2007  	{
2008  		.procname	= "perf_event_max_contexts_per_stack",
2009  		.data		= &sysctl_perf_event_max_contexts_per_stack,
2010  		.maxlen		= sizeof(sysctl_perf_event_max_contexts_per_stack),
2011  		.mode		= 0644,
2012  		.proc_handler	= perf_event_max_stack_handler,
2013  		.extra1		= SYSCTL_ZERO,
2014  		.extra2		= SYSCTL_ONE_THOUSAND,
2015  	},
2016  #endif
2017  	{
2018  		.procname	= "panic_on_warn",
2019  		.data		= &panic_on_warn,
2020  		.maxlen		= sizeof(int),
2021  		.mode		= 0644,
2022  		.proc_handler	= proc_dointvec_minmax,
2023  		.extra1		= SYSCTL_ZERO,
2024  		.extra2		= SYSCTL_ONE,
2025  	},
2026  #ifdef CONFIG_TREE_RCU
2027  	{
2028  		.procname	= "panic_on_rcu_stall",
2029  		.data		= &sysctl_panic_on_rcu_stall,
2030  		.maxlen		= sizeof(sysctl_panic_on_rcu_stall),
2031  		.mode		= 0644,
2032  		.proc_handler	= proc_dointvec_minmax,
2033  		.extra1		= SYSCTL_ZERO,
2034  		.extra2		= SYSCTL_ONE,
2035  	},
2036  	{
2037  		.procname	= "max_rcu_stall_to_panic",
2038  		.data		= &sysctl_max_rcu_stall_to_panic,
2039  		.maxlen		= sizeof(sysctl_max_rcu_stall_to_panic),
2040  		.mode		= 0644,
2041  		.proc_handler	= proc_dointvec_minmax,
2042  		.extra1		= SYSCTL_ONE,
2043  		.extra2		= SYSCTL_INT_MAX,
2044  	},
2045  #endif
2046  	{ }
2047  };
2048  
2049  static struct ctl_table vm_table[] = {
2050  	{
2051  		.procname	= "overcommit_memory",
2052  		.data		= &sysctl_overcommit_memory,
2053  		.maxlen		= sizeof(sysctl_overcommit_memory),
2054  		.mode		= 0644,
2055  		.proc_handler	= overcommit_policy_handler,
2056  		.extra1		= SYSCTL_ZERO,
2057  		.extra2		= SYSCTL_TWO,
2058  	},
2059  	{
2060  		.procname	= "overcommit_ratio",
2061  		.data		= &sysctl_overcommit_ratio,
2062  		.maxlen		= sizeof(sysctl_overcommit_ratio),
2063  		.mode		= 0644,
2064  		.proc_handler	= overcommit_ratio_handler,
2065  	},
2066  	{
2067  		.procname	= "overcommit_kbytes",
2068  		.data		= &sysctl_overcommit_kbytes,
2069  		.maxlen		= sizeof(sysctl_overcommit_kbytes),
2070  		.mode		= 0644,
2071  		.proc_handler	= overcommit_kbytes_handler,
2072  	},
2073  	{
2074  		.procname	= "page-cluster",
2075  		.data		= &page_cluster,
2076  		.maxlen		= sizeof(int),
2077  		.mode		= 0644,
2078  		.proc_handler	= proc_dointvec_minmax,
2079  		.extra1		= SYSCTL_ZERO,
2080  		.extra2		= (void *)&page_cluster_max,
2081  	},
2082  	{
2083  		.procname	= "dirtytime_expire_seconds",
2084  		.data		= &dirtytime_expire_interval,
2085  		.maxlen		= sizeof(dirtytime_expire_interval),
2086  		.mode		= 0644,
2087  		.proc_handler	= dirtytime_interval_handler,
2088  		.extra1		= SYSCTL_ZERO,
2089  	},
2090  	{
2091  		.procname	= "swappiness",
2092  		.data		= &vm_swappiness,
2093  		.maxlen		= sizeof(vm_swappiness),
2094  		.mode		= 0644,
2095  		.proc_handler	= proc_dointvec_minmax,
2096  		.extra1		= SYSCTL_ZERO,
2097  		.extra2		= SYSCTL_TWO_HUNDRED,
2098  	},
2099  #ifdef CONFIG_NUMA
2100  	{
2101  		.procname	= "numa_stat",
2102  		.data		= &sysctl_vm_numa_stat,
2103  		.maxlen		= sizeof(int),
2104  		.mode		= 0644,
2105  		.proc_handler	= sysctl_vm_numa_stat_handler,
2106  		.extra1		= SYSCTL_ZERO,
2107  		.extra2		= SYSCTL_ONE,
2108  	},
2109  #endif
2110  	{
2111  		.procname	= "drop_caches",
2112  		.data		= &sysctl_drop_caches,
2113  		.maxlen		= sizeof(int),
2114  		.mode		= 0200,
2115  		.proc_handler	= drop_caches_sysctl_handler,
2116  		.extra1		= SYSCTL_ONE,
2117  		.extra2		= SYSCTL_FOUR,
2118  	},
2119  	{
2120  		.procname	= "page_lock_unfairness",
2121  		.data		= &sysctl_page_lock_unfairness,
2122  		.maxlen		= sizeof(sysctl_page_lock_unfairness),
2123  		.mode		= 0644,
2124  		.proc_handler	= proc_dointvec_minmax,
2125  		.extra1		= SYSCTL_ZERO,
2126  	},
2127  #ifdef CONFIG_MMU
2128  	{
2129  		.procname	= "max_map_count",
2130  		.data		= &sysctl_max_map_count,
2131  		.maxlen		= sizeof(sysctl_max_map_count),
2132  		.mode		= 0644,
2133  		.proc_handler	= proc_dointvec_minmax,
2134  		.extra1		= SYSCTL_ZERO,
2135  	},
2136  #else
2137  	{
2138  		.procname	= "nr_trim_pages",
2139  		.data		= &sysctl_nr_trim_pages,
2140  		.maxlen		= sizeof(sysctl_nr_trim_pages),
2141  		.mode		= 0644,
2142  		.proc_handler	= proc_dointvec_minmax,
2143  		.extra1		= SYSCTL_ZERO,
2144  	},
2145  #endif
2146  	{
2147  		.procname	= "vfs_cache_pressure",
2148  		.data		= &sysctl_vfs_cache_pressure,
2149  		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
2150  		.mode		= 0644,
2151  		.proc_handler	= proc_dointvec_minmax,
2152  		.extra1		= SYSCTL_ZERO,
2153  	},
2154  #if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
2155      defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
2156  	{
2157  		.procname	= "legacy_va_layout",
2158  		.data		= &sysctl_legacy_va_layout,
2159  		.maxlen		= sizeof(sysctl_legacy_va_layout),
2160  		.mode		= 0644,
2161  		.proc_handler	= proc_dointvec_minmax,
2162  		.extra1		= SYSCTL_ZERO,
2163  	},
2164  #endif
2165  #ifdef CONFIG_NUMA
2166  	{
2167  		.procname	= "zone_reclaim_mode",
2168  		.data		= &node_reclaim_mode,
2169  		.maxlen		= sizeof(node_reclaim_mode),
2170  		.mode		= 0644,
2171  		.proc_handler	= proc_dointvec_minmax,
2172  		.extra1		= SYSCTL_ZERO,
2173  	},
2174  #endif
2175  #ifdef CONFIG_SMP
2176  	{
2177  		.procname	= "stat_interval",
2178  		.data		= &sysctl_stat_interval,
2179  		.maxlen		= sizeof(sysctl_stat_interval),
2180  		.mode		= 0644,
2181  		.proc_handler	= proc_dointvec_jiffies,
2182  	},
2183  	{
2184  		.procname	= "stat_refresh",
2185  		.data		= NULL,
2186  		.maxlen		= 0,
2187  		.mode		= 0600,
2188  		.proc_handler	= vmstat_refresh,
2189  	},
2190  #endif
2191  #ifdef CONFIG_MMU
2192  	{
2193  		.procname	= "mmap_min_addr",
2194  		.data		= &dac_mmap_min_addr,
2195  		.maxlen		= sizeof(unsigned long),
2196  		.mode		= 0644,
2197  		.proc_handler	= mmap_min_addr_handler,
2198  	},
2199  #endif
2200  #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
2201     (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
2202  	{
2203  		.procname	= "vdso_enabled",
2204  #ifdef CONFIG_X86_32
2205  		.data		= &vdso32_enabled,
2206  		.maxlen		= sizeof(vdso32_enabled),
2207  #else
2208  		.data		= &vdso_enabled,
2209  		.maxlen		= sizeof(vdso_enabled),
2210  #endif
2211  		.mode		= 0644,
2212  		.proc_handler	= proc_dointvec,
2213  		.extra1		= SYSCTL_ZERO,
2214  	},
2215  #endif
2216  	{
2217  		.procname	= "user_reserve_kbytes",
2218  		.data		= &sysctl_user_reserve_kbytes,
2219  		.maxlen		= sizeof(sysctl_user_reserve_kbytes),
2220  		.mode		= 0644,
2221  		.proc_handler	= proc_doulongvec_minmax,
2222  	},
2223  	{
2224  		.procname	= "admin_reserve_kbytes",
2225  		.data		= &sysctl_admin_reserve_kbytes,
2226  		.maxlen		= sizeof(sysctl_admin_reserve_kbytes),
2227  		.mode		= 0644,
2228  		.proc_handler	= proc_doulongvec_minmax,
2229  	},
2230  #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
2231  	{
2232  		.procname	= "mmap_rnd_bits",
2233  		.data		= &mmap_rnd_bits,
2234  		.maxlen		= sizeof(mmap_rnd_bits),
2235  		.mode		= 0600,
2236  		.proc_handler	= proc_dointvec_minmax,
2237  		.extra1		= (void *)&mmap_rnd_bits_min,
2238  		.extra2		= (void *)&mmap_rnd_bits_max,
2239  	},
2240  #endif
2241  #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
2242  	{
2243  		.procname	= "mmap_rnd_compat_bits",
2244  		.data		= &mmap_rnd_compat_bits,
2245  		.maxlen		= sizeof(mmap_rnd_compat_bits),
2246  		.mode		= 0600,
2247  		.proc_handler	= proc_dointvec_minmax,
2248  		.extra1		= (void *)&mmap_rnd_compat_bits_min,
2249  		.extra2		= (void *)&mmap_rnd_compat_bits_max,
2250  	},
2251  #endif
2252  	{ }
2253  };
2254  
sysctl_init_bases(void)2255  int __init sysctl_init_bases(void)
2256  {
2257  	register_sysctl_init("kernel", kern_table);
2258  	register_sysctl_init("vm", vm_table);
2259  
2260  	return 0;
2261  }
2262  #endif /* CONFIG_SYSCTL */
/*
 * The proc handler exports are collected here in one place.  There is no
 * sense in putting an export after each symbol definition, which would
 * mean writing it twice; exception granted :-)
 *
 * proc_douintvec_minmax is the only GPL-only export in this list.
 */
EXPORT_SYMBOL(proc_dobool);
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(proc_do_large_bitmap);