1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * sysctl.c: General linux system control interface 4 * 5 * Begun 24 March 1995, Stephen Tweedie 6 * Added /proc support, Dec 1995 7 * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas. 8 * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver. 9 * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver. 10 * Dynamic registration fixes, Stephen Tweedie. 11 * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn. 12 * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris 13 * Horn. 14 * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer. 15 * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer. 16 * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill 17 * Wendling. 18 * The list_for_each() macro wasn't appropriate for the sysctl loop. 19 * Removed it and replaced it with older style, 03/23/00, Bill Wendling 20 */ 21 22 #include <linux/module.h> 23 #include <linux/aio.h> 24 #include <linux/mm.h> 25 #include <linux/swap.h> 26 #include <linux/slab.h> 27 #include <linux/sysctl.h> 28 #include <linux/bitmap.h> 29 #include <linux/signal.h> 30 #include <linux/panic.h> 31 #include <linux/printk.h> 32 #include <linux/proc_fs.h> 33 #include <linux/security.h> 34 #include <linux/ctype.h> 35 #include <linux/kmemleak.h> 36 #include <linux/filter.h> 37 #include <linux/fs.h> 38 #include <linux/init.h> 39 #include <linux/kernel.h> 40 #include <linux/kobject.h> 41 #include <linux/net.h> 42 #include <linux/sysrq.h> 43 #include <linux/highuid.h> 44 #include <linux/writeback.h> 45 #include <linux/ratelimit.h> 46 #include <linux/compaction.h> 47 #include <linux/hugetlb.h> 48 #include <linux/initrd.h> 49 #include <linux/key.h> 50 #include <linux/times.h> 51 #include <linux/limits.h> 52 #include <linux/dcache.h> 53 #include <linux/dnotify.h> 54 #include <linux/syscalls.h> 55 #include <linux/vmstat.h> 56 #include <linux/nfs_fs.h> 57 
#include <linux/acpi.h>
#include <linux/reboot.h>
#include <linux/ftrace.h>
#include <linux/perf_event.h>
#include <linux/kprobes.h>
#include <linux/pipe_fs_i.h>
#include <linux/oom.h>
#include <linux/kmod.h>
#include <linux/capability.h>
#include <linux/binfmts.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/coredump.h>
#include <linux/kexec.h>
#include <linux/bpf.h>
#include <linux/mount.h>
#include <linux/userfaultfd_k.h>
#include <linux/coredump.h>
#include <linux/latencytop.h>
#include <linux/pid.h>
#include <linux/delayacct.h>

#include "../lib/kstrtox.h"

#include <linux/uaccess.h>
#include <asm/processor.h>

#ifdef CONFIG_X86
#include <asm/nmi.h>
#include <asm/stacktrace.h>
#include <asm/io.h>
#endif
#ifdef CONFIG_SPARC
#include <asm/setup.h>
#endif
#ifdef CONFIG_BSD_PROCESS_ACCT
#include <linux/acct.h>
#endif
#ifdef CONFIG_RT_MUTEXES
#include <linux/rtmutex.h>
#endif
#if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
#include <linux/lockdep.h>
#endif
#ifdef CONFIG_CHR_DEV_SG
#include <scsi/sg.h>
#endif
#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
#include <linux/stackleak.h>
#endif
#ifdef CONFIG_LOCKUP_DETECTOR
#include <linux/nmi.h>
#endif

#if defined(CONFIG_SYSCTL)

/*
 * Constants used for minimum and maximum.
 *
 * The min/max handlers in this file read their bounds through pointers
 * (table->extra1 / table->extra2), so every limit needs addressable storage.
 * NOTE(review): the tables that reference these objects are not part of this
 * section - presumably they live further down in the file.
 */
#ifdef CONFIG_LOCKUP_DETECTOR
static int sixty = 60;
#endif

static int __maybe_unused neg_one = -1;
static int __maybe_unused two = 2;
static int __maybe_unused four = 4;
/* No initializer: static storage, so this starts out as 0. */
static unsigned long zero_ul;
static unsigned long one_ul = 1;
static unsigned long long_max = LONG_MAX;
static int one_hundred = 100;
static int two_hundred = 200;
static int one_thousand = 1000;
#ifdef CONFIG_PRINTK
static int ten_thousand = 10000;
#endif
#ifdef CONFIG_PERF_EVENTS
static int six_hundred_forty_kb = 640 * 1024;
#endif

/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;

/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
static int maxolduid = 65535;
/* No initializer: static storage, so the lower bound is 0. */
static int minolduid;

static int ngroups_max = NGROUPS_MAX;
static const int cap_last_cap = CAP_LAST_CAP;

/*
 * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
 * and hung_task_check_interval_secs
 */
#ifdef CONFIG_DETECT_HUNG_TASK
static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
#endif

#ifdef CONFIG_INOTIFY_USER
#include <linux/inotify.h>
#endif
#ifdef CONFIG_FANOTIFY
#include <linux/fanotify.h>
#endif

#ifdef CONFIG_PROC_SYSCTL

/**
 * enum sysctl_writes_mode - supported sysctl write modes
 *
 * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value
 *	to be written, and multiple writes on the same sysctl file descriptor
 *	will rewrite the sysctl value, regardless of file position. No warning
 *	is issued when the initial position is not 0.
 * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is
 *	not 0.
 * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
 *	file position 0 and the value must be fully contained in the buffer
 *	sent to the write syscall. If dealing with strings respect the file
 *	position, but restrict this to the max length of the buffer, anything
 *	past the max length will be ignored. Multiple writes will append
 *	to the buffer.
 *
 * These write modes control how current file position affects the behavior of
 * updating sysctl values through the proc interface on each write.
178 */ 179 enum sysctl_writes_mode { 180 SYSCTL_WRITES_LEGACY = -1, 181 SYSCTL_WRITES_WARN = 0, 182 SYSCTL_WRITES_STRICT = 1, 183 }; 184 185 static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT; 186 #endif /* CONFIG_PROC_SYSCTL */ 187 188 #if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \ 189 defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT) 190 int sysctl_legacy_va_layout; 191 #endif 192 193 #ifdef CONFIG_COMPACTION 194 static int min_extfrag_threshold; 195 static int max_extfrag_threshold = 1000; 196 #endif 197 198 #endif /* CONFIG_SYSCTL */ 199 200 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_SYSCTL) 201 static int bpf_stats_handler(struct ctl_table *table, int write, 202 void *buffer, size_t *lenp, loff_t *ppos) 203 { 204 struct static_key *key = (struct static_key *)table->data; 205 static int saved_val; 206 int val, ret; 207 struct ctl_table tmp = { 208 .data = &val, 209 .maxlen = sizeof(val), 210 .mode = table->mode, 211 .extra1 = SYSCTL_ZERO, 212 .extra2 = SYSCTL_ONE, 213 }; 214 215 if (write && !capable(CAP_SYS_ADMIN)) 216 return -EPERM; 217 218 mutex_lock(&bpf_stats_enabled_mutex); 219 val = saved_val; 220 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 221 if (write && !ret && val != saved_val) { 222 if (val) 223 static_key_slow_inc(key); 224 else 225 static_key_slow_dec(key); 226 saved_val = val; 227 } 228 mutex_unlock(&bpf_stats_enabled_mutex); 229 return ret; 230 } 231 232 static int bpf_unpriv_handler(struct ctl_table *table, int write, 233 void *buffer, size_t *lenp, loff_t *ppos) 234 { 235 int ret, unpriv_enable = *(int *)table->data; 236 bool locked_state = unpriv_enable == 1; 237 struct ctl_table tmp = *table; 238 239 if (write && !capable(CAP_SYS_ADMIN)) 240 return -EPERM; 241 242 tmp.data = &unpriv_enable; 243 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 244 if (write && !ret) { 245 if (locked_state && unpriv_enable != 1) 246 return -EPERM; 247 *(int *)table->data = unpriv_enable; 248 } 249 
return ret; 250 } 251 #endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */ 252 253 /* 254 * /proc/sys support 255 */ 256 257 #ifdef CONFIG_PROC_SYSCTL 258 259 static int _proc_do_string(char *data, int maxlen, int write, 260 char *buffer, size_t *lenp, loff_t *ppos) 261 { 262 size_t len; 263 char c, *p; 264 265 if (!data || !maxlen || !*lenp) { 266 *lenp = 0; 267 return 0; 268 } 269 270 if (write) { 271 if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) { 272 /* Only continue writes not past the end of buffer. */ 273 len = strlen(data); 274 if (len > maxlen - 1) 275 len = maxlen - 1; 276 277 if (*ppos > len) 278 return 0; 279 len = *ppos; 280 } else { 281 /* Start writing from beginning of buffer. */ 282 len = 0; 283 } 284 285 *ppos += *lenp; 286 p = buffer; 287 while ((p - buffer) < *lenp && len < maxlen - 1) { 288 c = *(p++); 289 if (c == 0 || c == '\n') 290 break; 291 data[len++] = c; 292 } 293 data[len] = 0; 294 } else { 295 len = strlen(data); 296 if (len > maxlen) 297 len = maxlen; 298 299 if (*ppos > len) { 300 *lenp = 0; 301 return 0; 302 } 303 304 data += *ppos; 305 len -= *ppos; 306 307 if (len > *lenp) 308 len = *lenp; 309 if (len) 310 memcpy(buffer, data, len); 311 if (len < *lenp) { 312 buffer[len] = '\n'; 313 len++; 314 } 315 *lenp = len; 316 *ppos += len; 317 } 318 return 0; 319 } 320 321 static void warn_sysctl_write(struct ctl_table *table) 322 { 323 pr_warn_once("%s wrote to %s when file position was not 0!\n" 324 "This will not be supported in the future. To silence this\n" 325 "warning, set kernel.sysctl_writes_strict = -1\n", 326 current->comm, table->procname); 327 } 328 329 /** 330 * proc_first_pos_non_zero_ignore - check if first position is allowed 331 * @ppos: file position 332 * @table: the sysctl table 333 * 334 * Returns true if the first position is non-zero and the sysctl_writes_strict 335 * mode indicates this is not allowed for numeric input types. String proc 336 * handlers can ignore the return value. 
337 */ 338 static bool proc_first_pos_non_zero_ignore(loff_t *ppos, 339 struct ctl_table *table) 340 { 341 if (!*ppos) 342 return false; 343 344 switch (sysctl_writes_strict) { 345 case SYSCTL_WRITES_STRICT: 346 return true; 347 case SYSCTL_WRITES_WARN: 348 warn_sysctl_write(table); 349 return false; 350 default: 351 return false; 352 } 353 } 354 355 /** 356 * proc_dostring - read a string sysctl 357 * @table: the sysctl table 358 * @write: %TRUE if this is a write to the sysctl file 359 * @buffer: the user buffer 360 * @lenp: the size of the user buffer 361 * @ppos: file position 362 * 363 * Reads/writes a string from/to the user buffer. If the kernel 364 * buffer provided is not large enough to hold the string, the 365 * string is truncated. The copied string is %NULL-terminated. 366 * If the string is being read by the user process, it is copied 367 * and a newline '\n' is added. It is truncated if the buffer is 368 * not large enough. 369 * 370 * Returns 0 on success. 371 */ 372 int proc_dostring(struct ctl_table *table, int write, 373 void *buffer, size_t *lenp, loff_t *ppos) 374 { 375 if (write) 376 proc_first_pos_non_zero_ignore(ppos, table); 377 378 return _proc_do_string(table->data, table->maxlen, write, buffer, lenp, 379 ppos); 380 } 381 382 static size_t proc_skip_spaces(char **buf) 383 { 384 size_t ret; 385 char *tmp = skip_spaces(*buf); 386 ret = tmp - *buf; 387 *buf = tmp; 388 return ret; 389 } 390 391 static void proc_skip_char(char **buf, size_t *size, const char v) 392 { 393 while (*size) { 394 if (**buf != v) 395 break; 396 (*size)--; 397 (*buf)++; 398 } 399 } 400 401 /** 402 * strtoul_lenient - parse an ASCII formatted integer from a buffer and only 403 * fail on overflow 404 * 405 * @cp: kernel buffer containing the string to parse 406 * @endp: pointer to store the trailing characters 407 * @base: the base to use 408 * @res: where the parsed integer will be stored 409 * 410 * In case of success 0 is returned and @res will contain the parsed 
integer, 411 * @endp will hold any trailing characters. 412 * This function will fail the parse on overflow. If there wasn't an overflow 413 * the function will defer the decision what characters count as invalid to the 414 * caller. 415 */ 416 static int strtoul_lenient(const char *cp, char **endp, unsigned int base, 417 unsigned long *res) 418 { 419 unsigned long long result; 420 unsigned int rv; 421 422 cp = _parse_integer_fixup_radix(cp, &base); 423 rv = _parse_integer(cp, base, &result); 424 if ((rv & KSTRTOX_OVERFLOW) || (result != (unsigned long)result)) 425 return -ERANGE; 426 427 cp += rv; 428 429 if (endp) 430 *endp = (char *)cp; 431 432 *res = (unsigned long)result; 433 return 0; 434 } 435 436 #define TMPBUFLEN 22 437 /** 438 * proc_get_long - reads an ASCII formatted integer from a user buffer 439 * 440 * @buf: a kernel buffer 441 * @size: size of the kernel buffer 442 * @val: this is where the number will be stored 443 * @neg: set to %TRUE if number is negative 444 * @perm_tr: a vector which contains the allowed trailers 445 * @perm_tr_len: size of the perm_tr vector 446 * @tr: pointer to store the trailer character 447 * 448 * In case of success %0 is returned and @buf and @size are updated with 449 * the amount of bytes read. If @tr is non-NULL and a trailing 450 * character exists (size is non-zero after returning from this 451 * function), @tr is updated with the trailing character. 
452 */ 453 static int proc_get_long(char **buf, size_t *size, 454 unsigned long *val, bool *neg, 455 const char *perm_tr, unsigned perm_tr_len, char *tr) 456 { 457 int len; 458 char *p, tmp[TMPBUFLEN]; 459 460 if (!*size) 461 return -EINVAL; 462 463 len = *size; 464 if (len > TMPBUFLEN - 1) 465 len = TMPBUFLEN - 1; 466 467 memcpy(tmp, *buf, len); 468 469 tmp[len] = 0; 470 p = tmp; 471 if (*p == '-' && *size > 1) { 472 *neg = true; 473 p++; 474 } else 475 *neg = false; 476 if (!isdigit(*p)) 477 return -EINVAL; 478 479 if (strtoul_lenient(p, &p, 0, val)) 480 return -EINVAL; 481 482 len = p - tmp; 483 484 /* We don't know if the next char is whitespace thus we may accept 485 * invalid integers (e.g. 1234...a) or two integers instead of one 486 * (e.g. 123...1). So lets not allow such large numbers. */ 487 if (len == TMPBUFLEN - 1) 488 return -EINVAL; 489 490 if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len)) 491 return -EINVAL; 492 493 if (tr && (len < *size)) 494 *tr = *p; 495 496 *buf += len; 497 *size -= len; 498 499 return 0; 500 } 501 502 /** 503 * proc_put_long - converts an integer to a decimal ASCII formatted string 504 * 505 * @buf: the user buffer 506 * @size: the size of the user buffer 507 * @val: the integer to be converted 508 * @neg: sign of the number, %TRUE for negative 509 * 510 * In case of success @buf and @size are updated with the amount of bytes 511 * written. 512 */ 513 static void proc_put_long(void **buf, size_t *size, unsigned long val, bool neg) 514 { 515 int len; 516 char tmp[TMPBUFLEN], *p = tmp; 517 518 sprintf(p, "%s%lu", neg ? 
"-" : "", val); 519 len = strlen(tmp); 520 if (len > *size) 521 len = *size; 522 memcpy(*buf, tmp, len); 523 *size -= len; 524 *buf += len; 525 } 526 #undef TMPBUFLEN 527 528 static void proc_put_char(void **buf, size_t *size, char c) 529 { 530 if (*size) { 531 char **buffer = (char **)buf; 532 **buffer = c; 533 534 (*size)--; 535 (*buffer)++; 536 *buf = *buffer; 537 } 538 } 539 540 static int do_proc_dobool_conv(bool *negp, unsigned long *lvalp, 541 int *valp, 542 int write, void *data) 543 { 544 if (write) { 545 *(bool *)valp = *lvalp; 546 } else { 547 int val = *(bool *)valp; 548 549 *lvalp = (unsigned long)val; 550 *negp = false; 551 } 552 return 0; 553 } 554 555 static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp, 556 int *valp, 557 int write, void *data) 558 { 559 if (write) { 560 if (*negp) { 561 if (*lvalp > (unsigned long) INT_MAX + 1) 562 return -EINVAL; 563 *valp = -*lvalp; 564 } else { 565 if (*lvalp > (unsigned long) INT_MAX) 566 return -EINVAL; 567 *valp = *lvalp; 568 } 569 } else { 570 int val = *valp; 571 if (val < 0) { 572 *negp = true; 573 *lvalp = -(unsigned long)val; 574 } else { 575 *negp = false; 576 *lvalp = (unsigned long)val; 577 } 578 } 579 return 0; 580 } 581 582 static int do_proc_douintvec_conv(unsigned long *lvalp, 583 unsigned int *valp, 584 int write, void *data) 585 { 586 if (write) { 587 if (*lvalp > UINT_MAX) 588 return -EINVAL; 589 *valp = *lvalp; 590 } else { 591 unsigned int val = *valp; 592 *lvalp = (unsigned long)val; 593 } 594 return 0; 595 } 596 597 static const char proc_wspace_sep[] = { ' ', '\t', '\n' }; 598 599 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, 600 int write, void *buffer, 601 size_t *lenp, loff_t *ppos, 602 int (*conv)(bool *negp, unsigned long *lvalp, int *valp, 603 int write, void *data), 604 void *data) 605 { 606 int *i, vleft, first = 1, err = 0; 607 size_t left; 608 char *p; 609 610 if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) { 611 *lenp = 0; 
612 return 0; 613 } 614 615 i = (int *) tbl_data; 616 vleft = table->maxlen / sizeof(*i); 617 left = *lenp; 618 619 if (!conv) 620 conv = do_proc_dointvec_conv; 621 622 if (write) { 623 if (proc_first_pos_non_zero_ignore(ppos, table)) 624 goto out; 625 626 if (left > PAGE_SIZE - 1) 627 left = PAGE_SIZE - 1; 628 p = buffer; 629 } 630 631 for (; left && vleft--; i++, first=0) { 632 unsigned long lval; 633 bool neg; 634 635 if (write) { 636 left -= proc_skip_spaces(&p); 637 638 if (!left) 639 break; 640 err = proc_get_long(&p, &left, &lval, &neg, 641 proc_wspace_sep, 642 sizeof(proc_wspace_sep), NULL); 643 if (err) 644 break; 645 if (conv(&neg, &lval, i, 1, data)) { 646 err = -EINVAL; 647 break; 648 } 649 } else { 650 if (conv(&neg, &lval, i, 0, data)) { 651 err = -EINVAL; 652 break; 653 } 654 if (!first) 655 proc_put_char(&buffer, &left, '\t'); 656 proc_put_long(&buffer, &left, lval, neg); 657 } 658 } 659 660 if (!write && !first && left && !err) 661 proc_put_char(&buffer, &left, '\n'); 662 if (write && !err && left) 663 left -= proc_skip_spaces(&p); 664 if (write && first) 665 return err ? 
: -EINVAL; 666 *lenp -= left; 667 out: 668 *ppos += *lenp; 669 return err; 670 } 671 672 static int do_proc_dointvec(struct ctl_table *table, int write, 673 void *buffer, size_t *lenp, loff_t *ppos, 674 int (*conv)(bool *negp, unsigned long *lvalp, int *valp, 675 int write, void *data), 676 void *data) 677 { 678 return __do_proc_dointvec(table->data, table, write, 679 buffer, lenp, ppos, conv, data); 680 } 681 682 static int do_proc_douintvec_w(unsigned int *tbl_data, 683 struct ctl_table *table, 684 void *buffer, 685 size_t *lenp, loff_t *ppos, 686 int (*conv)(unsigned long *lvalp, 687 unsigned int *valp, 688 int write, void *data), 689 void *data) 690 { 691 unsigned long lval; 692 int err = 0; 693 size_t left; 694 bool neg; 695 char *p = buffer; 696 697 left = *lenp; 698 699 if (proc_first_pos_non_zero_ignore(ppos, table)) 700 goto bail_early; 701 702 if (left > PAGE_SIZE - 1) 703 left = PAGE_SIZE - 1; 704 705 left -= proc_skip_spaces(&p); 706 if (!left) { 707 err = -EINVAL; 708 goto out_free; 709 } 710 711 err = proc_get_long(&p, &left, &lval, &neg, 712 proc_wspace_sep, 713 sizeof(proc_wspace_sep), NULL); 714 if (err || neg) { 715 err = -EINVAL; 716 goto out_free; 717 } 718 719 if (conv(&lval, tbl_data, 1, data)) { 720 err = -EINVAL; 721 goto out_free; 722 } 723 724 if (!err && left) 725 left -= proc_skip_spaces(&p); 726 727 out_free: 728 if (err) 729 return -EINVAL; 730 731 return 0; 732 733 /* This is in keeping with old __do_proc_dointvec() */ 734 bail_early: 735 *ppos += *lenp; 736 return err; 737 } 738 739 static int do_proc_douintvec_r(unsigned int *tbl_data, void *buffer, 740 size_t *lenp, loff_t *ppos, 741 int (*conv)(unsigned long *lvalp, 742 unsigned int *valp, 743 int write, void *data), 744 void *data) 745 { 746 unsigned long lval; 747 int err = 0; 748 size_t left; 749 750 left = *lenp; 751 752 if (conv(&lval, tbl_data, 0, data)) { 753 err = -EINVAL; 754 goto out; 755 } 756 757 proc_put_long(&buffer, &left, lval, false); 758 if (!left) 759 goto out; 
760 761 proc_put_char(&buffer, &left, '\n'); 762 763 out: 764 *lenp -= left; 765 *ppos += *lenp; 766 767 return err; 768 } 769 770 static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table, 771 int write, void *buffer, 772 size_t *lenp, loff_t *ppos, 773 int (*conv)(unsigned long *lvalp, 774 unsigned int *valp, 775 int write, void *data), 776 void *data) 777 { 778 unsigned int *i, vleft; 779 780 if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) { 781 *lenp = 0; 782 return 0; 783 } 784 785 i = (unsigned int *) tbl_data; 786 vleft = table->maxlen / sizeof(*i); 787 788 /* 789 * Arrays are not supported, keep this simple. *Do not* add 790 * support for them. 791 */ 792 if (vleft != 1) { 793 *lenp = 0; 794 return -EINVAL; 795 } 796 797 if (!conv) 798 conv = do_proc_douintvec_conv; 799 800 if (write) 801 return do_proc_douintvec_w(i, table, buffer, lenp, ppos, 802 conv, data); 803 return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data); 804 } 805 806 static int do_proc_douintvec(struct ctl_table *table, int write, 807 void *buffer, size_t *lenp, loff_t *ppos, 808 int (*conv)(unsigned long *lvalp, 809 unsigned int *valp, 810 int write, void *data), 811 void *data) 812 { 813 return __do_proc_douintvec(table->data, table, write, 814 buffer, lenp, ppos, conv, data); 815 } 816 817 /** 818 * proc_dobool - read/write a bool 819 * @table: the sysctl table 820 * @write: %TRUE if this is a write to the sysctl file 821 * @buffer: the user buffer 822 * @lenp: the size of the user buffer 823 * @ppos: file position 824 * 825 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer 826 * values from/to the user buffer, treated as an ASCII string. 827 * 828 * Returns 0 on success. 
829 */ 830 int proc_dobool(struct ctl_table *table, int write, void *buffer, 831 size_t *lenp, loff_t *ppos) 832 { 833 return do_proc_dointvec(table, write, buffer, lenp, ppos, 834 do_proc_dobool_conv, NULL); 835 } 836 837 /** 838 * proc_dointvec - read a vector of integers 839 * @table: the sysctl table 840 * @write: %TRUE if this is a write to the sysctl file 841 * @buffer: the user buffer 842 * @lenp: the size of the user buffer 843 * @ppos: file position 844 * 845 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer 846 * values from/to the user buffer, treated as an ASCII string. 847 * 848 * Returns 0 on success. 849 */ 850 int proc_dointvec(struct ctl_table *table, int write, void *buffer, 851 size_t *lenp, loff_t *ppos) 852 { 853 return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL); 854 } 855 856 #ifdef CONFIG_COMPACTION 857 static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table, 858 int write, void *buffer, size_t *lenp, loff_t *ppos) 859 { 860 int ret, old; 861 862 if (!IS_ENABLED(CONFIG_PREEMPT_RT) || !write) 863 return proc_dointvec_minmax(table, write, buffer, lenp, ppos); 864 865 old = *(int *)table->data; 866 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 867 if (ret) 868 return ret; 869 if (old != *(int *)table->data) 870 pr_warn_once("sysctl attribute %s changed by %s[%d]\n", 871 table->procname, current->comm, 872 task_pid_nr(current)); 873 return ret; 874 } 875 #endif 876 877 /** 878 * proc_douintvec - read a vector of unsigned integers 879 * @table: the sysctl table 880 * @write: %TRUE if this is a write to the sysctl file 881 * @buffer: the user buffer 882 * @lenp: the size of the user buffer 883 * @ppos: file position 884 * 885 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer 886 * values from/to the user buffer, treated as an ASCII string. 887 * 888 * Returns 0 on success. 
889 */ 890 int proc_douintvec(struct ctl_table *table, int write, void *buffer, 891 size_t *lenp, loff_t *ppos) 892 { 893 return do_proc_douintvec(table, write, buffer, lenp, ppos, 894 do_proc_douintvec_conv, NULL); 895 } 896 897 /* 898 * Taint values can only be increased 899 * This means we can safely use a temporary. 900 */ 901 static int proc_taint(struct ctl_table *table, int write, 902 void *buffer, size_t *lenp, loff_t *ppos) 903 { 904 struct ctl_table t; 905 unsigned long tmptaint = get_taint(); 906 int err; 907 908 if (write && !capable(CAP_SYS_ADMIN)) 909 return -EPERM; 910 911 t = *table; 912 t.data = &tmptaint; 913 err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos); 914 if (err < 0) 915 return err; 916 917 if (write) { 918 int i; 919 920 /* 921 * If we are relying on panic_on_taint not producing 922 * false positives due to userspace input, bail out 923 * before setting the requested taint flags. 924 */ 925 if (panic_on_taint_nousertaint && (tmptaint & panic_on_taint)) 926 return -EINVAL; 927 928 /* 929 * Poor man's atomic or. 
Not worth adding a primitive 930 * to everyone's atomic.h for this 931 */ 932 for (i = 0; i < TAINT_FLAGS_COUNT; i++) 933 if ((1UL << i) & tmptaint) 934 add_taint(i, LOCKDEP_STILL_OK); 935 } 936 937 return err; 938 } 939 940 #ifdef CONFIG_PRINTK 941 static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, 942 void *buffer, size_t *lenp, loff_t *ppos) 943 { 944 if (write && !capable(CAP_SYS_ADMIN)) 945 return -EPERM; 946 947 return proc_dointvec_minmax(table, write, buffer, lenp, ppos); 948 } 949 #endif 950 951 /** 952 * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure 953 * @min: pointer to minimum allowable value 954 * @max: pointer to maximum allowable value 955 * 956 * The do_proc_dointvec_minmax_conv_param structure provides the 957 * minimum and maximum values for doing range checking for those sysctl 958 * parameters that use the proc_dointvec_minmax() handler. 959 */ 960 struct do_proc_dointvec_minmax_conv_param { 961 int *min; 962 int *max; 963 }; 964 965 static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp, 966 int *valp, 967 int write, void *data) 968 { 969 int tmp, ret; 970 struct do_proc_dointvec_minmax_conv_param *param = data; 971 /* 972 * If writing, first do so via a temporary local int so we can 973 * bounds-check it before touching *valp. 974 */ 975 int *ip = write ? 
&tmp : valp; 976 977 ret = do_proc_dointvec_conv(negp, lvalp, ip, write, data); 978 if (ret) 979 return ret; 980 981 if (write) { 982 if ((param->min && *param->min > tmp) || 983 (param->max && *param->max < tmp)) 984 return -EINVAL; 985 *valp = tmp; 986 } 987 988 return 0; 989 } 990 991 /** 992 * proc_dointvec_minmax - read a vector of integers with min/max values 993 * @table: the sysctl table 994 * @write: %TRUE if this is a write to the sysctl file 995 * @buffer: the user buffer 996 * @lenp: the size of the user buffer 997 * @ppos: file position 998 * 999 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer 1000 * values from/to the user buffer, treated as an ASCII string. 1001 * 1002 * This routine will ensure the values are within the range specified by 1003 * table->extra1 (min) and table->extra2 (max). 1004 * 1005 * Returns 0 on success or -EINVAL on write when the range check fails. 1006 */ 1007 int proc_dointvec_minmax(struct ctl_table *table, int write, 1008 void *buffer, size_t *lenp, loff_t *ppos) 1009 { 1010 struct do_proc_dointvec_minmax_conv_param param = { 1011 .min = (int *) table->extra1, 1012 .max = (int *) table->extra2, 1013 }; 1014 return do_proc_dointvec(table, write, buffer, lenp, ppos, 1015 do_proc_dointvec_minmax_conv, ¶m); 1016 } 1017 1018 /** 1019 * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure 1020 * @min: pointer to minimum allowable value 1021 * @max: pointer to maximum allowable value 1022 * 1023 * The do_proc_douintvec_minmax_conv_param structure provides the 1024 * minimum and maximum values for doing range checking for those sysctl 1025 * parameters that use the proc_douintvec_minmax() handler. 
1026 */ 1027 struct do_proc_douintvec_minmax_conv_param { 1028 unsigned int *min; 1029 unsigned int *max; 1030 }; 1031 1032 static int do_proc_douintvec_minmax_conv(unsigned long *lvalp, 1033 unsigned int *valp, 1034 int write, void *data) 1035 { 1036 int ret; 1037 unsigned int tmp; 1038 struct do_proc_douintvec_minmax_conv_param *param = data; 1039 /* write via temporary local uint for bounds-checking */ 1040 unsigned int *up = write ? &tmp : valp; 1041 1042 ret = do_proc_douintvec_conv(lvalp, up, write, data); 1043 if (ret) 1044 return ret; 1045 1046 if (write) { 1047 if ((param->min && *param->min > tmp) || 1048 (param->max && *param->max < tmp)) 1049 return -ERANGE; 1050 1051 *valp = tmp; 1052 } 1053 1054 return 0; 1055 } 1056 1057 /** 1058 * proc_douintvec_minmax - read a vector of unsigned ints with min/max values 1059 * @table: the sysctl table 1060 * @write: %TRUE if this is a write to the sysctl file 1061 * @buffer: the user buffer 1062 * @lenp: the size of the user buffer 1063 * @ppos: file position 1064 * 1065 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer 1066 * values from/to the user buffer, treated as an ASCII string. Negative 1067 * strings are not allowed. 1068 * 1069 * This routine will ensure the values are within the range specified by 1070 * table->extra1 (min) and table->extra2 (max). There is a final sanity 1071 * check for UINT_MAX to avoid having to support wrap around uses from 1072 * userspace. 1073 * 1074 * Returns 0 on success or -ERANGE on write when the range check fails. 
1075 */ 1076 int proc_douintvec_minmax(struct ctl_table *table, int write, 1077 void *buffer, size_t *lenp, loff_t *ppos) 1078 { 1079 struct do_proc_douintvec_minmax_conv_param param = { 1080 .min = (unsigned int *) table->extra1, 1081 .max = (unsigned int *) table->extra2, 1082 }; 1083 return do_proc_douintvec(table, write, buffer, lenp, ppos, 1084 do_proc_douintvec_minmax_conv, ¶m); 1085 } 1086 1087 /** 1088 * proc_dou8vec_minmax - read a vector of unsigned chars with min/max values 1089 * @table: the sysctl table 1090 * @write: %TRUE if this is a write to the sysctl file 1091 * @buffer: the user buffer 1092 * @lenp: the size of the user buffer 1093 * @ppos: file position 1094 * 1095 * Reads/writes up to table->maxlen/sizeof(u8) unsigned chars 1096 * values from/to the user buffer, treated as an ASCII string. Negative 1097 * strings are not allowed. 1098 * 1099 * This routine will ensure the values are within the range specified by 1100 * table->extra1 (min) and table->extra2 (max). 1101 * 1102 * Returns 0 on success or an error on write when the range check fails. 1103 */ 1104 int proc_dou8vec_minmax(struct ctl_table *table, int write, 1105 void *buffer, size_t *lenp, loff_t *ppos) 1106 { 1107 struct ctl_table tmp; 1108 unsigned int min = 0, max = 255U, val; 1109 u8 *data = table->data; 1110 struct do_proc_douintvec_minmax_conv_param param = { 1111 .min = &min, 1112 .max = &max, 1113 }; 1114 int res; 1115 1116 /* Do not support arrays yet. 
*/ 1117 if (table->maxlen != sizeof(u8)) 1118 return -EINVAL; 1119 1120 if (table->extra1) { 1121 min = *(unsigned int *) table->extra1; 1122 if (min > 255U) 1123 return -EINVAL; 1124 } 1125 if (table->extra2) { 1126 max = *(unsigned int *) table->extra2; 1127 if (max > 255U) 1128 return -EINVAL; 1129 } 1130 1131 tmp = *table; 1132 1133 tmp.maxlen = sizeof(val); 1134 tmp.data = &val; 1135 val = *data; 1136 res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos, 1137 do_proc_douintvec_minmax_conv, ¶m); 1138 if (res) 1139 return res; 1140 if (write) 1141 *data = val; 1142 return 0; 1143 } 1144 EXPORT_SYMBOL_GPL(proc_dou8vec_minmax); 1145 1146 static int do_proc_dopipe_max_size_conv(unsigned long *lvalp, 1147 unsigned int *valp, 1148 int write, void *data) 1149 { 1150 if (write) { 1151 unsigned int val; 1152 1153 val = round_pipe_size(*lvalp); 1154 if (val == 0) 1155 return -EINVAL; 1156 1157 *valp = val; 1158 } else { 1159 unsigned int val = *valp; 1160 *lvalp = (unsigned long) val; 1161 } 1162 1163 return 0; 1164 } 1165 1166 static int proc_dopipe_max_size(struct ctl_table *table, int write, 1167 void *buffer, size_t *lenp, loff_t *ppos) 1168 { 1169 return do_proc_douintvec(table, write, buffer, lenp, ppos, 1170 do_proc_dopipe_max_size_conv, NULL); 1171 } 1172 1173 static void validate_coredump_safety(void) 1174 { 1175 #ifdef CONFIG_COREDUMP 1176 if (suid_dumpable == SUID_DUMP_ROOT && 1177 core_pattern[0] != '/' && core_pattern[0] != '|') { 1178 printk(KERN_WARNING 1179 "Unsafe core_pattern used with fs.suid_dumpable=2.\n" 1180 "Pipe handler or fully qualified core dump path required.\n" 1181 "Set kernel.core_pattern before fs.suid_dumpable.\n" 1182 ); 1183 } 1184 #endif 1185 } 1186 1187 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, 1188 void *buffer, size_t *lenp, loff_t *ppos) 1189 { 1190 int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 1191 if (!error) 1192 validate_coredump_safety(); 1193 return error; 1194 } 
1195 1196 #ifdef CONFIG_COREDUMP 1197 static int proc_dostring_coredump(struct ctl_table *table, int write, 1198 void *buffer, size_t *lenp, loff_t *ppos) 1199 { 1200 int error = proc_dostring(table, write, buffer, lenp, ppos); 1201 if (!error) 1202 validate_coredump_safety(); 1203 return error; 1204 } 1205 #endif 1206 1207 #ifdef CONFIG_MAGIC_SYSRQ 1208 static int sysrq_sysctl_handler(struct ctl_table *table, int write, 1209 void *buffer, size_t *lenp, loff_t *ppos) 1210 { 1211 int tmp, ret; 1212 1213 tmp = sysrq_mask(); 1214 1215 ret = __do_proc_dointvec(&tmp, table, write, buffer, 1216 lenp, ppos, NULL, NULL); 1217 if (ret || !write) 1218 return ret; 1219 1220 if (write) 1221 sysrq_toggle_support(tmp); 1222 1223 return 0; 1224 } 1225 #endif 1226 1227 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, 1228 int write, void *buffer, size_t *lenp, loff_t *ppos, 1229 unsigned long convmul, unsigned long convdiv) 1230 { 1231 unsigned long *i, *min, *max; 1232 int vleft, first = 1, err = 0; 1233 size_t left; 1234 char *p; 1235 1236 if (!data || !table->maxlen || !*lenp || (*ppos && !write)) { 1237 *lenp = 0; 1238 return 0; 1239 } 1240 1241 i = (unsigned long *) data; 1242 min = (unsigned long *) table->extra1; 1243 max = (unsigned long *) table->extra2; 1244 vleft = table->maxlen / sizeof(unsigned long); 1245 left = *lenp; 1246 1247 if (write) { 1248 if (proc_first_pos_non_zero_ignore(ppos, table)) 1249 goto out; 1250 1251 if (left > PAGE_SIZE - 1) 1252 left = PAGE_SIZE - 1; 1253 p = buffer; 1254 } 1255 1256 for (; left && vleft--; i++, first = 0) { 1257 unsigned long val; 1258 1259 if (write) { 1260 bool neg; 1261 1262 left -= proc_skip_spaces(&p); 1263 if (!left) 1264 break; 1265 1266 err = proc_get_long(&p, &left, &val, &neg, 1267 proc_wspace_sep, 1268 sizeof(proc_wspace_sep), NULL); 1269 if (err) 1270 break; 1271 if (neg) 1272 continue; 1273 val = convmul * val / convdiv; 1274 if ((min && val < *min) || (max && val > *max)) { 1275 err = 
-EINVAL; 1276 break; 1277 } 1278 *i = val; 1279 } else { 1280 val = convdiv * (*i) / convmul; 1281 if (!first) 1282 proc_put_char(&buffer, &left, '\t'); 1283 proc_put_long(&buffer, &left, val, false); 1284 } 1285 } 1286 1287 if (!write && !first && left && !err) 1288 proc_put_char(&buffer, &left, '\n'); 1289 if (write && !err) 1290 left -= proc_skip_spaces(&p); 1291 if (write && first) 1292 return err ? : -EINVAL; 1293 *lenp -= left; 1294 out: 1295 *ppos += *lenp; 1296 return err; 1297 } 1298 1299 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write, 1300 void *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul, 1301 unsigned long convdiv) 1302 { 1303 return __do_proc_doulongvec_minmax(table->data, table, write, 1304 buffer, lenp, ppos, convmul, convdiv); 1305 } 1306 1307 /** 1308 * proc_doulongvec_minmax - read a vector of long integers with min/max values 1309 * @table: the sysctl table 1310 * @write: %TRUE if this is a write to the sysctl file 1311 * @buffer: the user buffer 1312 * @lenp: the size of the user buffer 1313 * @ppos: file position 1314 * 1315 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long 1316 * values from/to the user buffer, treated as an ASCII string. 1317 * 1318 * This routine will ensure the values are within the range specified by 1319 * table->extra1 (min) and table->extra2 (max). 1320 * 1321 * Returns 0 on success. 
1322 */ 1323 int proc_doulongvec_minmax(struct ctl_table *table, int write, 1324 void *buffer, size_t *lenp, loff_t *ppos) 1325 { 1326 return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l); 1327 } 1328 1329 /** 1330 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values 1331 * @table: the sysctl table 1332 * @write: %TRUE if this is a write to the sysctl file 1333 * @buffer: the user buffer 1334 * @lenp: the size of the user buffer 1335 * @ppos: file position 1336 * 1337 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long 1338 * values from/to the user buffer, treated as an ASCII string. The values 1339 * are treated as milliseconds, and converted to jiffies when they are stored. 1340 * 1341 * This routine will ensure the values are within the range specified by 1342 * table->extra1 (min) and table->extra2 (max). 1343 * 1344 * Returns 0 on success. 1345 */ 1346 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, 1347 void *buffer, size_t *lenp, loff_t *ppos) 1348 { 1349 return do_proc_doulongvec_minmax(table, write, buffer, 1350 lenp, ppos, HZ, 1000l); 1351 } 1352 1353 1354 static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp, 1355 int *valp, 1356 int write, void *data) 1357 { 1358 if (write) { 1359 if (*lvalp > INT_MAX / HZ) 1360 return 1; 1361 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ); 1362 } else { 1363 int val = *valp; 1364 unsigned long lval; 1365 if (val < 0) { 1366 *negp = true; 1367 lval = -(unsigned long)val; 1368 } else { 1369 *negp = false; 1370 lval = (unsigned long)val; 1371 } 1372 *lvalp = lval / HZ; 1373 } 1374 return 0; 1375 } 1376 1377 static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp, 1378 int *valp, 1379 int write, void *data) 1380 { 1381 if (write) { 1382 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ) 1383 return 1; 1384 *valp = clock_t_to_jiffies(*negp ? 
-*lvalp : *lvalp); 1385 } else { 1386 int val = *valp; 1387 unsigned long lval; 1388 if (val < 0) { 1389 *negp = true; 1390 lval = -(unsigned long)val; 1391 } else { 1392 *negp = false; 1393 lval = (unsigned long)val; 1394 } 1395 *lvalp = jiffies_to_clock_t(lval); 1396 } 1397 return 0; 1398 } 1399 1400 static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp, 1401 int *valp, 1402 int write, void *data) 1403 { 1404 if (write) { 1405 unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp); 1406 1407 if (jif > INT_MAX) 1408 return 1; 1409 *valp = (int)jif; 1410 } else { 1411 int val = *valp; 1412 unsigned long lval; 1413 if (val < 0) { 1414 *negp = true; 1415 lval = -(unsigned long)val; 1416 } else { 1417 *negp = false; 1418 lval = (unsigned long)val; 1419 } 1420 *lvalp = jiffies_to_msecs(lval); 1421 } 1422 return 0; 1423 } 1424 1425 /** 1426 * proc_dointvec_jiffies - read a vector of integers as seconds 1427 * @table: the sysctl table 1428 * @write: %TRUE if this is a write to the sysctl file 1429 * @buffer: the user buffer 1430 * @lenp: the size of the user buffer 1431 * @ppos: file position 1432 * 1433 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer 1434 * values from/to the user buffer, treated as an ASCII string. 1435 * The values read are assumed to be in seconds, and are converted into 1436 * jiffies. 1437 * 1438 * Returns 0 on success. 
1439 */ 1440 int proc_dointvec_jiffies(struct ctl_table *table, int write, 1441 void *buffer, size_t *lenp, loff_t *ppos) 1442 { 1443 return do_proc_dointvec(table,write,buffer,lenp,ppos, 1444 do_proc_dointvec_jiffies_conv,NULL); 1445 } 1446 1447 /** 1448 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds 1449 * @table: the sysctl table 1450 * @write: %TRUE if this is a write to the sysctl file 1451 * @buffer: the user buffer 1452 * @lenp: the size of the user buffer 1453 * @ppos: pointer to the file position 1454 * 1455 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer 1456 * values from/to the user buffer, treated as an ASCII string. 1457 * The values read are assumed to be in 1/USER_HZ seconds, and 1458 * are converted into jiffies. 1459 * 1460 * Returns 0 on success. 1461 */ 1462 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, 1463 void *buffer, size_t *lenp, loff_t *ppos) 1464 { 1465 return do_proc_dointvec(table,write,buffer,lenp,ppos, 1466 do_proc_dointvec_userhz_jiffies_conv,NULL); 1467 } 1468 1469 /** 1470 * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds 1471 * @table: the sysctl table 1472 * @write: %TRUE if this is a write to the sysctl file 1473 * @buffer: the user buffer 1474 * @lenp: the size of the user buffer 1475 * @ppos: file position 1476 * @ppos: the current position in the file 1477 * 1478 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer 1479 * values from/to the user buffer, treated as an ASCII string. 1480 * The values read are assumed to be in 1/1000 seconds, and 1481 * are converted into jiffies. 1482 * 1483 * Returns 0 on success. 
1484 */ 1485 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, void *buffer, 1486 size_t *lenp, loff_t *ppos) 1487 { 1488 return do_proc_dointvec(table, write, buffer, lenp, ppos, 1489 do_proc_dointvec_ms_jiffies_conv, NULL); 1490 } 1491 1492 static int proc_do_cad_pid(struct ctl_table *table, int write, void *buffer, 1493 size_t *lenp, loff_t *ppos) 1494 { 1495 struct pid *new_pid; 1496 pid_t tmp; 1497 int r; 1498 1499 tmp = pid_vnr(cad_pid); 1500 1501 r = __do_proc_dointvec(&tmp, table, write, buffer, 1502 lenp, ppos, NULL, NULL); 1503 if (r || !write) 1504 return r; 1505 1506 new_pid = find_get_pid(tmp); 1507 if (!new_pid) 1508 return -ESRCH; 1509 1510 put_pid(xchg(&cad_pid, new_pid)); 1511 return 0; 1512 } 1513 1514 /** 1515 * proc_do_large_bitmap - read/write from/to a large bitmap 1516 * @table: the sysctl table 1517 * @write: %TRUE if this is a write to the sysctl file 1518 * @buffer: the user buffer 1519 * @lenp: the size of the user buffer 1520 * @ppos: file position 1521 * 1522 * The bitmap is stored at table->data and the bitmap length (in bits) 1523 * in table->maxlen. 1524 * 1525 * We use a range comma separated format (e.g. 1,3-4,10-10) so that 1526 * large bitmaps may be represented in a compact manner. Writing into 1527 * the file will clear the bitmap then update it with the given input. 1528 * 1529 * Returns 0 on success. 
1530 */ 1531 int proc_do_large_bitmap(struct ctl_table *table, int write, 1532 void *buffer, size_t *lenp, loff_t *ppos) 1533 { 1534 int err = 0; 1535 size_t left = *lenp; 1536 unsigned long bitmap_len = table->maxlen; 1537 unsigned long *bitmap = *(unsigned long **) table->data; 1538 unsigned long *tmp_bitmap = NULL; 1539 char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c; 1540 1541 if (!bitmap || !bitmap_len || !left || (*ppos && !write)) { 1542 *lenp = 0; 1543 return 0; 1544 } 1545 1546 if (write) { 1547 char *p = buffer; 1548 size_t skipped = 0; 1549 1550 if (left > PAGE_SIZE - 1) { 1551 left = PAGE_SIZE - 1; 1552 /* How much of the buffer we'll skip this pass */ 1553 skipped = *lenp - left; 1554 } 1555 1556 tmp_bitmap = bitmap_zalloc(bitmap_len, GFP_KERNEL); 1557 if (!tmp_bitmap) 1558 return -ENOMEM; 1559 proc_skip_char(&p, &left, '\n'); 1560 while (!err && left) { 1561 unsigned long val_a, val_b; 1562 bool neg; 1563 size_t saved_left; 1564 1565 /* In case we stop parsing mid-number, we can reset */ 1566 saved_left = left; 1567 err = proc_get_long(&p, &left, &val_a, &neg, tr_a, 1568 sizeof(tr_a), &c); 1569 /* 1570 * If we consumed the entirety of a truncated buffer or 1571 * only one char is left (may be a "-"), then stop here, 1572 * reset, & come back for more. 1573 */ 1574 if ((left <= 1) && skipped) { 1575 left = saved_left; 1576 break; 1577 } 1578 1579 if (err) 1580 break; 1581 if (val_a >= bitmap_len || neg) { 1582 err = -EINVAL; 1583 break; 1584 } 1585 1586 val_b = val_a; 1587 if (left) { 1588 p++; 1589 left--; 1590 } 1591 1592 if (c == '-') { 1593 err = proc_get_long(&p, &left, &val_b, 1594 &neg, tr_b, sizeof(tr_b), 1595 &c); 1596 /* 1597 * If we consumed all of a truncated buffer or 1598 * then stop here, reset, & come back for more. 
1599 */ 1600 if (!left && skipped) { 1601 left = saved_left; 1602 break; 1603 } 1604 1605 if (err) 1606 break; 1607 if (val_b >= bitmap_len || neg || 1608 val_a > val_b) { 1609 err = -EINVAL; 1610 break; 1611 } 1612 if (left) { 1613 p++; 1614 left--; 1615 } 1616 } 1617 1618 bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1); 1619 proc_skip_char(&p, &left, '\n'); 1620 } 1621 left += skipped; 1622 } else { 1623 unsigned long bit_a, bit_b = 0; 1624 bool first = 1; 1625 1626 while (left) { 1627 bit_a = find_next_bit(bitmap, bitmap_len, bit_b); 1628 if (bit_a >= bitmap_len) 1629 break; 1630 bit_b = find_next_zero_bit(bitmap, bitmap_len, 1631 bit_a + 1) - 1; 1632 1633 if (!first) 1634 proc_put_char(&buffer, &left, ','); 1635 proc_put_long(&buffer, &left, bit_a, false); 1636 if (bit_a != bit_b) { 1637 proc_put_char(&buffer, &left, '-'); 1638 proc_put_long(&buffer, &left, bit_b, false); 1639 } 1640 1641 first = 0; bit_b++; 1642 } 1643 proc_put_char(&buffer, &left, '\n'); 1644 } 1645 1646 if (!err) { 1647 if (write) { 1648 if (*ppos) 1649 bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len); 1650 else 1651 bitmap_copy(bitmap, tmp_bitmap, bitmap_len); 1652 } 1653 *lenp -= left; 1654 *ppos += *lenp; 1655 } 1656 1657 bitmap_free(tmp_bitmap); 1658 return err; 1659 } 1660 1661 #else /* CONFIG_PROC_SYSCTL */ 1662 1663 int proc_dostring(struct ctl_table *table, int write, 1664 void *buffer, size_t *lenp, loff_t *ppos) 1665 { 1666 return -ENOSYS; 1667 } 1668 1669 int proc_dobool(struct ctl_table *table, int write, 1670 void *buffer, size_t *lenp, loff_t *ppos) 1671 { 1672 return -ENOSYS; 1673 } 1674 1675 int proc_dointvec(struct ctl_table *table, int write, 1676 void *buffer, size_t *lenp, loff_t *ppos) 1677 { 1678 return -ENOSYS; 1679 } 1680 1681 int proc_douintvec(struct ctl_table *table, int write, 1682 void *buffer, size_t *lenp, loff_t *ppos) 1683 { 1684 return -ENOSYS; 1685 } 1686 1687 int proc_dointvec_minmax(struct ctl_table *table, int write, 1688 void *buffer, size_t *lenp, 
loff_t *ppos) 1689 { 1690 return -ENOSYS; 1691 } 1692 1693 int proc_douintvec_minmax(struct ctl_table *table, int write, 1694 void *buffer, size_t *lenp, loff_t *ppos) 1695 { 1696 return -ENOSYS; 1697 } 1698 1699 int proc_dou8vec_minmax(struct ctl_table *table, int write, 1700 void *buffer, size_t *lenp, loff_t *ppos) 1701 { 1702 return -ENOSYS; 1703 } 1704 1705 int proc_dointvec_jiffies(struct ctl_table *table, int write, 1706 void *buffer, size_t *lenp, loff_t *ppos) 1707 { 1708 return -ENOSYS; 1709 } 1710 1711 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, 1712 void *buffer, size_t *lenp, loff_t *ppos) 1713 { 1714 return -ENOSYS; 1715 } 1716 1717 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, 1718 void *buffer, size_t *lenp, loff_t *ppos) 1719 { 1720 return -ENOSYS; 1721 } 1722 1723 int proc_doulongvec_minmax(struct ctl_table *table, int write, 1724 void *buffer, size_t *lenp, loff_t *ppos) 1725 { 1726 return -ENOSYS; 1727 } 1728 1729 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, 1730 void *buffer, size_t *lenp, loff_t *ppos) 1731 { 1732 return -ENOSYS; 1733 } 1734 1735 int proc_do_large_bitmap(struct ctl_table *table, int write, 1736 void *buffer, size_t *lenp, loff_t *ppos) 1737 { 1738 return -ENOSYS; 1739 } 1740 1741 #endif /* CONFIG_PROC_SYSCTL */ 1742 1743 #if defined(CONFIG_SYSCTL) 1744 int proc_do_static_key(struct ctl_table *table, int write, 1745 void *buffer, size_t *lenp, loff_t *ppos) 1746 { 1747 struct static_key *key = (struct static_key *)table->data; 1748 static DEFINE_MUTEX(static_key_mutex); 1749 int val, ret; 1750 struct ctl_table tmp = { 1751 .data = &val, 1752 .maxlen = sizeof(val), 1753 .mode = table->mode, 1754 .extra1 = SYSCTL_ZERO, 1755 .extra2 = SYSCTL_ONE, 1756 }; 1757 1758 if (write && !capable(CAP_SYS_ADMIN)) 1759 return -EPERM; 1760 1761 mutex_lock(&static_key_mutex); 1762 val = static_key_enabled(key); 1763 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, 
ppos); 1764 if (write && !ret) { 1765 if (val) 1766 static_key_enable(key); 1767 else 1768 static_key_disable(key); 1769 } 1770 mutex_unlock(&static_key_mutex); 1771 return ret; 1772 } 1773 1774 static struct ctl_table kern_table[] = { 1775 { 1776 .procname = "sched_child_runs_first", 1777 .data = &sysctl_sched_child_runs_first, 1778 .maxlen = sizeof(unsigned int), 1779 .mode = 0644, 1780 .proc_handler = proc_dointvec, 1781 }, 1782 #ifdef CONFIG_SCHEDSTATS 1783 { 1784 .procname = "sched_schedstats", 1785 .data = NULL, 1786 .maxlen = sizeof(unsigned int), 1787 .mode = 0644, 1788 .proc_handler = sysctl_schedstats, 1789 .extra1 = SYSCTL_ZERO, 1790 .extra2 = SYSCTL_ONE, 1791 }, 1792 #endif /* CONFIG_SCHEDSTATS */ 1793 #ifdef CONFIG_TASK_DELAY_ACCT 1794 { 1795 .procname = "task_delayacct", 1796 .data = NULL, 1797 .maxlen = sizeof(unsigned int), 1798 .mode = 0644, 1799 .proc_handler = sysctl_delayacct, 1800 .extra1 = SYSCTL_ZERO, 1801 .extra2 = SYSCTL_ONE, 1802 }, 1803 #endif /* CONFIG_TASK_DELAY_ACCT */ 1804 #ifdef CONFIG_NUMA_BALANCING 1805 { 1806 .procname = "numa_balancing", 1807 .data = NULL, /* filled in by handler */ 1808 .maxlen = sizeof(unsigned int), 1809 .mode = 0644, 1810 .proc_handler = sysctl_numa_balancing, 1811 .extra1 = SYSCTL_ZERO, 1812 .extra2 = SYSCTL_ONE, 1813 }, 1814 #endif /* CONFIG_NUMA_BALANCING */ 1815 { 1816 .procname = "sched_rt_period_us", 1817 .data = &sysctl_sched_rt_period, 1818 .maxlen = sizeof(unsigned int), 1819 .mode = 0644, 1820 .proc_handler = sched_rt_handler, 1821 }, 1822 { 1823 .procname = "sched_rt_runtime_us", 1824 .data = &sysctl_sched_rt_runtime, 1825 .maxlen = sizeof(int), 1826 .mode = 0644, 1827 .proc_handler = sched_rt_handler, 1828 }, 1829 { 1830 .procname = "sched_deadline_period_max_us", 1831 .data = &sysctl_sched_dl_period_max, 1832 .maxlen = sizeof(unsigned int), 1833 .mode = 0644, 1834 .proc_handler = proc_dointvec, 1835 }, 1836 { 1837 .procname = "sched_deadline_period_min_us", 1838 .data = 
&sysctl_sched_dl_period_min, 1839 .maxlen = sizeof(unsigned int), 1840 .mode = 0644, 1841 .proc_handler = proc_dointvec, 1842 }, 1843 { 1844 .procname = "sched_rr_timeslice_ms", 1845 .data = &sysctl_sched_rr_timeslice, 1846 .maxlen = sizeof(int), 1847 .mode = 0644, 1848 .proc_handler = sched_rr_handler, 1849 }, 1850 #ifdef CONFIG_UCLAMP_TASK 1851 { 1852 .procname = "sched_util_clamp_min", 1853 .data = &sysctl_sched_uclamp_util_min, 1854 .maxlen = sizeof(unsigned int), 1855 .mode = 0644, 1856 .proc_handler = sysctl_sched_uclamp_handler, 1857 }, 1858 { 1859 .procname = "sched_util_clamp_max", 1860 .data = &sysctl_sched_uclamp_util_max, 1861 .maxlen = sizeof(unsigned int), 1862 .mode = 0644, 1863 .proc_handler = sysctl_sched_uclamp_handler, 1864 }, 1865 { 1866 .procname = "sched_util_clamp_min_rt_default", 1867 .data = &sysctl_sched_uclamp_util_min_rt_default, 1868 .maxlen = sizeof(unsigned int), 1869 .mode = 0644, 1870 .proc_handler = sysctl_sched_uclamp_handler, 1871 }, 1872 #endif 1873 #ifdef CONFIG_SCHED_AUTOGROUP 1874 { 1875 .procname = "sched_autogroup_enabled", 1876 .data = &sysctl_sched_autogroup_enabled, 1877 .maxlen = sizeof(unsigned int), 1878 .mode = 0644, 1879 .proc_handler = proc_dointvec_minmax, 1880 .extra1 = SYSCTL_ZERO, 1881 .extra2 = SYSCTL_ONE, 1882 }, 1883 #endif 1884 #ifdef CONFIG_CFS_BANDWIDTH 1885 { 1886 .procname = "sched_cfs_bandwidth_slice_us", 1887 .data = &sysctl_sched_cfs_bandwidth_slice, 1888 .maxlen = sizeof(unsigned int), 1889 .mode = 0644, 1890 .proc_handler = proc_dointvec_minmax, 1891 .extra1 = SYSCTL_ONE, 1892 }, 1893 #endif 1894 #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) 1895 { 1896 .procname = "sched_energy_aware", 1897 .data = &sysctl_sched_energy_aware, 1898 .maxlen = sizeof(unsigned int), 1899 .mode = 0644, 1900 .proc_handler = sched_energy_aware_handler, 1901 .extra1 = SYSCTL_ZERO, 1902 .extra2 = SYSCTL_ONE, 1903 }, 1904 #endif 1905 #ifdef CONFIG_PROVE_LOCKING 1906 { 1907 .procname = 
"prove_locking", 1908 .data = &prove_locking, 1909 .maxlen = sizeof(int), 1910 .mode = 0644, 1911 .proc_handler = proc_dointvec, 1912 }, 1913 #endif 1914 #ifdef CONFIG_LOCK_STAT 1915 { 1916 .procname = "lock_stat", 1917 .data = &lock_stat, 1918 .maxlen = sizeof(int), 1919 .mode = 0644, 1920 .proc_handler = proc_dointvec, 1921 }, 1922 #endif 1923 { 1924 .procname = "panic", 1925 .data = &panic_timeout, 1926 .maxlen = sizeof(int), 1927 .mode = 0644, 1928 .proc_handler = proc_dointvec, 1929 }, 1930 #ifdef CONFIG_COREDUMP 1931 { 1932 .procname = "core_uses_pid", 1933 .data = &core_uses_pid, 1934 .maxlen = sizeof(int), 1935 .mode = 0644, 1936 .proc_handler = proc_dointvec, 1937 }, 1938 { 1939 .procname = "core_pattern", 1940 .data = core_pattern, 1941 .maxlen = CORENAME_MAX_SIZE, 1942 .mode = 0644, 1943 .proc_handler = proc_dostring_coredump, 1944 }, 1945 { 1946 .procname = "core_pipe_limit", 1947 .data = &core_pipe_limit, 1948 .maxlen = sizeof(unsigned int), 1949 .mode = 0644, 1950 .proc_handler = proc_dointvec, 1951 }, 1952 #endif 1953 #ifdef CONFIG_PROC_SYSCTL 1954 { 1955 .procname = "tainted", 1956 .maxlen = sizeof(long), 1957 .mode = 0644, 1958 .proc_handler = proc_taint, 1959 }, 1960 { 1961 .procname = "sysctl_writes_strict", 1962 .data = &sysctl_writes_strict, 1963 .maxlen = sizeof(int), 1964 .mode = 0644, 1965 .proc_handler = proc_dointvec_minmax, 1966 .extra1 = &neg_one, 1967 .extra2 = SYSCTL_ONE, 1968 }, 1969 #endif 1970 #ifdef CONFIG_LATENCYTOP 1971 { 1972 .procname = "latencytop", 1973 .data = &latencytop_enabled, 1974 .maxlen = sizeof(int), 1975 .mode = 0644, 1976 .proc_handler = sysctl_latencytop, 1977 }, 1978 #endif 1979 #ifdef CONFIG_BLK_DEV_INITRD 1980 { 1981 .procname = "real-root-dev", 1982 .data = &real_root_dev, 1983 .maxlen = sizeof(int), 1984 .mode = 0644, 1985 .proc_handler = proc_dointvec, 1986 }, 1987 #endif 1988 { 1989 .procname = "print-fatal-signals", 1990 .data = &print_fatal_signals, 1991 .maxlen = sizeof(int), 1992 .mode = 0644, 1993 
.proc_handler = proc_dointvec, 1994 }, 1995 #ifdef CONFIG_SPARC 1996 { 1997 .procname = "reboot-cmd", 1998 .data = reboot_command, 1999 .maxlen = 256, 2000 .mode = 0644, 2001 .proc_handler = proc_dostring, 2002 }, 2003 { 2004 .procname = "stop-a", 2005 .data = &stop_a_enabled, 2006 .maxlen = sizeof (int), 2007 .mode = 0644, 2008 .proc_handler = proc_dointvec, 2009 }, 2010 { 2011 .procname = "scons-poweroff", 2012 .data = &scons_pwroff, 2013 .maxlen = sizeof (int), 2014 .mode = 0644, 2015 .proc_handler = proc_dointvec, 2016 }, 2017 #endif 2018 #ifdef CONFIG_SPARC64 2019 { 2020 .procname = "tsb-ratio", 2021 .data = &sysctl_tsb_ratio, 2022 .maxlen = sizeof (int), 2023 .mode = 0644, 2024 .proc_handler = proc_dointvec, 2025 }, 2026 #endif 2027 #ifdef CONFIG_PARISC 2028 { 2029 .procname = "soft-power", 2030 .data = &pwrsw_enabled, 2031 .maxlen = sizeof (int), 2032 .mode = 0644, 2033 .proc_handler = proc_dointvec, 2034 }, 2035 #endif 2036 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW 2037 { 2038 .procname = "unaligned-trap", 2039 .data = &unaligned_enabled, 2040 .maxlen = sizeof (int), 2041 .mode = 0644, 2042 .proc_handler = proc_dointvec, 2043 }, 2044 #endif 2045 { 2046 .procname = "ctrl-alt-del", 2047 .data = &C_A_D, 2048 .maxlen = sizeof(int), 2049 .mode = 0644, 2050 .proc_handler = proc_dointvec, 2051 }, 2052 #ifdef CONFIG_FUNCTION_TRACER 2053 { 2054 .procname = "ftrace_enabled", 2055 .data = &ftrace_enabled, 2056 .maxlen = sizeof(int), 2057 .mode = 0644, 2058 .proc_handler = ftrace_enable_sysctl, 2059 }, 2060 #endif 2061 #ifdef CONFIG_STACK_TRACER 2062 { 2063 .procname = "stack_tracer_enabled", 2064 .data = &stack_tracer_enabled, 2065 .maxlen = sizeof(int), 2066 .mode = 0644, 2067 .proc_handler = stack_trace_sysctl, 2068 }, 2069 #endif 2070 #ifdef CONFIG_TRACING 2071 { 2072 .procname = "ftrace_dump_on_oops", 2073 .data = &ftrace_dump_on_oops, 2074 .maxlen = sizeof(int), 2075 .mode = 0644, 2076 .proc_handler = proc_dointvec, 2077 }, 2078 { 2079 .procname = 
"traceoff_on_warning", 2080 .data = &__disable_trace_on_warning, 2081 .maxlen = sizeof(__disable_trace_on_warning), 2082 .mode = 0644, 2083 .proc_handler = proc_dointvec, 2084 }, 2085 { 2086 .procname = "tracepoint_printk", 2087 .data = &tracepoint_printk, 2088 .maxlen = sizeof(tracepoint_printk), 2089 .mode = 0644, 2090 .proc_handler = tracepoint_printk_sysctl, 2091 }, 2092 #endif 2093 #ifdef CONFIG_KEXEC_CORE 2094 { 2095 .procname = "kexec_load_disabled", 2096 .data = &kexec_load_disabled, 2097 .maxlen = sizeof(int), 2098 .mode = 0644, 2099 /* only handle a transition from default "0" to "1" */ 2100 .proc_handler = proc_dointvec_minmax, 2101 .extra1 = SYSCTL_ONE, 2102 .extra2 = SYSCTL_ONE, 2103 }, 2104 #endif 2105 #ifdef CONFIG_MODULES 2106 { 2107 .procname = "modprobe", 2108 .data = &modprobe_path, 2109 .maxlen = KMOD_PATH_LEN, 2110 .mode = 0644, 2111 .proc_handler = proc_dostring, 2112 }, 2113 { 2114 .procname = "modules_disabled", 2115 .data = &modules_disabled, 2116 .maxlen = sizeof(int), 2117 .mode = 0644, 2118 /* only handle a transition from default "0" to "1" */ 2119 .proc_handler = proc_dointvec_minmax, 2120 .extra1 = SYSCTL_ONE, 2121 .extra2 = SYSCTL_ONE, 2122 }, 2123 #endif 2124 #ifdef CONFIG_UEVENT_HELPER 2125 { 2126 .procname = "hotplug", 2127 .data = &uevent_helper, 2128 .maxlen = UEVENT_HELPER_PATH_LEN, 2129 .mode = 0644, 2130 .proc_handler = proc_dostring, 2131 }, 2132 #endif 2133 #ifdef CONFIG_CHR_DEV_SG 2134 { 2135 .procname = "sg-big-buff", 2136 .data = &sg_big_buff, 2137 .maxlen = sizeof (int), 2138 .mode = 0444, 2139 .proc_handler = proc_dointvec, 2140 }, 2141 #endif 2142 #ifdef CONFIG_BSD_PROCESS_ACCT 2143 { 2144 .procname = "acct", 2145 .data = &acct_parm, 2146 .maxlen = 3*sizeof(int), 2147 .mode = 0644, 2148 .proc_handler = proc_dointvec, 2149 }, 2150 #endif 2151 #ifdef CONFIG_MAGIC_SYSRQ 2152 { 2153 .procname = "sysrq", 2154 .data = NULL, 2155 .maxlen = sizeof (int), 2156 .mode = 0644, 2157 .proc_handler = sysrq_sysctl_handler, 2158 }, 
2159 #endif 2160 #ifdef CONFIG_PROC_SYSCTL 2161 { 2162 .procname = "cad_pid", 2163 .data = NULL, 2164 .maxlen = sizeof (int), 2165 .mode = 0600, 2166 .proc_handler = proc_do_cad_pid, 2167 }, 2168 #endif 2169 { 2170 .procname = "threads-max", 2171 .data = NULL, 2172 .maxlen = sizeof(int), 2173 .mode = 0644, 2174 .proc_handler = sysctl_max_threads, 2175 }, 2176 { 2177 .procname = "random", 2178 .mode = 0555, 2179 .child = random_table, 2180 }, 2181 { 2182 .procname = "usermodehelper", 2183 .mode = 0555, 2184 .child = usermodehelper_table, 2185 }, 2186 #ifdef CONFIG_FW_LOADER_USER_HELPER 2187 { 2188 .procname = "firmware_config", 2189 .mode = 0555, 2190 .child = firmware_config_table, 2191 }, 2192 #endif 2193 { 2194 .procname = "overflowuid", 2195 .data = &overflowuid, 2196 .maxlen = sizeof(int), 2197 .mode = 0644, 2198 .proc_handler = proc_dointvec_minmax, 2199 .extra1 = &minolduid, 2200 .extra2 = &maxolduid, 2201 }, 2202 { 2203 .procname = "overflowgid", 2204 .data = &overflowgid, 2205 .maxlen = sizeof(int), 2206 .mode = 0644, 2207 .proc_handler = proc_dointvec_minmax, 2208 .extra1 = &minolduid, 2209 .extra2 = &maxolduid, 2210 }, 2211 #ifdef CONFIG_S390 2212 { 2213 .procname = "userprocess_debug", 2214 .data = &show_unhandled_signals, 2215 .maxlen = sizeof(int), 2216 .mode = 0644, 2217 .proc_handler = proc_dointvec, 2218 }, 2219 #endif 2220 #ifdef CONFIG_SMP 2221 { 2222 .procname = "oops_all_cpu_backtrace", 2223 .data = &sysctl_oops_all_cpu_backtrace, 2224 .maxlen = sizeof(int), 2225 .mode = 0644, 2226 .proc_handler = proc_dointvec_minmax, 2227 .extra1 = SYSCTL_ZERO, 2228 .extra2 = SYSCTL_ONE, 2229 }, 2230 #endif /* CONFIG_SMP */ 2231 { 2232 .procname = "pid_max", 2233 .data = &pid_max, 2234 .maxlen = sizeof (int), 2235 .mode = 0644, 2236 .proc_handler = proc_dointvec_minmax, 2237 .extra1 = &pid_max_min, 2238 .extra2 = &pid_max_max, 2239 }, 2240 { 2241 .procname = "panic_on_oops", 2242 .data = &panic_on_oops, 2243 .maxlen = sizeof(int), 2244 .mode = 0644, 2245 
.proc_handler = proc_dointvec, 2246 }, 2247 { 2248 .procname = "panic_print", 2249 .data = &panic_print, 2250 .maxlen = sizeof(unsigned long), 2251 .mode = 0644, 2252 .proc_handler = proc_doulongvec_minmax, 2253 }, 2254 #if defined CONFIG_PRINTK 2255 { 2256 .procname = "printk", 2257 .data = &console_loglevel, 2258 .maxlen = 4*sizeof(int), 2259 .mode = 0644, 2260 .proc_handler = proc_dointvec, 2261 }, 2262 { 2263 .procname = "printk_ratelimit", 2264 .data = &printk_ratelimit_state.interval, 2265 .maxlen = sizeof(int), 2266 .mode = 0644, 2267 .proc_handler = proc_dointvec_jiffies, 2268 }, 2269 { 2270 .procname = "printk_ratelimit_burst", 2271 .data = &printk_ratelimit_state.burst, 2272 .maxlen = sizeof(int), 2273 .mode = 0644, 2274 .proc_handler = proc_dointvec, 2275 }, 2276 { 2277 .procname = "printk_delay", 2278 .data = &printk_delay_msec, 2279 .maxlen = sizeof(int), 2280 .mode = 0644, 2281 .proc_handler = proc_dointvec_minmax, 2282 .extra1 = SYSCTL_ZERO, 2283 .extra2 = &ten_thousand, 2284 }, 2285 { 2286 .procname = "printk_devkmsg", 2287 .data = devkmsg_log_str, 2288 .maxlen = DEVKMSG_STR_MAX_SIZE, 2289 .mode = 0644, 2290 .proc_handler = devkmsg_sysctl_set_loglvl, 2291 }, 2292 { 2293 .procname = "dmesg_restrict", 2294 .data = &dmesg_restrict, 2295 .maxlen = sizeof(int), 2296 .mode = 0644, 2297 .proc_handler = proc_dointvec_minmax_sysadmin, 2298 .extra1 = SYSCTL_ZERO, 2299 .extra2 = SYSCTL_ONE, 2300 }, 2301 { 2302 .procname = "kptr_restrict", 2303 .data = &kptr_restrict, 2304 .maxlen = sizeof(int), 2305 .mode = 0644, 2306 .proc_handler = proc_dointvec_minmax_sysadmin, 2307 .extra1 = SYSCTL_ZERO, 2308 .extra2 = &two, 2309 }, 2310 #endif 2311 { 2312 .procname = "ngroups_max", 2313 .data = &ngroups_max, 2314 .maxlen = sizeof (int), 2315 .mode = 0444, 2316 .proc_handler = proc_dointvec, 2317 }, 2318 { 2319 .procname = "cap_last_cap", 2320 .data = (void *)&cap_last_cap, 2321 .maxlen = sizeof(int), 2322 .mode = 0444, 2323 .proc_handler = proc_dointvec, 2324 }, 2325 #if 
defined(CONFIG_LOCKUP_DETECTOR) 2326 { 2327 .procname = "watchdog", 2328 .data = &watchdog_user_enabled, 2329 .maxlen = sizeof(int), 2330 .mode = 0644, 2331 .proc_handler = proc_watchdog, 2332 .extra1 = SYSCTL_ZERO, 2333 .extra2 = SYSCTL_ONE, 2334 }, 2335 { 2336 .procname = "watchdog_thresh", 2337 .data = &watchdog_thresh, 2338 .maxlen = sizeof(int), 2339 .mode = 0644, 2340 .proc_handler = proc_watchdog_thresh, 2341 .extra1 = SYSCTL_ZERO, 2342 .extra2 = &sixty, 2343 }, 2344 { 2345 .procname = "nmi_watchdog", 2346 .data = &nmi_watchdog_user_enabled, 2347 .maxlen = sizeof(int), 2348 .mode = NMI_WATCHDOG_SYSCTL_PERM, 2349 .proc_handler = proc_nmi_watchdog, 2350 .extra1 = SYSCTL_ZERO, 2351 .extra2 = SYSCTL_ONE, 2352 }, 2353 { 2354 .procname = "watchdog_cpumask", 2355 .data = &watchdog_cpumask_bits, 2356 .maxlen = NR_CPUS, 2357 .mode = 0644, 2358 .proc_handler = proc_watchdog_cpumask, 2359 }, 2360 #ifdef CONFIG_SOFTLOCKUP_DETECTOR 2361 { 2362 .procname = "soft_watchdog", 2363 .data = &soft_watchdog_user_enabled, 2364 .maxlen = sizeof(int), 2365 .mode = 0644, 2366 .proc_handler = proc_soft_watchdog, 2367 .extra1 = SYSCTL_ZERO, 2368 .extra2 = SYSCTL_ONE, 2369 }, 2370 { 2371 .procname = "softlockup_panic", 2372 .data = &softlockup_panic, 2373 .maxlen = sizeof(int), 2374 .mode = 0644, 2375 .proc_handler = proc_dointvec_minmax, 2376 .extra1 = SYSCTL_ZERO, 2377 .extra2 = SYSCTL_ONE, 2378 }, 2379 #ifdef CONFIG_SMP 2380 { 2381 .procname = "softlockup_all_cpu_backtrace", 2382 .data = &sysctl_softlockup_all_cpu_backtrace, 2383 .maxlen = sizeof(int), 2384 .mode = 0644, 2385 .proc_handler = proc_dointvec_minmax, 2386 .extra1 = SYSCTL_ZERO, 2387 .extra2 = SYSCTL_ONE, 2388 }, 2389 #endif /* CONFIG_SMP */ 2390 #endif 2391 #ifdef CONFIG_HARDLOCKUP_DETECTOR 2392 { 2393 .procname = "hardlockup_panic", 2394 .data = &hardlockup_panic, 2395 .maxlen = sizeof(int), 2396 .mode = 0644, 2397 .proc_handler = proc_dointvec_minmax, 2398 .extra1 = SYSCTL_ZERO, 2399 .extra2 = SYSCTL_ONE, 2400 }, 2401 
#ifdef CONFIG_SMP 2402 { 2403 .procname = "hardlockup_all_cpu_backtrace", 2404 .data = &sysctl_hardlockup_all_cpu_backtrace, 2405 .maxlen = sizeof(int), 2406 .mode = 0644, 2407 .proc_handler = proc_dointvec_minmax, 2408 .extra1 = SYSCTL_ZERO, 2409 .extra2 = SYSCTL_ONE, 2410 }, 2411 #endif /* CONFIG_SMP */ 2412 #endif 2413 #endif 2414 2415 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) 2416 { 2417 .procname = "unknown_nmi_panic", 2418 .data = &unknown_nmi_panic, 2419 .maxlen = sizeof (int), 2420 .mode = 0644, 2421 .proc_handler = proc_dointvec, 2422 }, 2423 #endif 2424 2425 #if (defined(CONFIG_X86_32) || defined(CONFIG_PARISC)) && \ 2426 defined(CONFIG_DEBUG_STACKOVERFLOW) 2427 { 2428 .procname = "panic_on_stackoverflow", 2429 .data = &sysctl_panic_on_stackoverflow, 2430 .maxlen = sizeof(int), 2431 .mode = 0644, 2432 .proc_handler = proc_dointvec, 2433 }, 2434 #endif 2435 #if defined(CONFIG_X86) 2436 { 2437 .procname = "panic_on_unrecovered_nmi", 2438 .data = &panic_on_unrecovered_nmi, 2439 .maxlen = sizeof(int), 2440 .mode = 0644, 2441 .proc_handler = proc_dointvec, 2442 }, 2443 { 2444 .procname = "panic_on_io_nmi", 2445 .data = &panic_on_io_nmi, 2446 .maxlen = sizeof(int), 2447 .mode = 0644, 2448 .proc_handler = proc_dointvec, 2449 }, 2450 { 2451 .procname = "bootloader_type", 2452 .data = &bootloader_type, 2453 .maxlen = sizeof (int), 2454 .mode = 0444, 2455 .proc_handler = proc_dointvec, 2456 }, 2457 { 2458 .procname = "bootloader_version", 2459 .data = &bootloader_version, 2460 .maxlen = sizeof (int), 2461 .mode = 0444, 2462 .proc_handler = proc_dointvec, 2463 }, 2464 { 2465 .procname = "io_delay_type", 2466 .data = &io_delay_type, 2467 .maxlen = sizeof(int), 2468 .mode = 0644, 2469 .proc_handler = proc_dointvec, 2470 }, 2471 #endif 2472 #if defined(CONFIG_MMU) 2473 { 2474 .procname = "randomize_va_space", 2475 .data = &randomize_va_space, 2476 .maxlen = sizeof(int), 2477 .mode = 0644, 2478 .proc_handler = proc_dointvec, 2479 }, 2480 #endif 2481 #if 
defined(CONFIG_S390) && defined(CONFIG_SMP) 2482 { 2483 .procname = "spin_retry", 2484 .data = &spin_retry, 2485 .maxlen = sizeof (int), 2486 .mode = 0644, 2487 .proc_handler = proc_dointvec, 2488 }, 2489 #endif 2490 #if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86) 2491 { 2492 .procname = "acpi_video_flags", 2493 .data = &acpi_realmode_flags, 2494 .maxlen = sizeof (unsigned long), 2495 .mode = 0644, 2496 .proc_handler = proc_doulongvec_minmax, 2497 }, 2498 #endif 2499 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN 2500 { 2501 .procname = "ignore-unaligned-usertrap", 2502 .data = &no_unaligned_warning, 2503 .maxlen = sizeof (int), 2504 .mode = 0644, 2505 .proc_handler = proc_dointvec, 2506 }, 2507 #endif 2508 #ifdef CONFIG_IA64 2509 { 2510 .procname = "unaligned-dump-stack", 2511 .data = &unaligned_dump_stack, 2512 .maxlen = sizeof (int), 2513 .mode = 0644, 2514 .proc_handler = proc_dointvec, 2515 }, 2516 #endif 2517 #ifdef CONFIG_DETECT_HUNG_TASK 2518 #ifdef CONFIG_SMP 2519 { 2520 .procname = "hung_task_all_cpu_backtrace", 2521 .data = &sysctl_hung_task_all_cpu_backtrace, 2522 .maxlen = sizeof(int), 2523 .mode = 0644, 2524 .proc_handler = proc_dointvec_minmax, 2525 .extra1 = SYSCTL_ZERO, 2526 .extra2 = SYSCTL_ONE, 2527 }, 2528 #endif /* CONFIG_SMP */ 2529 { 2530 .procname = "hung_task_panic", 2531 .data = &sysctl_hung_task_panic, 2532 .maxlen = sizeof(int), 2533 .mode = 0644, 2534 .proc_handler = proc_dointvec_minmax, 2535 .extra1 = SYSCTL_ZERO, 2536 .extra2 = SYSCTL_ONE, 2537 }, 2538 { 2539 .procname = "hung_task_check_count", 2540 .data = &sysctl_hung_task_check_count, 2541 .maxlen = sizeof(int), 2542 .mode = 0644, 2543 .proc_handler = proc_dointvec_minmax, 2544 .extra1 = SYSCTL_ZERO, 2545 }, 2546 { 2547 .procname = "hung_task_timeout_secs", 2548 .data = &sysctl_hung_task_timeout_secs, 2549 .maxlen = sizeof(unsigned long), 2550 .mode = 0644, 2551 .proc_handler = proc_dohung_task_timeout_secs, 2552 .extra2 = &hung_task_timeout_max, 2553 }, 2554 { 2555 .procname = 
"hung_task_check_interval_secs", 2556 .data = &sysctl_hung_task_check_interval_secs, 2557 .maxlen = sizeof(unsigned long), 2558 .mode = 0644, 2559 .proc_handler = proc_dohung_task_timeout_secs, 2560 .extra2 = &hung_task_timeout_max, 2561 }, 2562 { 2563 .procname = "hung_task_warnings", 2564 .data = &sysctl_hung_task_warnings, 2565 .maxlen = sizeof(int), 2566 .mode = 0644, 2567 .proc_handler = proc_dointvec_minmax, 2568 .extra1 = &neg_one, 2569 }, 2570 #endif 2571 #ifdef CONFIG_RT_MUTEXES 2572 { 2573 .procname = "max_lock_depth", 2574 .data = &max_lock_depth, 2575 .maxlen = sizeof(int), 2576 .mode = 0644, 2577 .proc_handler = proc_dointvec, 2578 }, 2579 #endif 2580 { 2581 .procname = "poweroff_cmd", 2582 .data = &poweroff_cmd, 2583 .maxlen = POWEROFF_CMD_PATH_LEN, 2584 .mode = 0644, 2585 .proc_handler = proc_dostring, 2586 }, 2587 #ifdef CONFIG_KEYS 2588 { 2589 .procname = "keys", 2590 .mode = 0555, 2591 .child = key_sysctls, 2592 }, 2593 #endif 2594 #ifdef CONFIG_PERF_EVENTS 2595 /* 2596 * User-space scripts rely on the existence of this file 2597 * as a feature check for perf_events being enabled. 2598 * 2599 * So it's an ABI, do not remove! 
2600 */ 2601 { 2602 .procname = "perf_event_paranoid", 2603 .data = &sysctl_perf_event_paranoid, 2604 .maxlen = sizeof(sysctl_perf_event_paranoid), 2605 .mode = 0644, 2606 .proc_handler = proc_dointvec, 2607 }, 2608 { 2609 .procname = "perf_event_mlock_kb", 2610 .data = &sysctl_perf_event_mlock, 2611 .maxlen = sizeof(sysctl_perf_event_mlock), 2612 .mode = 0644, 2613 .proc_handler = proc_dointvec, 2614 }, 2615 { 2616 .procname = "perf_event_max_sample_rate", 2617 .data = &sysctl_perf_event_sample_rate, 2618 .maxlen = sizeof(sysctl_perf_event_sample_rate), 2619 .mode = 0644, 2620 .proc_handler = perf_proc_update_handler, 2621 .extra1 = SYSCTL_ONE, 2622 }, 2623 { 2624 .procname = "perf_cpu_time_max_percent", 2625 .data = &sysctl_perf_cpu_time_max_percent, 2626 .maxlen = sizeof(sysctl_perf_cpu_time_max_percent), 2627 .mode = 0644, 2628 .proc_handler = perf_cpu_time_max_percent_handler, 2629 .extra1 = SYSCTL_ZERO, 2630 .extra2 = &one_hundred, 2631 }, 2632 { 2633 .procname = "perf_event_max_stack", 2634 .data = &sysctl_perf_event_max_stack, 2635 .maxlen = sizeof(sysctl_perf_event_max_stack), 2636 .mode = 0644, 2637 .proc_handler = perf_event_max_stack_handler, 2638 .extra1 = SYSCTL_ZERO, 2639 .extra2 = &six_hundred_forty_kb, 2640 }, 2641 { 2642 .procname = "perf_event_max_contexts_per_stack", 2643 .data = &sysctl_perf_event_max_contexts_per_stack, 2644 .maxlen = sizeof(sysctl_perf_event_max_contexts_per_stack), 2645 .mode = 0644, 2646 .proc_handler = perf_event_max_stack_handler, 2647 .extra1 = SYSCTL_ZERO, 2648 .extra2 = &one_thousand, 2649 }, 2650 #endif 2651 { 2652 .procname = "panic_on_warn", 2653 .data = &panic_on_warn, 2654 .maxlen = sizeof(int), 2655 .mode = 0644, 2656 .proc_handler = proc_dointvec_minmax, 2657 .extra1 = SYSCTL_ZERO, 2658 .extra2 = SYSCTL_ONE, 2659 }, 2660 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) 2661 { 2662 .procname = "timer_migration", 2663 .data = &sysctl_timer_migration, 2664 .maxlen = sizeof(unsigned int), 2665 .mode = 0644, 
2666 .proc_handler = timer_migration_handler, 2667 .extra1 = SYSCTL_ZERO, 2668 .extra2 = SYSCTL_ONE, 2669 }, 2670 #endif 2671 #ifdef CONFIG_BPF_SYSCALL 2672 { 2673 .procname = "unprivileged_bpf_disabled", 2674 .data = &sysctl_unprivileged_bpf_disabled, 2675 .maxlen = sizeof(sysctl_unprivileged_bpf_disabled), 2676 .mode = 0644, 2677 .proc_handler = bpf_unpriv_handler, 2678 .extra1 = SYSCTL_ZERO, 2679 .extra2 = &two, 2680 }, 2681 { 2682 .procname = "bpf_stats_enabled", 2683 .data = &bpf_stats_enabled_key.key, 2684 .maxlen = sizeof(bpf_stats_enabled_key), 2685 .mode = 0644, 2686 .proc_handler = bpf_stats_handler, 2687 }, 2688 #endif 2689 #if defined(CONFIG_TREE_RCU) 2690 { 2691 .procname = "panic_on_rcu_stall", 2692 .data = &sysctl_panic_on_rcu_stall, 2693 .maxlen = sizeof(sysctl_panic_on_rcu_stall), 2694 .mode = 0644, 2695 .proc_handler = proc_dointvec_minmax, 2696 .extra1 = SYSCTL_ZERO, 2697 .extra2 = SYSCTL_ONE, 2698 }, 2699 #endif 2700 #if defined(CONFIG_TREE_RCU) 2701 { 2702 .procname = "max_rcu_stall_to_panic", 2703 .data = &sysctl_max_rcu_stall_to_panic, 2704 .maxlen = sizeof(sysctl_max_rcu_stall_to_panic), 2705 .mode = 0644, 2706 .proc_handler = proc_dointvec_minmax, 2707 .extra1 = SYSCTL_ONE, 2708 .extra2 = SYSCTL_INT_MAX, 2709 }, 2710 #endif 2711 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE 2712 { 2713 .procname = "stack_erasing", 2714 .data = NULL, 2715 .maxlen = sizeof(int), 2716 .mode = 0600, 2717 .proc_handler = stack_erasing_sysctl, 2718 .extra1 = SYSCTL_ZERO, 2719 .extra2 = SYSCTL_ONE, 2720 }, 2721 #endif 2722 { } 2723 }; 2724 /* vm_table: entries for the vm directory; sysctl_base_table names this array as the child of its vm entry. Per entry, .procname is the name, .data and .maxlen give the backing object and its size, .mode the octal permission bits and .proc_handler the handler; where present, .extra1 and .extra2 hold the minimum and maximum constants. The array ends with an empty entry. */ 2725 static struct ctl_table vm_table[] = { 2726 { 2727 .procname = "overcommit_memory", 2728 .data = &sysctl_overcommit_memory, 2729 .maxlen = sizeof(sysctl_overcommit_memory), 2730 .mode = 0644, 2731 .proc_handler = overcommit_policy_handler, 2732 .extra1 = SYSCTL_ZERO, 2733 .extra2 = &two, 2734 }, 2735 { 2736 .procname = "panic_on_oom", 2737 .data = &sysctl_panic_on_oom, 2738 .maxlen = sizeof(sysctl_panic_on_oom), 2739 .mode = 
0644, 2740 .proc_handler = proc_dointvec_minmax, 2741 .extra1 = SYSCTL_ZERO, 2742 .extra2 = &two, 2743 }, 2744 { 2745 .procname = "oom_kill_allocating_task", 2746 .data = &sysctl_oom_kill_allocating_task, 2747 .maxlen = sizeof(sysctl_oom_kill_allocating_task), 2748 .mode = 0644, 2749 .proc_handler = proc_dointvec, 2750 }, 2751 { 2752 .procname = "oom_dump_tasks", 2753 .data = &sysctl_oom_dump_tasks, 2754 .maxlen = sizeof(sysctl_oom_dump_tasks), 2755 .mode = 0644, 2756 .proc_handler = proc_dointvec, 2757 }, 2758 { 2759 .procname = "overcommit_ratio", 2760 .data = &sysctl_overcommit_ratio, 2761 .maxlen = sizeof(sysctl_overcommit_ratio), 2762 .mode = 0644, 2763 .proc_handler = overcommit_ratio_handler, 2764 }, 2765 { 2766 .procname = "overcommit_kbytes", 2767 .data = &sysctl_overcommit_kbytes, 2768 .maxlen = sizeof(sysctl_overcommit_kbytes), 2769 .mode = 0644, 2770 .proc_handler = overcommit_kbytes_handler, 2771 }, 2772 { 2773 .procname = "page-cluster", 2774 .data = &page_cluster, 2775 .maxlen = sizeof(int), 2776 .mode = 0644, 2777 .proc_handler = proc_dointvec_minmax, 2778 .extra1 = SYSCTL_ZERO, 2779 }, 2780 { 2781 .procname = "dirty_background_ratio", 2782 .data = &dirty_background_ratio, 2783 .maxlen = sizeof(dirty_background_ratio), 2784 .mode = 0644, 2785 .proc_handler = dirty_background_ratio_handler, 2786 .extra1 = SYSCTL_ZERO, 2787 .extra2 = &one_hundred, 2788 }, 2789 { 2790 .procname = "dirty_background_bytes", 2791 .data = &dirty_background_bytes, 2792 .maxlen = sizeof(dirty_background_bytes), 2793 .mode = 0644, 2794 .proc_handler = dirty_background_bytes_handler, 2795 .extra1 = &one_ul, 2796 }, 2797 { 2798 .procname = "dirty_ratio", 2799 .data = &vm_dirty_ratio, 2800 .maxlen = sizeof(vm_dirty_ratio), 2801 .mode = 0644, 2802 .proc_handler = dirty_ratio_handler, 2803 .extra1 = SYSCTL_ZERO, 2804 .extra2 = &one_hundred, 2805 }, 2806 { 2807 .procname = "dirty_bytes", 2808 .data = &vm_dirty_bytes, 2809 .maxlen = sizeof(vm_dirty_bytes), 2810 .mode = 0644, 2811 
.proc_handler = dirty_bytes_handler, 2812 .extra1 = &dirty_bytes_min, 2813 }, 2814 { 2815 .procname = "dirty_writeback_centisecs", 2816 .data = &dirty_writeback_interval, 2817 .maxlen = sizeof(dirty_writeback_interval), 2818 .mode = 0644, 2819 .proc_handler = dirty_writeback_centisecs_handler, 2820 }, 2821 { 2822 .procname = "dirty_expire_centisecs", 2823 .data = &dirty_expire_interval, 2824 .maxlen = sizeof(dirty_expire_interval), 2825 .mode = 0644, 2826 .proc_handler = proc_dointvec_minmax, 2827 .extra1 = SYSCTL_ZERO, 2828 }, 2829 { 2830 .procname = "dirtytime_expire_seconds", 2831 .data = &dirtytime_expire_interval, 2832 .maxlen = sizeof(dirtytime_expire_interval), 2833 .mode = 0644, 2834 .proc_handler = dirtytime_interval_handler, 2835 .extra1 = SYSCTL_ZERO, 2836 }, 2837 { 2838 .procname = "swappiness", 2839 .data = &vm_swappiness, 2840 .maxlen = sizeof(vm_swappiness), 2841 .mode = 0644, 2842 .proc_handler = proc_dointvec_minmax, 2843 .extra1 = SYSCTL_ZERO, 2844 .extra2 = &two_hundred, 2845 }, 2846 #ifdef CONFIG_HUGETLB_PAGE 2847 { 2848 .procname = "nr_hugepages", 2849 .data = NULL, 2850 .maxlen = sizeof(unsigned long), 2851 .mode = 0644, 2852 .proc_handler = hugetlb_sysctl_handler, 2853 }, 2854 #ifdef CONFIG_NUMA 2855 { 2856 .procname = "nr_hugepages_mempolicy", 2857 .data = NULL, 2858 .maxlen = sizeof(unsigned long), 2859 .mode = 0644, 2860 .proc_handler = &hugetlb_mempolicy_sysctl_handler, 2861 }, 2862 { 2863 .procname = "numa_stat", 2864 .data = &sysctl_vm_numa_stat, 2865 .maxlen = sizeof(int), 2866 .mode = 0644, 2867 .proc_handler = sysctl_vm_numa_stat_handler, 2868 .extra1 = SYSCTL_ZERO, 2869 .extra2 = SYSCTL_ONE, 2870 }, 2871 #endif 2872 { 2873 .procname = "hugetlb_shm_group", 2874 .data = &sysctl_hugetlb_shm_group, 2875 .maxlen = sizeof(gid_t), 2876 .mode = 0644, 2877 .proc_handler = proc_dointvec, 2878 }, 2879 { 2880 .procname = "nr_overcommit_hugepages", 2881 .data = NULL, 2882 .maxlen = sizeof(unsigned long), 2883 .mode = 0644, 2884 .proc_handler = 
hugetlb_overcommit_handler, 2885 }, 2886 #endif 2887 { 2888 .procname = "lowmem_reserve_ratio", 2889 .data = &sysctl_lowmem_reserve_ratio, 2890 .maxlen = sizeof(sysctl_lowmem_reserve_ratio), 2891 .mode = 0644, 2892 .proc_handler = lowmem_reserve_ratio_sysctl_handler, 2893 }, 2894 { 2895 .procname = "drop_caches", 2896 .data = &sysctl_drop_caches, 2897 .maxlen = sizeof(int), 2898 .mode = 0200, 2899 .proc_handler = drop_caches_sysctl_handler, 2900 .extra1 = SYSCTL_ONE, 2901 .extra2 = &four, 2902 }, 2903 #ifdef CONFIG_COMPACTION 2904 { 2905 .procname = "compact_memory", 2906 .data = NULL, 2907 .maxlen = sizeof(int), 2908 .mode = 0200, 2909 .proc_handler = sysctl_compaction_handler, 2910 }, 2911 { 2912 .procname = "compaction_proactiveness", 2913 .data = &sysctl_compaction_proactiveness, 2914 .maxlen = sizeof(sysctl_compaction_proactiveness), 2915 .mode = 0644, 2916 .proc_handler = compaction_proactiveness_sysctl_handler, 2917 .extra1 = SYSCTL_ZERO, 2918 .extra2 = &one_hundred, 2919 }, 2920 { 2921 .procname = "extfrag_threshold", 2922 .data = &sysctl_extfrag_threshold, 2923 .maxlen = sizeof(int), 2924 .mode = 0644, 2925 .proc_handler = proc_dointvec_minmax, 2926 .extra1 = &min_extfrag_threshold, 2927 .extra2 = &max_extfrag_threshold, 2928 }, 2929 { 2930 .procname = "compact_unevictable_allowed", 2931 .data = &sysctl_compact_unevictable_allowed, 2932 .maxlen = sizeof(int), 2933 .mode = 0644, 2934 .proc_handler = proc_dointvec_minmax_warn_RT_change, 2935 .extra1 = SYSCTL_ZERO, 2936 .extra2 = SYSCTL_ONE, 2937 }, 2938 2939 #endif /* CONFIG_COMPACTION */ 2940 { 2941 .procname = "min_free_kbytes", 2942 .data = &min_free_kbytes, 2943 .maxlen = sizeof(min_free_kbytes), 2944 .mode = 0644, 2945 .proc_handler = min_free_kbytes_sysctl_handler, 2946 .extra1 = SYSCTL_ZERO, 2947 }, 2948 { 2949 .procname = "watermark_boost_factor", 2950 .data = &watermark_boost_factor, 2951 .maxlen = sizeof(watermark_boost_factor), 2952 .mode = 0644, 2953 .proc_handler = proc_dointvec_minmax, 2954 
.extra1 = SYSCTL_ZERO, 2955 }, 2956 { 2957 .procname = "watermark_scale_factor", 2958 .data = &watermark_scale_factor, 2959 .maxlen = sizeof(watermark_scale_factor), 2960 .mode = 0644, 2961 .proc_handler = watermark_scale_factor_sysctl_handler, 2962 .extra1 = SYSCTL_ONE, 2963 .extra2 = &one_thousand, 2964 }, 2965 { 2966 .procname = "percpu_pagelist_high_fraction", 2967 .data = &percpu_pagelist_high_fraction, 2968 .maxlen = sizeof(percpu_pagelist_high_fraction), 2969 .mode = 0644, 2970 .proc_handler = percpu_pagelist_high_fraction_sysctl_handler, 2971 .extra1 = SYSCTL_ZERO, 2972 }, 2973 { 2974 .procname = "page_lock_unfairness", 2975 .data = &sysctl_page_lock_unfairness, 2976 .maxlen = sizeof(sysctl_page_lock_unfairness), 2977 .mode = 0644, 2978 .proc_handler = proc_dointvec_minmax, 2979 .extra1 = SYSCTL_ZERO, 2980 }, 2981 #ifdef CONFIG_MMU 2982 { 2983 .procname = "max_map_count", 2984 .data = &sysctl_max_map_count, 2985 .maxlen = sizeof(sysctl_max_map_count), 2986 .mode = 0644, 2987 .proc_handler = proc_dointvec_minmax, 2988 .extra1 = SYSCTL_ZERO, 2989 }, 2990 #else 2991 { 2992 .procname = "nr_trim_pages", 2993 .data = &sysctl_nr_trim_pages, 2994 .maxlen = sizeof(sysctl_nr_trim_pages), 2995 .mode = 0644, 2996 .proc_handler = proc_dointvec_minmax, 2997 .extra1 = SYSCTL_ZERO, 2998 }, 2999 #endif 3000 { 3001 .procname = "laptop_mode", 3002 .data = &laptop_mode, 3003 .maxlen = sizeof(laptop_mode), 3004 .mode = 0644, 3005 .proc_handler = proc_dointvec_jiffies, 3006 }, 3007 { 3008 .procname = "vfs_cache_pressure", 3009 .data = &sysctl_vfs_cache_pressure, 3010 .maxlen = sizeof(sysctl_vfs_cache_pressure), 3011 .mode = 0644, 3012 .proc_handler = proc_dointvec_minmax, 3013 .extra1 = SYSCTL_ZERO, 3014 }, 3015 #if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \ 3016 defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT) 3017 { 3018 .procname = "legacy_va_layout", 3019 .data = &sysctl_legacy_va_layout, 3020 .maxlen = sizeof(sysctl_legacy_va_layout), 3021 .mode = 0644, 3022 
.proc_handler = proc_dointvec_minmax, 3023 .extra1 = SYSCTL_ZERO, 3024 }, 3025 #endif 3026 #ifdef CONFIG_NUMA 3027 { 3028 .procname = "zone_reclaim_mode", 3029 .data = &node_reclaim_mode, 3030 .maxlen = sizeof(node_reclaim_mode), 3031 .mode = 0644, 3032 .proc_handler = proc_dointvec_minmax, 3033 .extra1 = SYSCTL_ZERO, 3034 }, 3035 { 3036 .procname = "min_unmapped_ratio", 3037 .data = &sysctl_min_unmapped_ratio, 3038 .maxlen = sizeof(sysctl_min_unmapped_ratio), 3039 .mode = 0644, 3040 .proc_handler = sysctl_min_unmapped_ratio_sysctl_handler, 3041 .extra1 = SYSCTL_ZERO, 3042 .extra2 = &one_hundred, 3043 }, 3044 { 3045 .procname = "min_slab_ratio", 3046 .data = &sysctl_min_slab_ratio, 3047 .maxlen = sizeof(sysctl_min_slab_ratio), 3048 .mode = 0644, 3049 .proc_handler = sysctl_min_slab_ratio_sysctl_handler, 3050 .extra1 = SYSCTL_ZERO, 3051 .extra2 = &one_hundred, 3052 }, 3053 #endif 3054 #ifdef CONFIG_SMP 3055 { 3056 .procname = "stat_interval", 3057 .data = &sysctl_stat_interval, 3058 .maxlen = sizeof(sysctl_stat_interval), 3059 .mode = 0644, 3060 .proc_handler = proc_dointvec_jiffies, 3061 }, 3062 { 3063 .procname = "stat_refresh", 3064 .data = NULL, 3065 .maxlen = 0, 3066 .mode = 0600, 3067 .proc_handler = vmstat_refresh, 3068 }, 3069 #endif 3070 #ifdef CONFIG_MMU 3071 { 3072 .procname = "mmap_min_addr", 3073 .data = &dac_mmap_min_addr, 3074 .maxlen = sizeof(unsigned long), 3075 .mode = 0644, 3076 .proc_handler = mmap_min_addr_handler, 3077 }, 3078 #endif 3079 #ifdef CONFIG_NUMA 3080 { 3081 .procname = "numa_zonelist_order", 3082 .data = &numa_zonelist_order, 3083 .maxlen = NUMA_ZONELIST_ORDER_LEN, 3084 .mode = 0644, 3085 .proc_handler = numa_zonelist_order_handler, 3086 }, 3087 #endif 3088 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \ 3089 (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL)) 3090 { 3091 .procname = "vdso_enabled", 3092 #ifdef CONFIG_X86_32 3093 .data = &vdso32_enabled, 3094 .maxlen = sizeof(vdso32_enabled), 3095 #else 3096 .data = 
&vdso_enabled, 3097 .maxlen = sizeof(vdso_enabled), 3098 #endif 3099 .mode = 0644, 3100 .proc_handler = proc_dointvec, 3101 .extra1 = SYSCTL_ZERO, /* NOTE(review): a minimum is set here, but the handler is proc_dointvec rather than proc_dointvec_minmax - confirm the bound is actually enforced */ 3102 }, 3103 #endif 3104 #ifdef CONFIG_HIGHMEM 3105 { 3106 .procname = "highmem_is_dirtyable", 3107 .data = &vm_highmem_is_dirtyable, 3108 .maxlen = sizeof(vm_highmem_is_dirtyable), 3109 .mode = 0644, 3110 .proc_handler = proc_dointvec_minmax, 3111 .extra1 = SYSCTL_ZERO, 3112 .extra2 = SYSCTL_ONE, 3113 }, 3114 #endif 3115 #ifdef CONFIG_MEMORY_FAILURE 3116 { 3117 .procname = "memory_failure_early_kill", 3118 .data = &sysctl_memory_failure_early_kill, 3119 .maxlen = sizeof(sysctl_memory_failure_early_kill), 3120 .mode = 0644, 3121 .proc_handler = proc_dointvec_minmax, 3122 .extra1 = SYSCTL_ZERO, 3123 .extra2 = SYSCTL_ONE, 3124 }, 3125 { 3126 .procname = "memory_failure_recovery", 3127 .data = &sysctl_memory_failure_recovery, 3128 .maxlen = sizeof(sysctl_memory_failure_recovery), 3129 .mode = 0644, 3130 .proc_handler = proc_dointvec_minmax, 3131 .extra1 = SYSCTL_ZERO, 3132 .extra2 = SYSCTL_ONE, 3133 }, 3134 #endif 3135 { 3136 .procname = "user_reserve_kbytes", 3137 .data = &sysctl_user_reserve_kbytes, 3138 .maxlen = sizeof(sysctl_user_reserve_kbytes), 3139 .mode = 0644, 3140 .proc_handler = proc_doulongvec_minmax, 3141 }, 3142 { 3143 .procname = "admin_reserve_kbytes", 3144 .data = &sysctl_admin_reserve_kbytes, 3145 .maxlen = sizeof(sysctl_admin_reserve_kbytes), 3146 .mode = 0644, 3147 .proc_handler = proc_doulongvec_minmax, 3148 }, 3149 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS 3150 { 3151 .procname = "mmap_rnd_bits", 3152 .data = &mmap_rnd_bits, 3153 .maxlen = sizeof(mmap_rnd_bits), 3154 .mode = 0600, 3155 .proc_handler = proc_dointvec_minmax, 3156 .extra1 = (void *)&mmap_rnd_bits_min, 3157 .extra2 = (void *)&mmap_rnd_bits_max, 3158 }, 3159 #endif 3160 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS 3161 { 3162 .procname = "mmap_rnd_compat_bits", 3163 .data = &mmap_rnd_compat_bits, 3164 .maxlen = sizeof(mmap_rnd_compat_bits),
3165 .mode = 0600, 3166 .proc_handler = proc_dointvec_minmax, 3167 .extra1 = (void *)&mmap_rnd_compat_bits_min, 3168 .extra2 = (void *)&mmap_rnd_compat_bits_max, 3169 }, 3170 #endif 3171 #ifdef CONFIG_USERFAULTFD 3172 { 3173 .procname = "unprivileged_userfaultfd", 3174 .data = &sysctl_unprivileged_userfaultfd, 3175 .maxlen = sizeof(sysctl_unprivileged_userfaultfd), 3176 .mode = 0644, 3177 .proc_handler = proc_dointvec_minmax, 3178 .extra1 = SYSCTL_ZERO, 3179 .extra2 = SYSCTL_ONE, 3180 }, 3181 #endif 3182 { } 3183 }; 3184 /* fs_table: entries for the fs directory; sysctl_base_table names this array as the child of its fs entry. inode-nr and inode-state are both backed by inodes_stat and differ only in .maxlen (2 versus 7 longs). Entries that set .child instead of .data (inotify, fanotify, epoll) are mode 0555 and point at another table. The array ends with an empty entry. */ 3185 static struct ctl_table fs_table[] = { 3186 { 3187 .procname = "inode-nr", 3188 .data = &inodes_stat, 3189 .maxlen = 2*sizeof(long), 3190 .mode = 0444, 3191 .proc_handler = proc_nr_inodes, 3192 }, 3193 { 3194 .procname = "inode-state", 3195 .data = &inodes_stat, 3196 .maxlen = 7*sizeof(long), 3197 .mode = 0444, 3198 .proc_handler = proc_nr_inodes, 3199 }, 3200 { 3201 .procname = "file-nr", 3202 .data = &files_stat, 3203 .maxlen = sizeof(files_stat), 3204 .mode = 0444, 3205 .proc_handler = proc_nr_files, 3206 }, 3207 { 3208 .procname = "file-max", 3209 .data = &files_stat.max_files, 3210 .maxlen = sizeof(files_stat.max_files), 3211 .mode = 0644, 3212 .proc_handler = proc_doulongvec_minmax, 3213 .extra1 = &zero_ul, 3214 .extra2 = &long_max, 3215 }, 3216 { 3217 .procname = "nr_open", 3218 .data = &sysctl_nr_open, 3219 .maxlen = sizeof(unsigned int), /* NOTE(review): sized as unsigned int while the handler below is proc_dointvec_minmax, not proc_douintvec_minmax - confirm this pairing is intended */ 3220 .mode = 0644, 3221 .proc_handler = proc_dointvec_minmax, 3222 .extra1 = &sysctl_nr_open_min, 3223 .extra2 = &sysctl_nr_open_max, 3224 }, 3225 { 3226 .procname = "dentry-state", 3227 .data = &dentry_stat, 3228 .maxlen = 6*sizeof(long), 3229 .mode = 0444, 3230 .proc_handler = proc_nr_dentry, 3231 }, 3232 { 3233 .procname = "overflowuid", 3234 .data = &fs_overflowuid, 3235 .maxlen = sizeof(int), 3236 .mode = 0644, 3237 .proc_handler = proc_dointvec_minmax, 3238 .extra1 = &minolduid, 3239 .extra2 = &maxolduid, 3240 }, 3241 { 3242 .procname = "overflowgid", 3243 .data = &fs_overflowgid, 3244 .maxlen = 
sizeof(int), 3245 .mode = 0644, 3246 .proc_handler = proc_dointvec_minmax, 3247 .extra1 = &minolduid, 3248 .extra2 = &maxolduid, 3249 }, 3250 #ifdef CONFIG_FILE_LOCKING 3251 { 3252 .procname = "leases-enable", 3253 .data = &leases_enable, 3254 .maxlen = sizeof(int), 3255 .mode = 0644, 3256 .proc_handler = proc_dointvec, 3257 }, 3258 #endif 3259 #ifdef CONFIG_DNOTIFY 3260 { 3261 .procname = "dir-notify-enable", 3262 .data = &dir_notify_enable, 3263 .maxlen = sizeof(int), 3264 .mode = 0644, 3265 .proc_handler = proc_dointvec, 3266 }, 3267 #endif 3268 #ifdef CONFIG_MMU 3269 #ifdef CONFIG_FILE_LOCKING 3270 { 3271 .procname = "lease-break-time", 3272 .data = &lease_break_time, 3273 .maxlen = sizeof(int), 3274 .mode = 0644, 3275 .proc_handler = proc_dointvec, 3276 }, 3277 #endif 3278 #ifdef CONFIG_AIO 3279 { 3280 .procname = "aio-nr", 3281 .data = &aio_nr, 3282 .maxlen = sizeof(aio_nr), 3283 .mode = 0444, 3284 .proc_handler = proc_doulongvec_minmax, 3285 }, 3286 { 3287 .procname = "aio-max-nr", 3288 .data = &aio_max_nr, 3289 .maxlen = sizeof(aio_max_nr), 3290 .mode = 0644, 3291 .proc_handler = proc_doulongvec_minmax, 3292 }, 3293 #endif /* CONFIG_AIO */ 3294 #ifdef CONFIG_INOTIFY_USER 3295 { 3296 .procname = "inotify", 3297 .mode = 0555, 3298 .child = inotify_table, 3299 }, 3300 #endif 3301 #ifdef CONFIG_FANOTIFY 3302 { 3303 .procname = "fanotify", 3304 .mode = 0555, 3305 .child = fanotify_table, 3306 }, 3307 #endif 3308 #ifdef CONFIG_EPOLL 3309 { 3310 .procname = "epoll", 3311 .mode = 0555, 3312 .child = epoll_table, 3313 }, 3314 #endif 3315 #endif 3316 { 3317 .procname = "protected_symlinks", 3318 .data = &sysctl_protected_symlinks, 3319 .maxlen = sizeof(int), 3320 .mode = 0600, 3321 .proc_handler = proc_dointvec_minmax, 3322 .extra1 = SYSCTL_ZERO, 3323 .extra2 = SYSCTL_ONE, 3324 }, 3325 { 3326 .procname = "protected_hardlinks", 3327 .data = &sysctl_protected_hardlinks, 3328 .maxlen = sizeof(int), 3329 .mode = 0600, 3330 .proc_handler = proc_dointvec_minmax, 3331
.extra1 = SYSCTL_ZERO, 3332 .extra2 = SYSCTL_ONE, 3333 }, 3334 { 3335 .procname = "protected_fifos", 3336 .data = &sysctl_protected_fifos, 3337 .maxlen = sizeof(int), 3338 .mode = 0600, 3339 .proc_handler = proc_dointvec_minmax, 3340 .extra1 = SYSCTL_ZERO, 3341 .extra2 = &two, 3342 }, 3343 { 3344 .procname = "protected_regular", 3345 .data = &sysctl_protected_regular, 3346 .maxlen = sizeof(int), 3347 .mode = 0600, 3348 .proc_handler = proc_dointvec_minmax, 3349 .extra1 = SYSCTL_ZERO, 3350 .extra2 = &two, 3351 }, 3352 { 3353 .procname = "suid_dumpable", 3354 .data = &suid_dumpable, 3355 .maxlen = sizeof(int), 3356 .mode = 0644, 3357 .proc_handler = proc_dointvec_minmax_coredump, 3358 .extra1 = SYSCTL_ZERO, 3359 .extra2 = &two, 3360 }, 3361 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE) 3362 { 3363 .procname = "binfmt_misc", 3364 .mode = 0555, 3365 .child = sysctl_mount_point, 3366 }, 3367 #endif 3368 { 3369 .procname = "pipe-max-size", 3370 .data = &pipe_max_size, 3371 .maxlen = sizeof(pipe_max_size), 3372 .mode = 0644, 3373 .proc_handler = proc_dopipe_max_size, 3374 }, 3375 { 3376 .procname = "pipe-user-pages-hard", 3377 .data = &pipe_user_pages_hard, 3378 .maxlen = sizeof(pipe_user_pages_hard), 3379 .mode = 0644, 3380 .proc_handler = proc_doulongvec_minmax, 3381 }, 3382 { 3383 .procname = "pipe-user-pages-soft", 3384 .data = &pipe_user_pages_soft, 3385 .maxlen = sizeof(pipe_user_pages_soft), 3386 .mode = 0644, 3387 .proc_handler = proc_doulongvec_minmax, 3388 }, 3389 { 3390 .procname = "mount-max", 3391 .data = &sysctl_mount_max, 3392 .maxlen = sizeof(unsigned int), 3393 .mode = 0644, 3394 .proc_handler = proc_dointvec_minmax, 3395 .extra1 = SYSCTL_ONE, 3396 }, 3397 { } 3398 }; 3399 /* debug_table: entries for the debug directory; sysctl_base_table names this array as the child of its debug entry. Both entries sit inside preprocessor conditionals (CONFIG_SYSCTL_EXCEPTION_TRACE and CONFIG_OPTPROBES), so with neither option defined only the trailing empty entry is left. */ 3400 static struct ctl_table debug_table[] = { 3401 #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE 3402 { 3403 .procname = "exception-trace", 3404 .data = &show_unhandled_signals, 3405 .maxlen = sizeof(int), 3406 .mode = 0644, 3407 .proc_handler = proc_dointvec 3408 },
3409 #endif 3410 #if defined(CONFIG_OPTPROBES) 3411 { 3412 .procname = "kprobes-optimization", 3413 .data = &sysctl_kprobes_optimization, 3414 .maxlen = sizeof(int), 3415 .mode = 0644, 3416 .proc_handler = proc_kprobes_optimization_handler, 3417 .extra1 = SYSCTL_ZERO, 3418 .extra2 = SYSCTL_ONE, 3419 }, 3420 #endif 3421 { } 3422 }; 3423 /* dev_table: child of the dev entry in sysctl_base_table; in this file it holds nothing but the empty trailing entry. */ 3424 static struct ctl_table dev_table[] = { 3425 { } 3426 }; 3427 /* sysctl_base_table: the five top-level directories (kernel, vm, fs, debug, dev). Each entry is mode 0555, has no .data or handler, and points at its own table through .child. */ 3428 static struct ctl_table sysctl_base_table[] = { 3429 { 3430 .procname = "kernel", 3431 .mode = 0555, 3432 .child = kern_table, 3433 }, 3434 { 3435 .procname = "vm", 3436 .mode = 0555, 3437 .child = vm_table, 3438 }, 3439 { 3440 .procname = "fs", 3441 .mode = 0555, 3442 .child = fs_table, 3443 }, 3444 { 3445 .procname = "debug", 3446 .mode = 0555, 3447 .child = debug_table, 3448 }, 3449 { 3450 .procname = "dev", 3451 .mode = 0555, 3452 .child = dev_table, 3453 }, 3454 { } 3455 }; 3456 /* sysctl_init - register the base sysctl table. Passes sysctl_base_table to register_sysctl_table() and hands the returned header to kmemleak_not_leak(). The header is neither checked nor stored, and the function always returns 0. */ 3457 int __init sysctl_init(void) 3458 { 3459 struct ctl_table_header *hdr; 3460 3461 hdr = register_sysctl_table(sysctl_base_table); /* presumably the header is never freed, so it is marked to keep kmemleak from reporting it - TODO confirm */ 3462 kmemleak_not_leak(hdr); 3463 return 0; 3464 } 3465 #endif /* CONFIG_SYSCTL */ 3466 /* 3467 * No sense putting this after each symbol definition, twice, 3468 * exception granted :-) 3469 */ 3470 EXPORT_SYMBOL(proc_dobool); 3471 EXPORT_SYMBOL(proc_dointvec); 3472 EXPORT_SYMBOL(proc_douintvec); 3473 EXPORT_SYMBOL(proc_dointvec_jiffies); 3474 EXPORT_SYMBOL(proc_dointvec_minmax); 3475 EXPORT_SYMBOL_GPL(proc_douintvec_minmax); 3476 EXPORT_SYMBOL(proc_dointvec_userhz_jiffies); 3477 EXPORT_SYMBOL(proc_dointvec_ms_jiffies); 3478 EXPORT_SYMBOL(proc_dostring); 3479 EXPORT_SYMBOL(proc_doulongvec_minmax); 3480 EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax); 3481 EXPORT_SYMBOL(proc_do_large_bitmap); 3482