1 /* 2 * sysctl.c: General linux system control interface 3 * 4 * Begun 24 March 1995, Stephen Tweedie 5 * Added /proc support, Dec 1995 6 * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas. 7 * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver. 8 * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver. 9 * Dynamic registration fixes, Stephen Tweedie. 10 * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn. 11 * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris 12 * Horn. 13 * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer. 14 * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer. 15 * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill 16 * Wendling. 17 * The list_for_each() macro wasn't appropriate for the sysctl loop. 18 * Removed it and replaced it with older style, 03/23/00, Bill Wendling 19 */ 20 21 #include <linux/module.h> 22 #include <linux/mm.h> 23 #include <linux/swap.h> 24 #include <linux/slab.h> 25 #include <linux/sysctl.h> 26 #include <linux/proc_fs.h> 27 #include <linux/capability.h> 28 #include <linux/ctype.h> 29 #include <linux/utsname.h> 30 #include <linux/capability.h> 31 #include <linux/smp_lock.h> 32 #include <linux/init.h> 33 #include <linux/kernel.h> 34 #include <linux/kobject.h> 35 #include <linux/net.h> 36 #include <linux/sysrq.h> 37 #include <linux/highuid.h> 38 #include <linux/writeback.h> 39 #include <linux/hugetlb.h> 40 #include <linux/security.h> 41 #include <linux/initrd.h> 42 #include <linux/times.h> 43 #include <linux/limits.h> 44 #include <linux/dcache.h> 45 #include <linux/syscalls.h> 46 #include <linux/nfs_fs.h> 47 #include <linux/acpi.h> 48 49 #include <asm/uaccess.h> 50 #include <asm/processor.h> 51 52 extern int proc_nr_files(ctl_table *table, int write, struct file *filp, 53 void __user *buffer, size_t *lenp, loff_t *ppos); 54 55 #ifdef CONFIG_X86 56 #include <asm/nmi.h> 57 #endif 58 59 #if defined(CONFIG_SYSCTL) 60 61 /* External variables not in a header file. */ 62 extern int C_A_D; 63 extern int sysctl_overcommit_memory; 64 extern int sysctl_overcommit_ratio; 65 extern int sysctl_panic_on_oom; 66 extern int max_threads; 67 extern int sysrq_enabled; 68 extern int core_uses_pid; 69 extern int suid_dumpable; 70 extern char core_pattern[]; 71 extern int pid_max; 72 extern int min_free_kbytes; 73 extern int printk_ratelimit_jiffies; 74 extern int printk_ratelimit_burst; 75 extern int pid_max_min, pid_max_max; 76 extern int sysctl_drop_caches; 77 extern int percpu_pagelist_fraction; 78 extern int compat_log; 79 80 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ 81 static int maxolduid = 65535; 82 static int minolduid; 83 static int min_percpu_pagelist_fract = 8; 84 85 static int ngroups_max = NGROUPS_MAX; 86 87 #ifdef CONFIG_KMOD 88 extern char modprobe_path[]; 89 #endif 90 #ifdef CONFIG_CHR_DEV_SG 91 extern int sg_big_buff; 92 #endif 93 #ifdef CONFIG_SYSVIPC 94 static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp, 95 void __user *buffer, size_t *lenp, loff_t *ppos); 96 #endif 97 98 #ifdef __sparc__ 99 extern char reboot_command []; 100 extern int stop_a_enabled; 101 extern int scons_pwroff; 102 #endif 103 104 #ifdef __hppa__ 105 extern int pwrsw_enabled; 106 extern int unaligned_enabled; 107 #endif 108 109 #ifdef CONFIG_S390 110 #ifdef CONFIG_MATHEMU 111 extern int sysctl_ieee_emulation_warnings; 112 #endif 113 extern int sysctl_userprocess_debug; 114 extern int spin_retry; 115 #endif 116 117 extern int sysctl_hz_timer; 118 119 #ifdef CONFIG_BSD_PROCESS_ACCT 120 extern int acct_parm[]; 121 #endif 122 123 #ifdef CONFIG_IA64 124 extern int no_unaligned_warning; 125 #endif 126 127 #ifdef CONFIG_RT_MUTEXES 128 extern int max_lock_depth; 129 #endif 130 131 #ifdef CONFIG_SYSCTL_SYSCALL 132 static int parse_table(int __user *, int, void __user *, size_t __user *, 133 void __user *, size_t, ctl_table *, void **); 134 #endif 135 136 static int proc_do_uts_string(ctl_table *table, int write, struct file *filp, 137 void __user *buffer, size_t *lenp, loff_t *ppos); 138 139 #ifdef CONFIG_PROC_SYSCTL 140 static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp, 141 void __user *buffer, size_t *lenp, loff_t *ppos); 142 #endif 143 144 static ctl_table root_table[]; 145 static struct ctl_table_header root_table_header = 146 { root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) }; 147 148 static ctl_table kern_table[]; 149 static ctl_table vm_table[]; 150 static ctl_table fs_table[]; 151 static ctl_table debug_table[]; 152 static ctl_table dev_table[]; 153 extern ctl_table random_table[]; 154 #ifdef CONFIG_UNIX98_PTYS 155 extern ctl_table pty_table[]; 156 #endif 157 #ifdef CONFIG_INOTIFY_USER 158 extern ctl_table inotify_table[]; 159 #endif 160 161 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT 162 int sysctl_legacy_va_layout; 163 #endif 164 165 /* /proc declarations: */ 166 167 #ifdef CONFIG_PROC_SYSCTL 168 169 static ssize_t proc_readsys(struct file *, char __user *, size_t, loff_t *); 170 static ssize_t proc_writesys(struct file *, const char __user *, size_t, loff_t *); 171 static int proc_opensys(struct inode *, struct file *); 172 173 const struct file_operations proc_sys_file_operations = { 174 .open = proc_opensys, 175 .read = proc_readsys, 176 .write = proc_writesys, 177 }; 178 179 extern struct proc_dir_entry *proc_sys_root; 180 181 static void register_proc_table(ctl_table *, struct proc_dir_entry *, void *); 182 static void unregister_proc_table(ctl_table *, struct proc_dir_entry *); 183 #endif 184 185 /* The default sysctl tables: */ 186 187 static ctl_table root_table[] = { 188 { 189 .ctl_name = CTL_KERN, 190 .procname = "kernel", 191 .mode = 0555, 192 .child = kern_table, 193 }, 194 { 195 .ctl_name = CTL_VM, 196 .procname = "vm", 197 .mode = 0555, 198 .child = vm_table, 199 }, 200 #ifdef CONFIG_NET 201 { 202 .ctl_name = CTL_NET, 203 .procname = "net", 204 .mode = 0555, 205 .child = net_table, 206 }, 207 #endif 208 { 209 .ctl_name = CTL_FS, 210 .procname = "fs", 211 .mode = 0555, 212 .child = fs_table, 213 }, 214 { 215 .ctl_name = CTL_DEBUG, 216 .procname = "debug", 217 .mode = 0555, 218 .child = debug_table, 219 }, 220 { 221 .ctl_name = CTL_DEV, 222 .procname = "dev", 223 .mode = 0555, 224 .child = dev_table, 225 }, 226 227 { .ctl_name = 0 } 228 }; 229 230 static ctl_table kern_table[] = { 231 #ifndef CONFIG_UTS_NS 232 { 233 .ctl_name = KERN_OSTYPE, 234 .procname = "ostype", 235 .data = init_uts_ns.name.sysname, 236 .maxlen = sizeof(init_uts_ns.name.sysname), 237 .mode = 0444, 238 .proc_handler = &proc_do_uts_string, 239 .strategy = &sysctl_string, 240 }, 241 { 242 .ctl_name = KERN_OSRELEASE, 243 .procname = "osrelease", 244 .data = init_uts_ns.name.release, 245 .maxlen = sizeof(init_uts_ns.name.release), 246 .mode = 0444, 247 .proc_handler = &proc_do_uts_string, 248 .strategy = &sysctl_string, 249 }, 250 { 251 .ctl_name = KERN_VERSION, 252 .procname = "version", 253 .data = init_uts_ns.name.version, 254 .maxlen = sizeof(init_uts_ns.name.version), 255 .mode = 0444, 256 .proc_handler = &proc_do_uts_string, 257 .strategy = &sysctl_string, 258 }, 259 { 260 .ctl_name = KERN_NODENAME, 261 .procname = "hostname", 262 .data = init_uts_ns.name.nodename, 263 .maxlen = sizeof(init_uts_ns.name.nodename), 264 .mode = 0644, 265 .proc_handler = &proc_do_uts_string, 266 .strategy = &sysctl_string, 267 }, 268 { 269 .ctl_name = KERN_DOMAINNAME, 270 .procname = "domainname", 271 .data = init_uts_ns.name.domainname, 272 .maxlen = sizeof(init_uts_ns.name.domainname), 273 .mode = 0644, 274 .proc_handler = &proc_do_uts_string, 275 .strategy = &sysctl_string, 276 }, 277 #else /* !CONFIG_UTS_NS */ 278 { 279 .ctl_name = KERN_OSTYPE, 280 .procname = "ostype", 281 .data = NULL, 282 /* could maybe use __NEW_UTS_LEN here? */ 283 .maxlen = FIELD_SIZEOF(struct new_utsname, sysname), 284 .mode = 0444, 285 .proc_handler = &proc_do_uts_string, 286 .strategy = &sysctl_string, 287 }, 288 { 289 .ctl_name = KERN_OSRELEASE, 290 .procname = "osrelease", 291 .data = NULL, 292 .maxlen = FIELD_SIZEOF(struct new_utsname, release), 293 .mode = 0444, 294 .proc_handler = &proc_do_uts_string, 295 .strategy = &sysctl_string, 296 }, 297 { 298 .ctl_name = KERN_VERSION, 299 .procname = "version", 300 .data = NULL, 301 .maxlen = FIELD_SIZEOF(struct new_utsname, version), 302 .mode = 0444, 303 .proc_handler = &proc_do_uts_string, 304 .strategy = &sysctl_string, 305 }, 306 { 307 .ctl_name = KERN_NODENAME, 308 .procname = "hostname", 309 .data = NULL, 310 .maxlen = FIELD_SIZEOF(struct new_utsname, nodename), 311 .mode = 0644, 312 .proc_handler = &proc_do_uts_string, 313 .strategy = &sysctl_string, 314 }, 315 { 316 .ctl_name = KERN_DOMAINNAME, 317 .procname = "domainname", 318 .data = NULL, 319 .maxlen = FIELD_SIZEOF(struct new_utsname, domainname), 320 .mode = 0644, 321 .proc_handler = &proc_do_uts_string, 322 .strategy = &sysctl_string, 323 }, 324 #endif /* !CONFIG_UTS_NS */ 325 { 326 .ctl_name = KERN_PANIC, 327 .procname = "panic", 328 .data = &panic_timeout, 329 .maxlen = sizeof(int), 330 .mode = 0644, 331 .proc_handler = &proc_dointvec, 332 }, 333 { 334 .ctl_name = KERN_CORE_USES_PID, 335 .procname = "core_uses_pid", 336 .data = &core_uses_pid, 337 .maxlen = sizeof(int), 338 .mode = 0644, 339 .proc_handler = &proc_dointvec, 340 }, 341 { 342 .ctl_name = KERN_CORE_PATTERN, 343 .procname = "core_pattern", 344 .data = core_pattern, 345 .maxlen = 128, 346 .mode = 0644, 347 .proc_handler = &proc_dostring, 348 .strategy = &sysctl_string, 349 }, 350 { 351 .ctl_name = KERN_TAINTED, 352 .procname = "tainted", 353 .data = &tainted, 354 .maxlen = sizeof(int), 355 .mode = 0444, 356 .proc_handler = &proc_dointvec, 357 }, 358 { 359 .ctl_name = KERN_CAP_BSET, 360 .procname = "cap-bound", 361 .data = &cap_bset, 362 .maxlen = sizeof(kernel_cap_t), 363 .mode = 0600, 364 .proc_handler = &proc_dointvec_bset, 365 }, 366 #ifdef CONFIG_BLK_DEV_INITRD 367 { 368 .ctl_name = KERN_REALROOTDEV, 369 .procname = "real-root-dev", 370 .data = &real_root_dev, 371 .maxlen = sizeof(int), 372 .mode = 0644, 373 .proc_handler = &proc_dointvec, 374 }, 375 #endif 376 #ifdef __sparc__ 377 { 378 .ctl_name = KERN_SPARC_REBOOT, 379 .procname = "reboot-cmd", 380 .data = reboot_command, 381 .maxlen = 256, 382 .mode = 0644, 383 .proc_handler = &proc_dostring, 384 .strategy = &sysctl_string, 385 }, 386 { 387 .ctl_name = KERN_SPARC_STOP_A, 388 .procname = "stop-a", 389 .data = &stop_a_enabled, 390 .maxlen = sizeof (int), 391 .mode = 0644, 392 .proc_handler = &proc_dointvec, 393 }, 394 { 395 .ctl_name = KERN_SPARC_SCONS_PWROFF, 396 .procname = "scons-poweroff", 397 .data = &scons_pwroff, 398 .maxlen = sizeof (int), 399 .mode = 0644, 400 .proc_handler = &proc_dointvec, 401 }, 402 #endif 403 #ifdef __hppa__ 404 { 405 .ctl_name = KERN_HPPA_PWRSW, 406 .procname = "soft-power", 407 .data = &pwrsw_enabled, 408 .maxlen = sizeof (int), 409 .mode = 0644, 410 .proc_handler = &proc_dointvec, 411 }, 412 { 413 .ctl_name = KERN_HPPA_UNALIGNED, 414 .procname = "unaligned-trap", 415 .data = &unaligned_enabled, 416 .maxlen = sizeof (int), 417 .mode = 0644, 418 .proc_handler = &proc_dointvec, 419 }, 420 #endif 421 { 422 .ctl_name = KERN_CTLALTDEL, 423 .procname = "ctrl-alt-del", 424 .data = &C_A_D, 425 .maxlen = sizeof(int), 426 .mode = 0644, 427 .proc_handler = &proc_dointvec, 428 }, 429 { 430 .ctl_name = KERN_PRINTK, 431 .procname = "printk", 432 .data = &console_loglevel, 433 .maxlen = 4*sizeof(int), 434 .mode = 0644, 435 .proc_handler = &proc_dointvec, 436 }, 437 #ifdef CONFIG_KMOD 438 { 439 .ctl_name = KERN_MODPROBE, 440 .procname = "modprobe", 441 .data = &modprobe_path, 442 .maxlen = KMOD_PATH_LEN, 443 .mode = 0644, 444 .proc_handler = &proc_dostring, 445 .strategy = &sysctl_string, 446 }, 447 #endif 448 #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET) 449 { 450 .ctl_name = KERN_HOTPLUG, 451 .procname = "hotplug", 452 .data = &uevent_helper, 453 .maxlen = UEVENT_HELPER_PATH_LEN, 454 .mode = 0644, 455 .proc_handler = &proc_dostring, 456 .strategy = &sysctl_string, 457 }, 458 #endif 459 #ifdef CONFIG_CHR_DEV_SG 460 { 461 .ctl_name = KERN_SG_BIG_BUFF, 462 .procname = "sg-big-buff", 463 .data = &sg_big_buff, 464 .maxlen = sizeof (int), 465 .mode = 0444, 466 .proc_handler = &proc_dointvec, 467 }, 468 #endif 469 #ifdef CONFIG_BSD_PROCESS_ACCT 470 { 471 .ctl_name = KERN_ACCT, 472 .procname = "acct", 473 .data = &acct_parm, 474 .maxlen = 3*sizeof(int), 475 .mode = 0644, 476 .proc_handler = &proc_dointvec, 477 }, 478 #endif 479 #ifdef CONFIG_SYSVIPC 480 { 481 .ctl_name = KERN_SHMMAX, 482 .procname = "shmmax", 483 .data = NULL, 484 .maxlen = sizeof (size_t), 485 .mode = 0644, 486 .proc_handler = &proc_do_ipc_string, 487 }, 488 { 489 .ctl_name = KERN_SHMALL, 490 .procname = "shmall", 491 .data = NULL, 492 .maxlen = sizeof (size_t), 493 .mode = 0644, 494 .proc_handler = &proc_do_ipc_string, 495 }, 496 { 497 .ctl_name = KERN_SHMMNI, 498 .procname = "shmmni", 499 .data = NULL, 500 .maxlen = sizeof (int), 501 .mode = 0644, 502 .proc_handler = &proc_do_ipc_string, 503 }, 504 { 505 .ctl_name = KERN_MSGMAX, 506 .procname = "msgmax", 507 .data = NULL, 508 .maxlen = sizeof (int), 509 .mode = 0644, 510 .proc_handler = &proc_do_ipc_string, 511 }, 512 { 513 .ctl_name = KERN_MSGMNI, 514 .procname = "msgmni", 515 .data = NULL, 516 .maxlen = sizeof (int), 517 .mode = 0644, 518 .proc_handler = &proc_do_ipc_string, 519 }, 520 { 521 .ctl_name = KERN_MSGMNB, 522 .procname = "msgmnb", 523 .data = NULL, 524 .maxlen = sizeof (int), 525 .mode = 0644, 526 .proc_handler = &proc_do_ipc_string, 527 }, 528 { 529 .ctl_name = KERN_SEM, 530 .procname = "sem", 531 .data = NULL, 532 .maxlen = 4*sizeof (int), 533 .mode = 0644, 534 .proc_handler = &proc_do_ipc_string, 535 }, 536 #endif 537 #ifdef CONFIG_MAGIC_SYSRQ 538 { 539 .ctl_name = KERN_SYSRQ, 540 .procname = "sysrq", 541 .data = &sysrq_enabled, 542 .maxlen = sizeof (int), 543 .mode = 0644, 544 .proc_handler = &proc_dointvec, 545 }, 546 #endif 547 #ifdef CONFIG_PROC_SYSCTL 548 { 549 .ctl_name = KERN_CADPID, 550 .procname = "cad_pid", 551 .data = NULL, 552 .maxlen = sizeof (int), 553 .mode = 0600, 554 .proc_handler = &proc_do_cad_pid, 555 }, 556 #endif 557 { 558 .ctl_name = KERN_MAX_THREADS, 559 .procname = "threads-max", 560 .data = &max_threads, 561 .maxlen = sizeof(int), 562 .mode = 0644, 563 .proc_handler = &proc_dointvec, 564 }, 565 { 566 .ctl_name = KERN_RANDOM, 567 .procname = "random", 568 .mode = 0555, 569 .child = random_table, 570 }, 571 #ifdef CONFIG_UNIX98_PTYS 572 { 573 .ctl_name = KERN_PTY, 574 .procname = "pty", 575 .mode = 0555, 576 .child = pty_table, 577 }, 578 #endif 579 { 580 .ctl_name = KERN_OVERFLOWUID, 581 .procname = "overflowuid", 582 .data = &overflowuid, 583 .maxlen = sizeof(int), 584 .mode = 0644, 585 .proc_handler = &proc_dointvec_minmax, 586 .strategy = &sysctl_intvec, 587 .extra1 = &minolduid, 588 .extra2 = &maxolduid, 589 }, 590 { 591 .ctl_name = KERN_OVERFLOWGID, 592 .procname = "overflowgid", 593 .data = &overflowgid, 594 .maxlen = sizeof(int), 595 .mode = 0644, 596 .proc_handler = &proc_dointvec_minmax, 597 .strategy = &sysctl_intvec, 598 .extra1 = &minolduid, 599 .extra2 = &maxolduid, 600 }, 601 #ifdef CONFIG_S390 602 #ifdef CONFIG_MATHEMU 603 { 604 .ctl_name = KERN_IEEE_EMULATION_WARNINGS, 605 .procname = "ieee_emulation_warnings", 606 .data = &sysctl_ieee_emulation_warnings, 607 .maxlen = sizeof(int), 608 .mode = 0644, 609 .proc_handler = &proc_dointvec, 610 }, 611 #endif 612 #ifdef CONFIG_NO_IDLE_HZ 613 { 614 .ctl_name = KERN_HZ_TIMER, 615 .procname = "hz_timer", 616 .data = &sysctl_hz_timer, 617 .maxlen = sizeof(int), 618 .mode = 0644, 619 .proc_handler = &proc_dointvec, 620 }, 621 #endif 622 { 623 .ctl_name = KERN_S390_USER_DEBUG_LOGGING, 624 .procname = "userprocess_debug", 625 .data = &sysctl_userprocess_debug, 626 .maxlen = sizeof(int), 627 .mode = 0644, 628 .proc_handler = &proc_dointvec, 629 }, 630 #endif 631 { 632 .ctl_name = KERN_PIDMAX, 633 .procname = "pid_max", 634 .data = &pid_max, 635 .maxlen = sizeof (int), 636 .mode = 0644, 637 .proc_handler = &proc_dointvec_minmax, 638 .strategy = sysctl_intvec, 639 .extra1 = &pid_max_min, 640 .extra2 = &pid_max_max, 641 }, 642 { 643 .ctl_name = KERN_PANIC_ON_OOPS, 644 .procname = "panic_on_oops", 645 .data = &panic_on_oops, 646 .maxlen = sizeof(int), 647 .mode = 0644, 648 .proc_handler = &proc_dointvec, 649 }, 650 { 651 .ctl_name = KERN_PRINTK_RATELIMIT, 652 .procname = "printk_ratelimit", 653 .data = &printk_ratelimit_jiffies, 654 .maxlen = sizeof(int), 655 .mode = 0644, 656 .proc_handler = &proc_dointvec_jiffies, 657 .strategy = &sysctl_jiffies, 658 }, 659 { 660 .ctl_name = KERN_PRINTK_RATELIMIT_BURST, 661 .procname = "printk_ratelimit_burst", 662 .data = &printk_ratelimit_burst, 663 .maxlen = sizeof(int), 664 .mode = 0644, 665 .proc_handler = &proc_dointvec, 666 }, 667 { 668 .ctl_name = KERN_NGROUPS_MAX, 669 .procname = "ngroups_max", 670 .data = &ngroups_max, 671 .maxlen = sizeof (int), 672 .mode = 0444, 673 .proc_handler = &proc_dointvec, 674 }, 675 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) 676 { 677 .ctl_name = KERN_UNKNOWN_NMI_PANIC, 678 .procname = "unknown_nmi_panic", 679 .data = &unknown_nmi_panic, 680 .maxlen = sizeof (int), 681 .mode = 0644, 682 .proc_handler = &proc_dointvec, 683 }, 684 { 685 .ctl_name = KERN_NMI_WATCHDOG, 686 .procname = "nmi_watchdog", 687 .data = &nmi_watchdog_enabled, 688 .maxlen = sizeof (int), 689 .mode = 0644, 690 .proc_handler = &proc_nmi_enabled, 691 }, 692 #endif 693 #if defined(CONFIG_X86) 694 { 695 .ctl_name = KERN_PANIC_ON_NMI, 696 .procname = "panic_on_unrecovered_nmi", 697 .data = &panic_on_unrecovered_nmi, 698 .maxlen = sizeof(int), 699 .mode = 0644, 700 .proc_handler = &proc_dointvec, 701 }, 702 { 703 .ctl_name = KERN_BOOTLOADER_TYPE, 704 .procname = "bootloader_type", 705 .data = &bootloader_type, 706 .maxlen = sizeof (int), 707 .mode = 0444, 708 .proc_handler = &proc_dointvec, 709 }, 710 #endif 711 #if defined(CONFIG_MMU) 712 { 713 .ctl_name = KERN_RANDOMIZE, 714 .procname = "randomize_va_space", 715 .data = &randomize_va_space, 716 .maxlen = sizeof(int), 717 .mode = 0644, 718 .proc_handler = &proc_dointvec, 719 }, 720 #endif 721 #if defined(CONFIG_S390) && defined(CONFIG_SMP) 722 { 723 .ctl_name = KERN_SPIN_RETRY, 724 .procname = "spin_retry", 725 .data = &spin_retry, 726 .maxlen = sizeof (int), 727 .mode = 0644, 728 .proc_handler = &proc_dointvec, 729 }, 730 #endif 731 #ifdef CONFIG_ACPI_SLEEP 732 { 733 .ctl_name = KERN_ACPI_VIDEO_FLAGS, 734 .procname = "acpi_video_flags", 735 .data = &acpi_video_flags, 736 .maxlen = sizeof (unsigned long), 737 .mode = 0644, 738 .proc_handler = &proc_doulongvec_minmax, 739 }, 740 #endif 741 #ifdef CONFIG_IA64 742 { 743 .ctl_name = KERN_IA64_UNALIGNED, 744 .procname = "ignore-unaligned-usertrap", 745 .data = &no_unaligned_warning, 746 .maxlen = sizeof (int), 747 .mode = 0644, 748 .proc_handler = &proc_dointvec, 749 }, 750 #endif 751 #ifdef CONFIG_COMPAT 752 { 753 .ctl_name = KERN_COMPAT_LOG, 754 .procname = "compat-log", 755 .data = &compat_log, 756 .maxlen = sizeof (int), 757 .mode = 0644, 758 .proc_handler = &proc_dointvec, 759 }, 760 #endif 761 #ifdef CONFIG_RT_MUTEXES 762 { 763 .ctl_name = KERN_MAX_LOCK_DEPTH, 764 .procname = "max_lock_depth", 765 .data = &max_lock_depth, 766 .maxlen = sizeof(int), 767 .mode = 0644, 768 .proc_handler = &proc_dointvec, 769 }, 770 #endif 771 772 { .ctl_name = 0 } 773 }; 774 775 /* Constants for minimum and maximum testing in vm_table. 776 We use these as one-element integer vectors. */ 777 static int zero; 778 static int one_hundred = 100; 779 780 781 static ctl_table vm_table[] = { 782 { 783 .ctl_name = VM_OVERCOMMIT_MEMORY, 784 .procname = "overcommit_memory", 785 .data = &sysctl_overcommit_memory, 786 .maxlen = sizeof(sysctl_overcommit_memory), 787 .mode = 0644, 788 .proc_handler = &proc_dointvec, 789 }, 790 { 791 .ctl_name = VM_PANIC_ON_OOM, 792 .procname = "panic_on_oom", 793 .data = &sysctl_panic_on_oom, 794 .maxlen = sizeof(sysctl_panic_on_oom), 795 .mode = 0644, 796 .proc_handler = &proc_dointvec, 797 }, 798 { 799 .ctl_name = VM_OVERCOMMIT_RATIO, 800 .procname = "overcommit_ratio", 801 .data = &sysctl_overcommit_ratio, 802 .maxlen = sizeof(sysctl_overcommit_ratio), 803 .mode = 0644, 804 .proc_handler = &proc_dointvec, 805 }, 806 { 807 .ctl_name = VM_PAGE_CLUSTER, 808 .procname = "page-cluster", 809 .data = &page_cluster, 810 .maxlen = sizeof(int), 811 .mode = 0644, 812 .proc_handler = &proc_dointvec, 813 }, 814 { 815 .ctl_name = VM_DIRTY_BACKGROUND, 816 .procname = "dirty_background_ratio", 817 .data = &dirty_background_ratio, 818 .maxlen = sizeof(dirty_background_ratio), 819 .mode = 0644, 820 .proc_handler = &proc_dointvec_minmax, 821 .strategy = &sysctl_intvec, 822 .extra1 = &zero, 823 .extra2 = &one_hundred, 824 }, 825 { 826 .ctl_name = VM_DIRTY_RATIO, 827 .procname = "dirty_ratio", 828 .data = &vm_dirty_ratio, 829 .maxlen = sizeof(vm_dirty_ratio), 830 .mode = 0644, 831 .proc_handler = &proc_dointvec_minmax, 832 .strategy = &sysctl_intvec, 833 .extra1 = &zero, 834 .extra2 = &one_hundred, 835 }, 836 { 837 .ctl_name = VM_DIRTY_WB_CS, 838 .procname = "dirty_writeback_centisecs", 839 .data = &dirty_writeback_interval, 840 .maxlen = sizeof(dirty_writeback_interval), 841 .mode = 0644, 842 .proc_handler = &dirty_writeback_centisecs_handler, 843 }, 844 { 845 .ctl_name = VM_DIRTY_EXPIRE_CS, 846 .procname = "dirty_expire_centisecs", 847 .data = &dirty_expire_interval, 848 .maxlen = sizeof(dirty_expire_interval), 849 .mode = 0644, 850 .proc_handler = &proc_dointvec_userhz_jiffies, 851 }, 852 { 853 .ctl_name = VM_NR_PDFLUSH_THREADS, 854 .procname = "nr_pdflush_threads", 855 .data = &nr_pdflush_threads, 856 .maxlen = sizeof nr_pdflush_threads, 857 .mode = 0444 /* read-only*/, 858 .proc_handler = &proc_dointvec, 859 }, 860 { 861 .ctl_name = VM_SWAPPINESS, 862 .procname = "swappiness", 863 .data = &vm_swappiness, 864 .maxlen = sizeof(vm_swappiness), 865 .mode = 0644, 866 .proc_handler = &proc_dointvec_minmax, 867 .strategy = &sysctl_intvec, 868 .extra1 = &zero, 869 .extra2 = &one_hundred, 870 }, 871 #ifdef CONFIG_HUGETLB_PAGE 872 { 873 .ctl_name = VM_HUGETLB_PAGES, 874 .procname = "nr_hugepages", 875 .data = &max_huge_pages, 876 .maxlen = sizeof(unsigned long), 877 .mode = 0644, 878 .proc_handler = &hugetlb_sysctl_handler, 879 .extra1 = (void *)&hugetlb_zero, 880 .extra2 = (void *)&hugetlb_infinity, 881 }, 882 { 883 .ctl_name = VM_HUGETLB_GROUP, 884 .procname = "hugetlb_shm_group", 885 .data = &sysctl_hugetlb_shm_group, 886 .maxlen = sizeof(gid_t), 887 .mode = 0644, 888 .proc_handler = &proc_dointvec, 889 }, 890 #endif 891 { 892 .ctl_name = VM_LOWMEM_RESERVE_RATIO, 893 .procname = "lowmem_reserve_ratio", 894 .data = &sysctl_lowmem_reserve_ratio, 895 .maxlen = sizeof(sysctl_lowmem_reserve_ratio), 896 .mode = 0644, 897 .proc_handler = &lowmem_reserve_ratio_sysctl_handler, 898 .strategy = &sysctl_intvec, 899 }, 900 { 901 .ctl_name = VM_DROP_PAGECACHE, 902 .procname = "drop_caches", 903 .data = &sysctl_drop_caches, 904 .maxlen = sizeof(int), 905 .mode = 0644, 906 .proc_handler = drop_caches_sysctl_handler, 907 .strategy = &sysctl_intvec, 908 }, 909 { 910 .ctl_name = VM_MIN_FREE_KBYTES, 911 .procname = "min_free_kbytes", 912 .data = &min_free_kbytes, 913 .maxlen = sizeof(min_free_kbytes), 914 .mode = 0644, 915 .proc_handler = &min_free_kbytes_sysctl_handler, 916 .strategy = &sysctl_intvec, 917 .extra1 = &zero, 918 }, 919 { 920 .ctl_name = VM_PERCPU_PAGELIST_FRACTION, 921 .procname = "percpu_pagelist_fraction", 922 .data = &percpu_pagelist_fraction, 923 .maxlen = sizeof(percpu_pagelist_fraction), 924 .mode = 0644, 925 .proc_handler = &percpu_pagelist_fraction_sysctl_handler, 926 .strategy = &sysctl_intvec, 927 .extra1 = &min_percpu_pagelist_fract, 928 }, 929 #ifdef CONFIG_MMU 930 { 931 .ctl_name = VM_MAX_MAP_COUNT, 932 .procname = "max_map_count", 933 .data = &sysctl_max_map_count, 934 .maxlen = sizeof(sysctl_max_map_count), 935 .mode = 0644, 936 .proc_handler = &proc_dointvec 937 }, 938 #endif 939 { 940 .ctl_name = VM_LAPTOP_MODE, 941 .procname = "laptop_mode", 942 .data = &laptop_mode, 943 .maxlen = sizeof(laptop_mode), 944 .mode = 0644, 945 .proc_handler = &proc_dointvec_jiffies, 946 .strategy = &sysctl_jiffies, 947 }, 948 { 949 .ctl_name = VM_BLOCK_DUMP, 950 .procname = "block_dump", 951 .data = &block_dump, 952 .maxlen = sizeof(block_dump), 953 .mode = 0644, 954 .proc_handler = &proc_dointvec, 955 .strategy = &sysctl_intvec, 956 .extra1 = &zero, 957 }, 958 { 959 .ctl_name = VM_VFS_CACHE_PRESSURE, 960 .procname = "vfs_cache_pressure", 961 .data = &sysctl_vfs_cache_pressure, 962 .maxlen = sizeof(sysctl_vfs_cache_pressure), 963 .mode = 0644, 964 .proc_handler = &proc_dointvec, 965 .strategy = &sysctl_intvec, 966 .extra1 = &zero, 967 }, 968 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT 969 { 970 .ctl_name = VM_LEGACY_VA_LAYOUT, 971 .procname = "legacy_va_layout", 972 .data = &sysctl_legacy_va_layout, 973 .maxlen = sizeof(sysctl_legacy_va_layout), 974 .mode = 0644, 975 .proc_handler = &proc_dointvec, 976 .strategy = &sysctl_intvec, 977 .extra1 = &zero, 978 }, 979 #endif 980 #ifdef CONFIG_NUMA 981 { 982 .ctl_name = VM_ZONE_RECLAIM_MODE, 983 .procname = "zone_reclaim_mode", 984 .data = &zone_reclaim_mode, 985 .maxlen = sizeof(zone_reclaim_mode), 986 .mode = 0644, 987 .proc_handler = &proc_dointvec, 988 .strategy = &sysctl_intvec, 989 .extra1 = &zero, 990 }, 991 { 992 .ctl_name = VM_MIN_UNMAPPED, 993 .procname = "min_unmapped_ratio", 994 .data = &sysctl_min_unmapped_ratio, 995 .maxlen = sizeof(sysctl_min_unmapped_ratio), 996 .mode = 0644, 997 .proc_handler = &sysctl_min_unmapped_ratio_sysctl_handler, 998 .strategy = &sysctl_intvec, 999 .extra1 = &zero, 1000 .extra2 = &one_hundred, 1001 }, 1002 { 1003 .ctl_name = VM_MIN_SLAB, 1004 .procname = "min_slab_ratio", 1005 .data = &sysctl_min_slab_ratio, 1006 .maxlen = sizeof(sysctl_min_slab_ratio), 1007 .mode = 0644, 1008 .proc_handler = &sysctl_min_slab_ratio_sysctl_handler, 1009 .strategy = &sysctl_intvec, 1010 .extra1 = &zero, 1011 .extra2 = &one_hundred, 1012 }, 1013 #endif 1014 #ifdef CONFIG_X86_32 1015 { 1016 .ctl_name = VM_VDSO_ENABLED, 1017 .procname = "vdso_enabled", 1018 .data = &vdso_enabled, 1019 .maxlen = sizeof(vdso_enabled), 1020 .mode = 0644, 1021 .proc_handler = &proc_dointvec, 1022 .strategy = &sysctl_intvec, 1023 .extra1 = &zero, 1024 }, 1025 #endif 1026 { .ctl_name = 0 } 1027 }; 1028 1029 static ctl_table fs_table[] = { 1030 { 1031 .ctl_name = FS_NRINODE, 1032 .procname = "inode-nr", 1033 .data = &inodes_stat, 1034 .maxlen = 2*sizeof(int), 1035 .mode = 0444, 1036 .proc_handler = &proc_dointvec, 1037 }, 1038 { 1039 .ctl_name = FS_STATINODE, 1040 .procname = "inode-state", 1041 .data = &inodes_stat, 1042 .maxlen = 7*sizeof(int), 1043 .mode = 0444, 1044 .proc_handler = &proc_dointvec, 1045 }, 1046 { 1047 .ctl_name = FS_NRFILE, 1048 .procname = "file-nr", 1049 .data = &files_stat, 1050 .maxlen = 3*sizeof(int), 1051 .mode = 0444, 1052 .proc_handler = &proc_nr_files, 1053 }, 1054 { 1055 .ctl_name = FS_MAXFILE, 1056 .procname = "file-max", 1057 .data = &files_stat.max_files, 1058 .maxlen = sizeof(int), 1059 .mode = 0644, 1060 .proc_handler = &proc_dointvec, 1061 }, 1062 { 1063 .ctl_name = FS_DENTRY, 1064 .procname = "dentry-state", 1065 .data = &dentry_stat, 1066 .maxlen = 6*sizeof(int), 1067 .mode = 0444, 1068 .proc_handler = &proc_dointvec, 1069 }, 1070 { 1071 .ctl_name = FS_OVERFLOWUID, 1072 .procname = "overflowuid", 1073 .data = &fs_overflowuid, 1074 .maxlen = sizeof(int), 1075 .mode = 0644, 1076 .proc_handler = &proc_dointvec_minmax, 1077 .strategy = &sysctl_intvec, 1078 .extra1 = &minolduid, 1079 .extra2 = &maxolduid, 1080 }, 1081 { 1082 .ctl_name = FS_OVERFLOWGID, 1083 .procname = "overflowgid", 1084 .data = &fs_overflowgid, 1085 .maxlen = sizeof(int), 1086 .mode = 0644, 1087 .proc_handler = &proc_dointvec_minmax, 1088 .strategy = &sysctl_intvec, 1089 .extra1 = &minolduid, 1090 .extra2 = &maxolduid, 1091 }, 1092 { 1093 .ctl_name = FS_LEASES, 1094 .procname = "leases-enable", 1095 .data = &leases_enable, 1096 .maxlen = sizeof(int), 1097 .mode = 0644, 1098 .proc_handler = &proc_dointvec, 1099 }, 1100 #ifdef CONFIG_DNOTIFY 1101 { 1102 .ctl_name = FS_DIR_NOTIFY, 1103 .procname = "dir-notify-enable", 1104 .data = &dir_notify_enable, 1105 .maxlen = sizeof(int), 1106 .mode = 0644, 1107 .proc_handler = &proc_dointvec, 1108 }, 1109 #endif 1110 #ifdef CONFIG_MMU 1111 { 1112 .ctl_name = FS_LEASE_TIME, 1113 .procname = "lease-break-time", 1114 .data = &lease_break_time, 1115 .maxlen = sizeof(int), 1116 .mode = 0644, 1117 .proc_handler = &proc_dointvec, 1118 }, 1119 { 1120 .ctl_name = FS_AIO_NR, 1121 .procname = "aio-nr", 1122 .data = &aio_nr, 1123 .maxlen = sizeof(aio_nr), 1124 .mode = 0444, 1125 .proc_handler = &proc_doulongvec_minmax, 1126 }, 1127 { 1128 .ctl_name = FS_AIO_MAX_NR, 1129 .procname = "aio-max-nr", 1130 .data = &aio_max_nr, 1131 .maxlen = sizeof(aio_max_nr), 1132 .mode = 0644, 1133 .proc_handler = &proc_doulongvec_minmax, 1134 }, 1135 #ifdef CONFIG_INOTIFY_USER 1136 { 1137 .ctl_name = FS_INOTIFY, 1138 .procname = "inotify", 1139 .mode = 0555, 1140 .child = inotify_table, 1141 }, 1142 #endif 1143 #endif 1144 { 1145 .ctl_name = KERN_SETUID_DUMPABLE, 1146 .procname = "suid_dumpable", 1147 .data = &suid_dumpable, 1148 .maxlen = sizeof(int), 1149 .mode = 0644, 1150 .proc_handler = &proc_dointvec, 1151 }, 1152 { .ctl_name = 0 } 1153 }; 1154 1155 static ctl_table debug_table[] = { 1156 { .ctl_name = 0 } 1157 }; 1158 1159 static ctl_table dev_table[] = { 1160 { .ctl_name = 0 } 1161 }; 1162 1163 extern void init_irq_proc (void); 1164 1165 static DEFINE_SPINLOCK(sysctl_lock); 1166 1167 /* called under sysctl_lock */ 1168 static int use_table(struct ctl_table_header *p) 1169 { 1170 if (unlikely(p->unregistering)) 1171 return 0; 1172 p->used++; 1173 return 1; 1174 } 1175 1176 /* called under sysctl_lock */ 1177 static void unuse_table(struct ctl_table_header *p) 1178 { 1179 if (!--p->used) 1180 if (unlikely(p->unregistering)) 1181 complete(p->unregistering); 1182 } 1183 1184 /* called under sysctl_lock, will reacquire if has to wait */ 1185 static void start_unregistering(struct ctl_table_header *p) 1186 { 1187 /* 1188 * if p->used is 0, nobody will ever touch that entry again; 1189 * we'll eliminate all paths to it before dropping sysctl_lock 1190 */ 1191 if (unlikely(p->used)) { 1192 struct completion wait; 1193 init_completion(&wait); 1194 p->unregistering = &wait; 1195 spin_unlock(&sysctl_lock); 1196 wait_for_completion(&wait); 1197 spin_lock(&sysctl_lock); 1198 } 1199 /* 1200 * do not remove from the list until nobody holds it; walking the 1201 * list in do_sysctl() relies on that. 1202 */ 1203 list_del_init(&p->ctl_entry); 1204 } 1205 1206 void __init sysctl_init(void) 1207 { 1208 #ifdef CONFIG_PROC_SYSCTL 1209 register_proc_table(root_table, proc_sys_root, &root_table_header); 1210 init_irq_proc(); 1211 #endif 1212 } 1213 1214 #ifdef CONFIG_SYSCTL_SYSCALL 1215 int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, 1216 void __user *newval, size_t newlen) 1217 { 1218 struct list_head *tmp; 1219 int error = -ENOTDIR; 1220 1221 if (nlen <= 0 || nlen >= CTL_MAXNAME) 1222 return -ENOTDIR; 1223 if (oldval) { 1224 int old_len; 1225 if (!oldlenp || get_user(old_len, oldlenp)) 1226 return -EFAULT; 1227 } 1228 spin_lock(&sysctl_lock); 1229 tmp = &root_table_header.ctl_entry; 1230 do { 1231 struct ctl_table_header *head = 1232 list_entry(tmp, struct ctl_table_header, ctl_entry); 1233 void *context = NULL; 1234 1235 if (!use_table(head)) 1236 continue; 1237 1238 spin_unlock(&sysctl_lock); 1239 1240 error = parse_table(name, nlen, oldval, oldlenp, 1241 newval, newlen, head->ctl_table, 1242 &context); 1243 kfree(context); 1244 1245 spin_lock(&sysctl_lock); 1246 unuse_table(head); 1247 if (error != -ENOTDIR) 1248 break; 1249 } while ((tmp = tmp->next) != &root_table_header.ctl_entry); 1250 spin_unlock(&sysctl_lock); 1251 return error; 1252 } 1253 1254 asmlinkage long sys_sysctl(struct __sysctl_args __user *args) 1255 { 1256 struct __sysctl_args tmp; 1257 int error; 1258 1259 if (copy_from_user(&tmp, args, sizeof(tmp))) 1260 return -EFAULT; 1261 1262 lock_kernel(); 1263 error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp, 1264 tmp.newval, tmp.newlen); 1265 unlock_kernel(); 1266 return error; 1267 } 1268 #endif /* CONFIG_SYSCTL_SYSCALL */ 1269 1270 /* 1271 * ctl_perm does NOT grant the superuser all rights automatically, because 1272 * some sysctl variables are readonly even to root. 1273 */ 1274 1275 static int test_perm(int mode, int op) 1276 { 1277 if (!current->euid) 1278 mode >>= 6; 1279 else if (in_egroup_p(0)) 1280 mode >>= 3; 1281 if ((mode & op & 0007) == op) 1282 return 0; 1283 return -EACCES; 1284 } 1285 1286 static inline int ctl_perm(ctl_table *table, int op) 1287 { 1288 int error; 1289 error = security_sysctl(table, op); 1290 if (error) 1291 return error; 1292 return test_perm(table->mode, op); 1293 } 1294 1295 #ifdef CONFIG_SYSCTL_SYSCALL 1296 static int parse_table(int __user *name, int nlen, 1297 void __user *oldval, size_t __user *oldlenp, 1298 void __user *newval, size_t newlen, 1299 ctl_table *table, void **context) 1300 { 1301 int n; 1302 repeat: 1303 if (!nlen) 1304 return -ENOTDIR; 1305 if (get_user(n, name)) 1306 return -EFAULT; 1307 for ( ; table->ctl_name || table->procname; table++) { 1308 if (!table->ctl_name) 1309 continue; 1310 if (n == table->ctl_name || table->ctl_name == CTL_ANY) { 1311 int error; 1312 if (table->child) { 1313 if (ctl_perm(table, 001)) 1314 return -EPERM; 1315 if (table->strategy) { 1316 error = table->strategy( 1317 table, name, nlen, 1318 oldval, oldlenp, 1319 newval, newlen, context); 1320 if (error) 1321 return error; 1322 } 1323 name++; 1324 nlen--; 1325 table = table->child; 1326 goto repeat; 1327 } 1328 error = do_sysctl_strategy(table, name, nlen, 1329 oldval, oldlenp, 1330 newval, newlen, context); 1331 return error; 1332 } 1333 } 1334 return -ENOTDIR; 1335 } 1336 1337 /* Perform the actual read/write of a sysctl table entry. */ 1338 int do_sysctl_strategy (ctl_table *table, 1339 int __user *name, int nlen, 1340 void __user *oldval, size_t __user *oldlenp, 1341 void __user *newval, size_t newlen, void **context) 1342 { 1343 int op = 0, rc; 1344 size_t len; 1345 1346 if (oldval) 1347 op |= 004; 1348 if (newval) 1349 op |= 002; 1350 if (ctl_perm(table, op)) 1351 return -EPERM; 1352 1353 if (table->strategy) { 1354 rc = table->strategy(table, name, nlen, oldval, oldlenp, 1355 newval, newlen, context); 1356 if (rc < 0) 1357 return rc; 1358 if (rc > 0) 1359 return 0; 1360 } 1361 1362 /* If there is no strategy routine, or if the strategy returns 1363 * zero, proceed with automatic r/w */ 1364 if (table->data && table->maxlen) { 1365 if (oldval && oldlenp) { 1366 if (get_user(len, oldlenp)) 1367 return -EFAULT; 1368 if (len) { 1369 if (len > table->maxlen) 1370 len = table->maxlen; 1371 if(copy_to_user(oldval, table->data, len)) 1372 return -EFAULT; 1373 if(put_user(len, oldlenp)) 1374 return -EFAULT; 1375 } 1376 } 1377 if (newval && newlen) { 1378 len = newlen; 1379 if (len > table->maxlen) 1380 len = table->maxlen; 1381 if(copy_from_user(table->data, newval, len)) 1382 return -EFAULT; 1383 } 1384 } 1385 return 0; 1386 } 1387 #endif /* CONFIG_SYSCTL_SYSCALL */ 1388 1389 /** 1390 * register_sysctl_table - register a sysctl hierarchy 1391 * @table: the top-level table structure 1392 * @insert_at_head: whether the entry should be inserted in front or at the end 1393 * 1394 * Register a sysctl table hierarchy. @table should be a filled in ctl_table 1395 * array. An entry with a ctl_name of 0 terminates the table. 1396 * 1397 * The members of the &ctl_table structure are used as follows: 1398 * 1399 * ctl_name - This is the numeric sysctl value used by sysctl(2). The number 1400 * must be unique within that level of sysctl 1401 * 1402 * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not 1403 * enter a sysctl file 1404 * 1405 * data - a pointer to data for use by proc_handler 1406 * 1407 * maxlen - the maximum size in bytes of the data 1408 * 1409 * mode - the file permissions for the /proc/sys file, and for sysctl(2) 1410 * 1411 * child - a pointer to the child sysctl table if this entry is a directory, or 1412 * %NULL. 1413 * 1414 * proc_handler - the text handler routine (described below) 1415 * 1416 * strategy - the strategy routine (described below) 1417 * 1418 * de - for internal use by the sysctl routines 1419 * 1420 * extra1, extra2 - extra pointers usable by the proc handler routines 1421 * 1422 * Leaf nodes in the sysctl tree will be represented by a single file 1423 * under /proc; non-leaf nodes will be represented by directories. 1424 * 1425 * sysctl(2) can automatically manage read and write requests through 1426 * the sysctl table. The data and maxlen fields of the ctl_table 1427 * struct enable minimal validation of the values being written to be 1428 * performed, and the mode field allows minimal authentication. 1429 * 1430 * More sophisticated management can be enabled by the provision of a 1431 * strategy routine with the table entry. This will be called before 1432 * any automatic read or write of the data is performed. 1433 * 1434 * The strategy routine may return 1435 * 1436 * < 0 - Error occurred (error is passed to user process) 1437 * 1438 * 0 - OK - proceed with automatic read or write. 1439 * 1440 * > 0 - OK - read or write has been done by the strategy routine, so 1441 * return immediately. 1442 * 1443 * There must be a proc_handler routine for any terminal nodes 1444 * mirrored under /proc/sys (non-terminals are handled by a built-in 1445 * directory handler). Several default handlers are available to 1446 * cover common cases - 1447 * 1448 * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), 1449 * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), 1450 * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax() 1451 * 1452 * It is the handler's job to read the input buffer from user memory 1453 * and process it. The handler should return 0 on success. 1454 * 1455 * This routine returns %NULL on a failure to register, and a pointer 1456 * to the table header on success. 1457 */ 1458 struct ctl_table_header *register_sysctl_table(ctl_table * table, 1459 int insert_at_head) 1460 { 1461 struct ctl_table_header *tmp; 1462 tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL); 1463 if (!tmp) 1464 return NULL; 1465 tmp->ctl_table = table; 1466 INIT_LIST_HEAD(&tmp->ctl_entry); 1467 tmp->used = 0; 1468 tmp->unregistering = NULL; 1469 spin_lock(&sysctl_lock); 1470 if (insert_at_head) 1471 list_add(&tmp->ctl_entry, &root_table_header.ctl_entry); 1472 else 1473 list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); 1474 spin_unlock(&sysctl_lock); 1475 #ifdef CONFIG_PROC_SYSCTL 1476 register_proc_table(table, proc_sys_root, tmp); 1477 #endif 1478 return tmp; 1479 } 1480 1481 /** 1482 * unregister_sysctl_table - unregister a sysctl table hierarchy 1483 * @header: the header returned from register_sysctl_table 1484 * 1485 * Unregisters the sysctl table and all children. proc entries may not 1486 * actually be removed until they are no longer used by anyone. 1487 */ 1488 void unregister_sysctl_table(struct ctl_table_header * header) 1489 { 1490 might_sleep(); 1491 spin_lock(&sysctl_lock); 1492 start_unregistering(header); 1493 #ifdef CONFIG_PROC_SYSCTL 1494 unregister_proc_table(header->ctl_table, proc_sys_root); 1495 #endif 1496 spin_unlock(&sysctl_lock); 1497 kfree(header); 1498 } 1499 1500 #else /* !CONFIG_SYSCTL */ 1501 struct ctl_table_header * register_sysctl_table(ctl_table * table, 1502 int insert_at_head) 1503 { 1504 return NULL; 1505 } 1506 1507 void unregister_sysctl_table(struct ctl_table_header * table) 1508 { 1509 } 1510 1511 #endif /* CONFIG_SYSCTL */ 1512 1513 /* 1514 * /proc/sys support 1515 */ 1516 1517 #ifdef CONFIG_PROC_SYSCTL 1518 1519 /* Scan the sysctl entries in table and add them all into /proc */ 1520 static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, void *set) 1521 { 1522 struct proc_dir_entry *de; 1523 int len; 1524 mode_t mode; 1525 1526 for (; table->ctl_name || table->procname; table++) { 1527 /* Can't do anything without a proc name. */ 1528 if (!table->procname) 1529 continue; 1530 /* Maybe we can't do anything with it... */ 1531 if (!table->proc_handler && !table->child) { 1532 printk(KERN_WARNING "SYSCTL: Can't register %s\n", 1533 table->procname); 1534 continue; 1535 } 1536 1537 len = strlen(table->procname); 1538 mode = table->mode; 1539 1540 de = NULL; 1541 if (table->proc_handler) 1542 mode |= S_IFREG; 1543 else { 1544 mode |= S_IFDIR; 1545 for (de = root->subdir; de; de = de->next) { 1546 if (proc_match(len, table->procname, de)) 1547 break; 1548 } 1549 /* If the subdir exists already, de is non-NULL */ 1550 } 1551 1552 if (!de) { 1553 de = create_proc_entry(table->procname, mode, root); 1554 if (!de) 1555 continue; 1556 de->set = set; 1557 de->data = (void *) table; 1558 if (table->proc_handler) 1559 de->proc_fops = &proc_sys_file_operations; 1560 } 1561 table->de = de; 1562 if (de->mode & S_IFDIR) 1563 register_proc_table(table->child, de, set); 1564 } 1565 } 1566 1567 /* 1568 * Unregister a /proc sysctl table and any subdirectories. 1569 */ 1570 static void unregister_proc_table(ctl_table * table, struct proc_dir_entry *root) 1571 { 1572 struct proc_dir_entry *de; 1573 for (; table->ctl_name || table->procname; table++) { 1574 if (!(de = table->de)) 1575 continue; 1576 if (de->mode & S_IFDIR) { 1577 if (!table->child) { 1578 printk (KERN_ALERT "Help - malformed sysctl tree on free\n"); 1579 continue; 1580 } 1581 unregister_proc_table(table->child, de); 1582 1583 /* Don't unregister directories which still have entries.. */ 1584 if (de->subdir) 1585 continue; 1586 } 1587 1588 /* 1589 * In any case, mark the entry as goner; we'll keep it 1590 * around if it's busy, but we'll know to do nothing with 1591 * its fields. We are under sysctl_lock here. 1592 */ 1593 de->data = NULL; 1594 1595 /* Don't unregister proc entries that are still being used.. */ 1596 if (atomic_read(&de->count)) 1597 continue; 1598 1599 table->de = NULL; 1600 remove_proc_entry(table->procname, root); 1601 } 1602 } 1603 1604 static ssize_t do_rw_proc(int write, struct file * file, char __user * buf, 1605 size_t count, loff_t *ppos) 1606 { 1607 int op; 1608 struct proc_dir_entry *de = PDE(file->f_dentry->d_inode); 1609 struct ctl_table *table; 1610 size_t res; 1611 ssize_t error = -ENOTDIR; 1612 1613 spin_lock(&sysctl_lock); 1614 if (de && de->data && use_table(de->set)) { 1615 /* 1616 * at that point we know that sysctl was not unregistered 1617 * and won't be until we finish 1618 */ 1619 spin_unlock(&sysctl_lock); 1620 table = (struct ctl_table *) de->data; 1621 if (!table || !table->proc_handler) 1622 goto out; 1623 error = -EPERM; 1624 op = (write ? 002 : 004); 1625 if (ctl_perm(table, op)) 1626 goto out; 1627 1628 /* careful: calling conventions are nasty here */ 1629 res = count; 1630 error = (*table->proc_handler)(table, write, file, 1631 buf, &res, ppos); 1632 if (!error) 1633 error = res; 1634 out: 1635 spin_lock(&sysctl_lock); 1636 unuse_table(de->set); 1637 } 1638 spin_unlock(&sysctl_lock); 1639 return error; 1640 } 1641 1642 static int proc_opensys(struct inode *inode, struct file *file) 1643 { 1644 if (file->f_mode & FMODE_WRITE) { 1645 /* 1646 * sysctl entries that are not writable, 1647 * are _NOT_ writable, capabilities or not. 1648 */ 1649 if (!(inode->i_mode & S_IWUSR)) 1650 return -EPERM; 1651 } 1652 1653 return 0; 1654 } 1655 1656 static ssize_t proc_readsys(struct file * file, char __user * buf, 1657 size_t count, loff_t *ppos) 1658 { 1659 return do_rw_proc(0, file, buf, count, ppos); 1660 } 1661 1662 static ssize_t proc_writesys(struct file * file, const char __user * buf, 1663 size_t count, loff_t *ppos) 1664 { 1665 return do_rw_proc(1, file, (char __user *) buf, count, ppos); 1666 } 1667 1668 static int _proc_do_string(void* data, int maxlen, int write, 1669 struct file *filp, void __user *buffer, 1670 size_t *lenp, loff_t *ppos) 1671 { 1672 size_t len; 1673 char __user *p; 1674 char c; 1675 1676 if (!data || !maxlen || !*lenp || 1677 (*ppos && !write)) { 1678 *lenp = 0; 1679 return 0; 1680 } 1681 1682 if (write) { 1683 len = 0; 1684 p = buffer; 1685 while (len < *lenp) { 1686 if (get_user(c, p++)) 1687 return -EFAULT; 1688 if (c == 0 || c == '\n') 1689 break; 1690 len++; 1691 } 1692 if (len >= maxlen) 1693 len = maxlen-1; 1694 if(copy_from_user(data, buffer, len)) 1695 return -EFAULT; 1696 ((char *) data)[len] = 0; 1697 *ppos += *lenp; 1698 } else { 1699 len = strlen(data); 1700 if (len > maxlen) 1701 len = maxlen; 1702 if (len > *lenp) 1703 len = *lenp; 1704 if (len) 1705 if(copy_to_user(buffer, data, len)) 1706 return -EFAULT; 1707 if (len < *lenp) { 1708 if(put_user('\n', ((char __user *) buffer) + len)) 1709 return -EFAULT; 1710 len++; 1711 } 1712 *lenp = len; 1713 *ppos += len; 1714 } 1715 return 0; 1716 } 1717 1718 /** 1719 * proc_dostring - read a string sysctl 1720 * @table: the sysctl table 1721 * @write: %TRUE if this is a write to the sysctl file 1722 * @filp: the file structure 1723 * @buffer: the user buffer 1724 * @lenp: the size of the user buffer 1725 * @ppos: file position 1726 * 1727 * Reads/writes a string from/to the user buffer. If the kernel 1728 * buffer provided is not large enough to hold the string, the 1729 * string is truncated. The copied string is %NULL-terminated. 1730 * If the string is being read by the user process, it is copied 1731 * and a newline '\n' is added. It is truncated if the buffer is 1732 * not large enough. 1733 * 1734 * Returns 0 on success. 1735 */ 1736 int proc_dostring(ctl_table *table, int write, struct file *filp, 1737 void __user *buffer, size_t *lenp, loff_t *ppos) 1738 { 1739 return _proc_do_string(table->data, table->maxlen, write, filp, 1740 buffer, lenp, ppos); 1741 } 1742 1743 /* 1744 * Special case of dostring for the UTS structure. This has locks 1745 * to observe. Should this be in kernel/sys.c ???? 1746 */ 1747 1748 #ifndef CONFIG_UTS_NS 1749 static int proc_do_uts_string(ctl_table *table, int write, struct file *filp, 1750 void __user *buffer, size_t *lenp, loff_t *ppos) 1751 { 1752 int r; 1753 1754 if (!write) { 1755 down_read(&uts_sem); 1756 r=proc_dostring(table,0,filp,buffer,lenp, ppos); 1757 up_read(&uts_sem); 1758 } else { 1759 down_write(&uts_sem); 1760 r=proc_dostring(table,1,filp,buffer,lenp, ppos); 1761 up_write(&uts_sem); 1762 } 1763 return r; 1764 } 1765 #else /* !CONFIG_UTS_NS */ 1766 static int proc_do_uts_string(ctl_table *table, int write, struct file *filp, 1767 void __user *buffer, size_t *lenp, loff_t *ppos) 1768 { 1769 int r; 1770 struct uts_namespace* uts_ns = current->nsproxy->uts_ns; 1771 char* which; 1772 1773 switch (table->ctl_name) { 1774 case KERN_OSTYPE: 1775 which = uts_ns->name.sysname; 1776 break; 1777 case KERN_NODENAME: 1778 which = uts_ns->name.nodename; 1779 break; 1780 case KERN_OSRELEASE: 1781 which = uts_ns->name.release; 1782 break; 1783 case KERN_VERSION: 1784 which = uts_ns->name.version; 1785 break; 1786 case KERN_DOMAINNAME: 1787 which = uts_ns->name.domainname; 1788 break; 1789 default: 1790 r = -EINVAL; 1791 goto out; 1792 } 1793 1794 if (!write) { 1795 down_read(&uts_sem); 1796 r=_proc_do_string(which,table->maxlen,0,filp,buffer,lenp, ppos); 1797 up_read(&uts_sem); 1798 } else { 1799 down_write(&uts_sem); 1800 r=_proc_do_string(which,table->maxlen,1,filp,buffer,lenp, ppos); 1801 up_write(&uts_sem); 1802 } 1803 out: 1804 return r; 1805 } 1806 #endif /* !CONFIG_UTS_NS */ 1807 1808 static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp, 1809 int *valp, 1810 int write, void *data) 1811 { 1812 if (write) { 1813 *valp = *negp ? -*lvalp : *lvalp; 1814 } else { 1815 int val = *valp; 1816 if (val < 0) { 1817 *negp = -1; 1818 *lvalp = (unsigned long)-val; 1819 } else { 1820 *negp = 0; 1821 *lvalp = (unsigned long)val; 1822 } 1823 } 1824 return 0; 1825 } 1826 1827 static int __do_proc_dointvec(void *tbl_data, ctl_table *table, 1828 int write, struct file *filp, void __user *buffer, 1829 size_t *lenp, loff_t *ppos, 1830 int (*conv)(int *negp, unsigned long *lvalp, int *valp, 1831 int write, void *data), 1832 void *data) 1833 { 1834 #define TMPBUFLEN 21 1835 int *i, vleft, first=1, neg, val; 1836 unsigned long lval; 1837 size_t left, len; 1838 1839 char buf[TMPBUFLEN], *p; 1840 char __user *s = buffer; 1841 1842 if (!tbl_data || !table->maxlen || !*lenp || 1843 (*ppos && !write)) { 1844 *lenp = 0; 1845 return 0; 1846 } 1847 1848 i = (int *) tbl_data; 1849 vleft = table->maxlen / sizeof(*i); 1850 left = *lenp; 1851 1852 if (!conv) 1853 conv = do_proc_dointvec_conv; 1854 1855 for (; left && vleft--; i++, first=0) { 1856 if (write) { 1857 while (left) { 1858 char c; 1859 if (get_user(c, s)) 1860 return -EFAULT; 1861 if (!isspace(c)) 1862 break; 1863 left--; 1864 s++; 1865 } 1866 if (!left) 1867 break; 1868 neg = 0; 1869 len = left; 1870 if (len > sizeof(buf) - 1) 1871 len = sizeof(buf) - 1; 1872 if (copy_from_user(buf, s, len)) 1873 return -EFAULT; 1874 buf[len] = 0; 1875 p = buf; 1876 if (*p == '-' && left > 1) { 1877 neg = 1; 1878 p++; 1879 } 1880 if (*p < '0' || *p > '9') 1881 break; 1882 1883 lval = simple_strtoul(p, &p, 0); 1884 1885 len = p-buf; 1886 if ((len < left) && *p && !isspace(*p)) 1887 break; 1888 if (neg) 1889 val = -val; 1890 s += len; 1891 left -= len; 1892 1893 if (conv(&neg, &lval, i, 1, data)) 1894 break; 1895 } else { 1896 p = buf; 1897 if (!first) 1898 *p++ = '\t'; 1899 1900 if (conv(&neg, &lval, i, 0, data)) 1901 break; 1902 1903 sprintf(p, "%s%lu", neg ? "-" : "", lval); 1904 len = strlen(buf); 1905 if (len > left) 1906 len = left; 1907 if(copy_to_user(s, buf, len)) 1908 return -EFAULT; 1909 left -= len; 1910 s += len; 1911 } 1912 } 1913 1914 if (!write && !first && left) { 1915 if(put_user('\n', s)) 1916 return -EFAULT; 1917 left--, s++; 1918 } 1919 if (write) { 1920 while (left) { 1921 char c; 1922 if (get_user(c, s++)) 1923 return -EFAULT; 1924 if (!isspace(c)) 1925 break; 1926 left--; 1927 } 1928 } 1929 if (write && first) 1930 return -EINVAL; 1931 *lenp -= left; 1932 *ppos += *lenp; 1933 return 0; 1934 #undef TMPBUFLEN 1935 } 1936 1937 static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, 1938 void __user *buffer, size_t *lenp, loff_t *ppos, 1939 int (*conv)(int *negp, unsigned long *lvalp, int *valp, 1940 int write, void *data), 1941 void *data) 1942 { 1943 return __do_proc_dointvec(table->data, table, write, filp, 1944 buffer, lenp, ppos, conv, data); 1945 } 1946 1947 /** 1948 * proc_dointvec - read a vector of integers 1949 * @table: the sysctl table 1950 * @write: %TRUE if this is a write to the sysctl file 1951 * @filp: the file structure 1952 * @buffer: the user buffer 1953 * @lenp: the size of the user buffer 1954 * @ppos: file position 1955 * 1956 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer 1957 * values from/to the user buffer, treated as an ASCII string. 1958 * 1959 * Returns 0 on success. 1960 */ 1961 int proc_dointvec(ctl_table *table, int write, struct file *filp, 1962 void __user *buffer, size_t *lenp, loff_t *ppos) 1963 { 1964 return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, 1965 NULL,NULL); 1966 } 1967 1968 #define OP_SET 0 1969 #define OP_AND 1 1970 #define OP_OR 2 1971 #define OP_MAX 3 1972 #define OP_MIN 4 1973 1974 static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp, 1975 int *valp, 1976 int write, void *data) 1977 { 1978 int op = *(int *)data; 1979 if (write) { 1980 int val = *negp ? -*lvalp : *lvalp; 1981 switch(op) { 1982 case OP_SET: *valp = val; break; 1983 case OP_AND: *valp &= val; break; 1984 case OP_OR: *valp |= val; break; 1985 case OP_MAX: if(*valp < val) 1986 *valp = val; 1987 break; 1988 case OP_MIN: if(*valp > val) 1989 *valp = val; 1990 break; 1991 } 1992 } else { 1993 int val = *valp; 1994 if (val < 0) { 1995 *negp = -1; 1996 *lvalp = (unsigned long)-val; 1997 } else { 1998 *negp = 0; 1999 *lvalp = (unsigned long)val; 2000 } 2001 } 2002 return 0; 2003 } 2004 2005 /* 2006 * init may raise the set. 2007 */ 2008 2009 int proc_dointvec_bset(ctl_table *table, int write, struct file *filp, 2010 void __user *buffer, size_t *lenp, loff_t *ppos) 2011 { 2012 int op; 2013 2014 if (!capable(CAP_SYS_MODULE)) { 2015 return -EPERM; 2016 } 2017 2018 op = is_init(current) ? OP_SET : OP_AND; 2019 return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, 2020 do_proc_dointvec_bset_conv,&op); 2021 } 2022 2023 struct do_proc_dointvec_minmax_conv_param { 2024 int *min; 2025 int *max; 2026 }; 2027 2028 static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp, 2029 int *valp, 2030 int write, void *data) 2031 { 2032 struct do_proc_dointvec_minmax_conv_param *param = data; 2033 if (write) { 2034 int val = *negp ? -*lvalp : *lvalp; 2035 if ((param->min && *param->min > val) || 2036 (param->max && *param->max < val)) 2037 return -EINVAL; 2038 *valp = val; 2039 } else { 2040 int val = *valp; 2041 if (val < 0) { 2042 *negp = -1; 2043 *lvalp = (unsigned long)-val; 2044 } else { 2045 *negp = 0; 2046 *lvalp = (unsigned long)val; 2047 } 2048 } 2049 return 0; 2050 } 2051 2052 /** 2053 * proc_dointvec_minmax - read a vector of integers with min/max values 2054 * @table: the sysctl table 2055 * @write: %TRUE if this is a write to the sysctl file 2056 * @filp: the file structure 2057 * @buffer: the user buffer 2058 * @lenp: the size of the user buffer 2059 * @ppos: file position 2060 * 2061 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer 2062 * values from/to the user buffer, treated as an ASCII string. 2063 * 2064 * This routine will ensure the values are within the range specified by 2065 * table->extra1 (min) and table->extra2 (max). 2066 * 2067 * Returns 0 on success. 2068 */ 2069 int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, 2070 void __user *buffer, size_t *lenp, loff_t *ppos) 2071 { 2072 struct do_proc_dointvec_minmax_conv_param param = { 2073 .min = (int *) table->extra1, 2074 .max = (int *) table->extra2, 2075 }; 2076 return do_proc_dointvec(table, write, filp, buffer, lenp, ppos, 2077 do_proc_dointvec_minmax_conv, ¶m); 2078 } 2079 2080 static int __do_proc_doulongvec_minmax(void *data, ctl_table *table, int write, 2081 struct file *filp, 2082 void __user *buffer, 2083 size_t *lenp, loff_t *ppos, 2084 unsigned long convmul, 2085 unsigned long convdiv) 2086 { 2087 #define TMPBUFLEN 21 2088 unsigned long *i, *min, *max, val; 2089 int vleft, first=1, neg; 2090 size_t len, left; 2091 char buf[TMPBUFLEN], *p; 2092 char __user *s = buffer; 2093 2094 if (!data || !table->maxlen || !*lenp || 2095 (*ppos && !write)) { 2096 *lenp = 0; 2097 return 0; 2098 } 2099 2100 i = (unsigned long *) data; 2101 min = (unsigned long *) table->extra1; 2102 max = (unsigned long *) table->extra2; 2103 vleft = table->maxlen / sizeof(unsigned long); 2104 left = *lenp; 2105 2106 for (; left && vleft--; i++, min++, max++, first=0) { 2107 if (write) { 2108 while (left) { 2109 char c; 2110 if (get_user(c, s)) 2111 return -EFAULT; 2112 if (!isspace(c)) 2113 break; 2114 left--; 2115 s++; 2116 } 2117 if (!left) 2118 break; 2119 neg = 0; 2120 len = left; 2121 if (len > TMPBUFLEN-1) 2122 len = TMPBUFLEN-1; 2123 if (copy_from_user(buf, s, len)) 2124 return -EFAULT; 2125 buf[len] = 0; 2126 p = buf; 2127 if (*p == '-' && left > 1) { 2128 neg = 1; 2129 p++; 2130 } 2131 if (*p < '0' || *p > '9') 2132 break; 2133 val = simple_strtoul(p, &p, 0) * convmul / convdiv ; 2134 len = p-buf; 2135 if ((len < left) && *p && !isspace(*p)) 2136 break; 2137 if (neg) 2138 val = -val; 2139 s += len; 2140 left -= len; 2141 2142 if(neg) 2143 continue; 2144 if ((min && val < *min) || (max && val > *max)) 2145 continue; 2146 *i = val; 2147 } else { 2148 p = buf; 2149 if (!first) 2150 *p++ = '\t'; 2151 sprintf(p, "%lu", convdiv * (*i) / convmul); 2152 len = strlen(buf); 2153 if (len > left) 2154 len = left; 2155 if(copy_to_user(s, buf, len)) 2156 return -EFAULT; 2157 left -= len; 2158 s += len; 2159 } 2160 } 2161 2162 if (!write && !first && left) { 2163 if(put_user('\n', s)) 2164 return -EFAULT; 2165 left--, s++; 2166 } 2167 if (write) { 2168 while (left) { 2169 char c; 2170 if (get_user(c, s++)) 2171 return -EFAULT; 2172 if (!isspace(c)) 2173 break; 2174 left--; 2175 } 2176 } 2177 if (write && first) 2178 return -EINVAL; 2179 *lenp -= left; 2180 *ppos += *lenp; 2181 return 0; 2182 #undef TMPBUFLEN 2183 } 2184 2185 static int do_proc_doulongvec_minmax(ctl_table *table, int write, 2186 struct file *filp, 2187 void __user *buffer, 2188 size_t *lenp, loff_t *ppos, 2189 unsigned long convmul, 2190 unsigned long convdiv) 2191 { 2192 return __do_proc_doulongvec_minmax(table->data, table, write, 2193 filp, buffer, lenp, ppos, convmul, convdiv); 2194 } 2195 2196 /** 2197 * proc_doulongvec_minmax - read a vector of long integers with min/max values 2198 * @table: the sysctl table 2199 * @write: %TRUE if this is a write to the sysctl file 2200 * @filp: the file structure 2201 * @buffer: the user buffer 2202 * @lenp: the size of the user buffer 2203 * @ppos: file position 2204 * 2205 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long 2206 * values from/to the user buffer, treated as an ASCII string. 2207 * 2208 * This routine will ensure the values are within the range specified by 2209 * table->extra1 (min) and table->extra2 (max). 2210 * 2211 * Returns 0 on success. 2212 */ 2213 int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, 2214 void __user *buffer, size_t *lenp, loff_t *ppos) 2215 { 2216 return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l); 2217 } 2218 2219 /** 2220 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values 2221 * @table: the sysctl table 2222 * @write: %TRUE if this is a write to the sysctl file 2223 * @filp: the file structure 2224 * @buffer: the user buffer 2225 * @lenp: the size of the user buffer 2226 * @ppos: file position 2227 * 2228 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long 2229 * values from/to the user buffer, treated as an ASCII string. The values 2230 * are treated as milliseconds, and converted to jiffies when they are stored. 2231 * 2232 * This routine will ensure the values are within the range specified by 2233 * table->extra1 (min) and table->extra2 (max). 2234 * 2235 * Returns 0 on success. 2236 */ 2237 int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, 2238 struct file *filp, 2239 void __user *buffer, 2240 size_t *lenp, loff_t *ppos) 2241 { 2242 return do_proc_doulongvec_minmax(table, write, filp, buffer, 2243 lenp, ppos, HZ, 1000l); 2244 } 2245 2246 2247 static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp, 2248 int *valp, 2249 int write, void *data) 2250 { 2251 if (write) { 2252 if (*lvalp > LONG_MAX / HZ) 2253 return 1; 2254 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ); 2255 } else { 2256 int val = *valp; 2257 unsigned long lval; 2258 if (val < 0) { 2259 *negp = -1; 2260 lval = (unsigned long)-val; 2261 } else { 2262 *negp = 0; 2263 lval = (unsigned long)val; 2264 } 2265 *lvalp = lval / HZ; 2266 } 2267 return 0; 2268 } 2269 2270 static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp, 2271 int *valp, 2272 int write, void *data) 2273 { 2274 if (write) { 2275 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ) 2276 return 1; 2277 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp); 2278 } else { 2279 int val = *valp; 2280 unsigned long lval; 2281 if (val < 0) { 2282 *negp = -1; 2283 lval = (unsigned long)-val; 2284 } else { 2285 *negp = 0; 2286 lval = (unsigned long)val; 2287 } 2288 *lvalp = jiffies_to_clock_t(lval); 2289 } 2290 return 0; 2291 } 2292 2293 static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp, 2294 int *valp, 2295 int write, void *data) 2296 { 2297 if (write) { 2298 *valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp); 2299 } else { 2300 int val = *valp; 2301 unsigned long lval; 2302 if (val < 0) { 2303 *negp = -1; 2304 lval = (unsigned long)-val; 2305 } else { 2306 *negp = 0; 2307 lval = (unsigned long)val; 2308 } 2309 *lvalp = jiffies_to_msecs(lval); 2310 } 2311 return 0; 2312 } 2313 2314 /** 2315 * proc_dointvec_jiffies - read a vector of integers as seconds 2316 * @table: the sysctl table 2317 * @write: %TRUE if this is a write to the sysctl file 2318 * @filp: the file structure 2319 * @buffer: the user buffer 2320 * @lenp: the size of the user buffer 2321 * @ppos: file position 2322 * 2323 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer 2324 * values from/to the user buffer, treated as an ASCII string. 2325 * The values read are assumed to be in seconds, and are converted into 2326 * jiffies. 2327 * 2328 * Returns 0 on success. 2329 */ 2330 int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, 2331 void __user *buffer, size_t *lenp, loff_t *ppos) 2332 { 2333 return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, 2334 do_proc_dointvec_jiffies_conv,NULL); 2335 } 2336 2337 /** 2338 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds 2339 * @table: the sysctl table 2340 * @write: %TRUE if this is a write to the sysctl file 2341 * @filp: the file structure 2342 * @buffer: the user buffer 2343 * @lenp: the size of the user buffer 2344 * @ppos: pointer to the file position 2345 * 2346 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer 2347 * values from/to the user buffer, treated as an ASCII string. 2348 * The values read are assumed to be in 1/USER_HZ seconds, and 2349 * are converted into jiffies. 2350 * 2351 * Returns 0 on success. 2352 */ 2353 int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp, 2354 void __user *buffer, size_t *lenp, loff_t *ppos) 2355 { 2356 return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, 2357 do_proc_dointvec_userhz_jiffies_conv,NULL); 2358 } 2359 2360 /** 2361 * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds 2362 * @table: the sysctl table 2363 * @write: %TRUE if this is a write to the sysctl file 2364 * @filp: the file structure 2365 * @buffer: the user buffer 2366 * @lenp: the size of the user buffer 2367 * @ppos: file position 2368 * @ppos: the current position in the file 2369 * 2370 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer 2371 * values from/to the user buffer, treated as an ASCII string. 2372 * The values read are assumed to be in 1/1000 seconds, and 2373 * are converted into jiffies. 2374 * 2375 * Returns 0 on success. 2376 */ 2377 int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp, 2378 void __user *buffer, size_t *lenp, loff_t *ppos) 2379 { 2380 return do_proc_dointvec(table, write, filp, buffer, lenp, ppos, 2381 do_proc_dointvec_ms_jiffies_conv, NULL); 2382 } 2383 2384 #ifdef CONFIG_SYSVIPC 2385 static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp, 2386 void __user *buffer, size_t *lenp, loff_t *ppos) 2387 { 2388 void *data; 2389 struct ipc_namespace *ns; 2390 2391 ns = current->nsproxy->ipc_ns; 2392 2393 switch (table->ctl_name) { 2394 case KERN_SHMMAX: 2395 data = &ns->shm_ctlmax; 2396 goto proc_minmax; 2397 case KERN_SHMALL: 2398 data = &ns->shm_ctlall; 2399 goto proc_minmax; 2400 case KERN_SHMMNI: 2401 data = &ns->shm_ctlmni; 2402 break; 2403 case KERN_MSGMAX: 2404 data = &ns->msg_ctlmax; 2405 break; 2406 case KERN_MSGMNI: 2407 data = &ns->msg_ctlmni; 2408 break; 2409 case KERN_MSGMNB: 2410 data = &ns->msg_ctlmnb; 2411 break; 2412 case KERN_SEM: 2413 data = &ns->sem_ctls; 2414 break; 2415 default: 2416 return -EINVAL; 2417 } 2418 2419 return __do_proc_dointvec(data, table, write, filp, buffer, 2420 lenp, ppos, NULL, NULL); 2421 proc_minmax: 2422 return __do_proc_doulongvec_minmax(data, table, write, filp, buffer, 2423 lenp, ppos, 1l, 1l); 2424 } 2425 #endif 2426 2427 static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp, 2428 void __user *buffer, size_t *lenp, loff_t *ppos) 2429 { 2430 struct pid *new_pid; 2431 pid_t tmp; 2432 int r; 2433 2434 tmp = pid_nr(cad_pid); 2435 2436 r = __do_proc_dointvec(&tmp, table, write, filp, buffer, 2437 lenp, ppos, NULL, NULL); 2438 if (r || !write) 2439 return r; 2440 2441 new_pid = find_get_pid(tmp); 2442 if (!new_pid) 2443 return -ESRCH; 2444 2445 put_pid(xchg(&cad_pid, new_pid)); 2446 return 0; 2447 } 2448 2449 #else /* CONFIG_PROC_FS */ 2450 2451 int proc_dostring(ctl_table *table, int write, struct file *filp, 2452 void __user *buffer, size_t *lenp, loff_t *ppos) 2453 { 2454 return -ENOSYS; 2455 } 2456 2457 static int proc_do_uts_string(ctl_table *table, int write, struct file *filp, 2458 void __user *buffer, size_t *lenp, loff_t *ppos) 2459 { 2460 return -ENOSYS; 2461 } 2462 2463 #ifdef CONFIG_SYSVIPC 2464 static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp, 2465 void __user *buffer, size_t *lenp, loff_t *ppos) 2466 { 2467 return -ENOSYS; 2468 } 2469 #endif 2470 2471 int proc_dointvec(ctl_table *table, int write, struct file *filp, 2472 void __user *buffer, size_t *lenp, loff_t *ppos) 2473 { 2474 return -ENOSYS; 2475 } 2476 2477 int proc_dointvec_bset(ctl_table *table, int write, struct file *filp, 2478 void __user *buffer, size_t *lenp, loff_t *ppos) 2479 { 2480 return -ENOSYS; 2481 } 2482 2483 int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, 2484 void __user *buffer, size_t *lenp, loff_t *ppos) 2485 { 2486 return -ENOSYS; 2487 } 2488 2489 int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, 2490 void __user *buffer, size_t *lenp, loff_t *ppos) 2491 { 2492 return -ENOSYS; 2493 } 2494 2495 int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp, 2496 void __user *buffer, size_t *lenp, loff_t *ppos) 2497 { 2498 return -ENOSYS; 2499 } 2500 2501 int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp, 2502 void __user *buffer, size_t *lenp, loff_t *ppos) 2503 { 2504 return -ENOSYS; 2505 } 2506 2507 int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, 2508 void __user *buffer, size_t *lenp, loff_t *ppos) 2509 { 2510 return -ENOSYS; 2511 } 2512 2513 int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, 2514 struct file *filp, 2515 void __user *buffer, 2516 size_t *lenp, loff_t *ppos) 2517 { 2518 return -ENOSYS; 2519 } 2520 2521 2522 #endif /* CONFIG_PROC_FS */ 2523 2524 2525 #ifdef CONFIG_SYSCTL_SYSCALL 2526 /* 2527 * General sysctl support routines 2528 */ 2529 2530 /* The generic string strategy routine: */ 2531 int sysctl_string(ctl_table *table, int __user *name, int nlen, 2532 void __user *oldval, size_t __user *oldlenp, 2533 void __user *newval, size_t newlen, void **context) 2534 { 2535 if (!table->data || !table->maxlen) 2536 return -ENOTDIR; 2537 2538 if (oldval && oldlenp) { 2539 size_t bufsize; 2540 if (get_user(bufsize, oldlenp)) 2541 return -EFAULT; 2542 if (bufsize) { 2543 size_t len = strlen(table->data), copied; 2544 2545 /* This shouldn't trigger for a well-formed sysctl */ 2546 if (len > table->maxlen) 2547 len = table->maxlen; 2548 2549 /* Copy up to a max of bufsize-1 bytes of the string */ 2550 copied = (len >= bufsize) ? bufsize - 1 : len; 2551 2552 if (copy_to_user(oldval, table->data, copied) || 2553 put_user(0, (char __user *)(oldval + copied))) 2554 return -EFAULT; 2555 if (put_user(len, oldlenp)) 2556 return -EFAULT; 2557 } 2558 } 2559 if (newval && newlen) { 2560 size_t len = newlen; 2561 if (len > table->maxlen) 2562 len = table->maxlen; 2563 if(copy_from_user(table->data, newval, len)) 2564 return -EFAULT; 2565 if (len == table->maxlen) 2566 len--; 2567 ((char *) table->data)[len] = 0; 2568 } 2569 return 1; 2570 } 2571 2572 /* 2573 * This function makes sure that all of the integers in the vector 2574 * are between the minimum and maximum values given in the arrays 2575 * table->extra1 and table->extra2, respectively. 2576 */ 2577 int sysctl_intvec(ctl_table *table, int __user *name, int nlen, 2578 void __user *oldval, size_t __user *oldlenp, 2579 void __user *newval, size_t newlen, void **context) 2580 { 2581 2582 if (newval && newlen) { 2583 int __user *vec = (int __user *) newval; 2584 int *min = (int *) table->extra1; 2585 int *max = (int *) table->extra2; 2586 size_t length; 2587 int i; 2588 2589 if (newlen % sizeof(int) != 0) 2590 return -EINVAL; 2591 2592 if (!table->extra1 && !table->extra2) 2593 return 0; 2594 2595 if (newlen > table->maxlen) 2596 newlen = table->maxlen; 2597 length = newlen / sizeof(int); 2598 2599 for (i = 0; i < length; i++) { 2600 int value; 2601 if (get_user(value, vec + i)) 2602 return -EFAULT; 2603 if (min && value < min[i]) 2604 return -EINVAL; 2605 if (max && value > max[i]) 2606 return -EINVAL; 2607 } 2608 } 2609 return 0; 2610 } 2611 2612 /* Strategy function to convert jiffies to seconds */ 2613 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen, 2614 void __user *oldval, size_t __user *oldlenp, 2615 void __user *newval, size_t newlen, void **context) 2616 { 2617 if (oldval) { 2618 size_t olen; 2619 if (oldlenp) { 2620 if (get_user(olen, oldlenp)) 2621 return -EFAULT; 2622 if (olen!=sizeof(int)) 2623 return -EINVAL; 2624 } 2625 if (put_user(*(int *)(table->data)/HZ, (int __user *)oldval) || 2626 (oldlenp && put_user(sizeof(int),oldlenp))) 2627 return -EFAULT; 2628 } 2629 if (newval && newlen) { 2630 int new; 2631 if (newlen != sizeof(int)) 2632 return -EINVAL; 2633 if (get_user(new, (int __user *)newval)) 2634 return -EFAULT; 2635 *(int *)(table->data) = new*HZ; 2636 } 2637 return 1; 2638 } 2639 2640 /* Strategy function to convert jiffies to seconds */ 2641 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen, 2642 void __user *oldval, size_t __user *oldlenp, 2643 void __user *newval, size_t newlen, void **context) 2644 { 2645 if (oldval) { 2646 size_t olen; 2647 if (oldlenp) { 2648 if (get_user(olen, oldlenp)) 2649 return -EFAULT; 2650 if (olen!=sizeof(int)) 2651 return -EINVAL; 2652 } 2653 if (put_user(jiffies_to_msecs(*(int *)(table->data)), (int __user *)oldval) || 2654 (oldlenp && put_user(sizeof(int),oldlenp))) 2655 return -EFAULT; 2656 } 2657 if (newval && newlen) { 2658 int new; 2659 if (newlen != sizeof(int)) 2660 return -EINVAL; 2661 if (get_user(new, (int __user *)newval)) 2662 return -EFAULT; 2663 *(int *)(table->data) = msecs_to_jiffies(new); 2664 } 2665 return 1; 2666 } 2667 2668 #else /* CONFIG_SYSCTL_SYSCALL */ 2669 2670 2671 asmlinkage long sys_sysctl(struct __sysctl_args __user *args) 2672 { 2673 static int msg_count; 2674 struct __sysctl_args tmp; 2675 int name[CTL_MAXNAME]; 2676 int i; 2677 2678 /* Read in the sysctl name for better debug message logging */ 2679 if (copy_from_user(&tmp, args, sizeof(tmp))) 2680 return -EFAULT; 2681 if (tmp.nlen <= 0 || tmp.nlen >= CTL_MAXNAME) 2682 return -ENOTDIR; 2683 for (i = 0; i < tmp.nlen; i++) 2684 if (get_user(name[i], tmp.name + i)) 2685 return -EFAULT; 2686 2687 /* Ignore accesses to kernel.version */ 2688 if ((tmp.nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION)) 2689 goto out; 2690 2691 if (msg_count < 5) { 2692 msg_count++; 2693 printk(KERN_INFO 2694 "warning: process `%s' used the removed sysctl " 2695 "system call with ", current->comm); 2696 for (i = 0; i < tmp.nlen; i++) 2697 printk("%d.", name[i]); 2698 printk("\n"); 2699 } 2700 out: 2701 return -ENOSYS; 2702 } 2703 2704 int sysctl_string(ctl_table *table, int __user *name, int nlen, 2705 void __user *oldval, size_t __user *oldlenp, 2706 void __user *newval, size_t newlen, void **context) 2707 { 2708 return -ENOSYS; 2709 } 2710 2711 int sysctl_intvec(ctl_table *table, int __user *name, int nlen, 2712 void __user *oldval, size_t __user *oldlenp, 2713 void __user *newval, size_t newlen, void **context) 2714 { 2715 return -ENOSYS; 2716 } 2717 2718 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen, 2719 void __user *oldval, size_t __user *oldlenp, 2720 void __user *newval, size_t newlen, void **context) 2721 { 2722 return -ENOSYS; 2723 } 2724 2725 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen, 2726 void __user *oldval, size_t __user *oldlenp, 2727 void __user *newval, size_t newlen, void **context) 2728 { 2729 return -ENOSYS; 2730 } 2731 2732 #endif /* CONFIG_SYSCTL_SYSCALL */ 2733 2734 /* 2735 * No sense putting this after each symbol definition, twice, 2736 * exception granted :-) 2737 */ 2738 EXPORT_SYMBOL(proc_dointvec); 2739 EXPORT_SYMBOL(proc_dointvec_jiffies); 2740 EXPORT_SYMBOL(proc_dointvec_minmax); 2741 EXPORT_SYMBOL(proc_dointvec_userhz_jiffies); 2742 EXPORT_SYMBOL(proc_dointvec_ms_jiffies); 2743 EXPORT_SYMBOL(proc_dostring); 2744 EXPORT_SYMBOL(proc_doulongvec_minmax); 2745 EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax); 2746 EXPORT_SYMBOL(register_sysctl_table); 2747 EXPORT_SYMBOL(sysctl_intvec); 2748 EXPORT_SYMBOL(sysctl_jiffies); 2749 EXPORT_SYMBOL(sysctl_ms_jiffies); 2750 EXPORT_SYMBOL(sysctl_string); 2751 EXPORT_SYMBOL(unregister_sysctl_table); 2752