1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * /proc/sys support 4 */ 5 #include <linux/init.h> 6 #include <linux/sysctl.h> 7 #include <linux/poll.h> 8 #include <linux/proc_fs.h> 9 #include <linux/printk.h> 10 #include <linux/security.h> 11 #include <linux/sched.h> 12 #include <linux/cred.h> 13 #include <linux/namei.h> 14 #include <linux/mm.h> 15 #include <linux/module.h> 16 #include <linux/bpf-cgroup.h> 17 #include <linux/mount.h> 18 #include "internal.h" 19 20 static const struct dentry_operations proc_sys_dentry_operations; 21 static const struct file_operations proc_sys_file_operations; 22 static const struct inode_operations proc_sys_inode_operations; 23 static const struct file_operations proc_sys_dir_file_operations; 24 static const struct inode_operations proc_sys_dir_operations; 25 26 /* shared constants to be used in various sysctls */ 27 const int sysctl_vals[] = { 0, 1, INT_MAX }; 28 EXPORT_SYMBOL(sysctl_vals); 29 30 /* Support for permanently empty directories */ 31 32 struct ctl_table sysctl_mount_point[] = { 33 { } 34 }; 35 36 static bool is_empty_dir(struct ctl_table_header *head) 37 { 38 return head->ctl_table[0].child == sysctl_mount_point; 39 } 40 41 static void set_empty_dir(struct ctl_dir *dir) 42 { 43 dir->header.ctl_table[0].child = sysctl_mount_point; 44 } 45 46 static void clear_empty_dir(struct ctl_dir *dir) 47 48 { 49 dir->header.ctl_table[0].child = NULL; 50 } 51 52 void proc_sys_poll_notify(struct ctl_table_poll *poll) 53 { 54 if (!poll) 55 return; 56 57 atomic_inc(&poll->event); 58 wake_up_interruptible(&poll->wait); 59 } 60 61 static struct ctl_table root_table[] = { 62 { 63 .procname = "", 64 .mode = S_IFDIR|S_IRUGO|S_IXUGO, 65 }, 66 { } 67 }; 68 static struct ctl_table_root sysctl_table_root = { 69 .default_set.dir.header = { 70 {{.count = 1, 71 .nreg = 1, 72 .ctl_table = root_table }}, 73 .ctl_table_arg = root_table, 74 .root = &sysctl_table_root, 75 .set = &sysctl_table_root.default_set, 76 }, 77 }; 78 79 static DEFINE_SPINLOCK(sysctl_lock); 80 81 static void drop_sysctl_table(struct ctl_table_header *header); 82 static int sysctl_follow_link(struct ctl_table_header **phead, 83 struct ctl_table **pentry); 84 static int insert_links(struct ctl_table_header *head); 85 static void put_links(struct ctl_table_header *header); 86 87 static void sysctl_print_dir(struct ctl_dir *dir) 88 { 89 if (dir->header.parent) 90 sysctl_print_dir(dir->header.parent); 91 pr_cont("%s/", dir->header.ctl_table[0].procname); 92 } 93 94 static int namecmp(const char *name1, int len1, const char *name2, int len2) 95 { 96 int minlen; 97 int cmp; 98 99 minlen = len1; 100 if (minlen > len2) 101 minlen = len2; 102 103 cmp = memcmp(name1, name2, minlen); 104 if (cmp == 0) 105 cmp = len1 - len2; 106 return cmp; 107 } 108 109 /* Called under sysctl_lock */ 110 static struct ctl_table *find_entry(struct ctl_table_header **phead, 111 struct ctl_dir *dir, const char *name, int namelen) 112 { 113 struct ctl_table_header *head; 114 struct ctl_table *entry; 115 struct rb_node *node = dir->root.rb_node; 116 117 while (node) 118 { 119 struct ctl_node *ctl_node; 120 const char *procname; 121 int cmp; 122 123 ctl_node = rb_entry(node, struct ctl_node, node); 124 head = ctl_node->header; 125 entry = &head->ctl_table[ctl_node - head->node]; 126 procname = entry->procname; 127 128 cmp = namecmp(name, namelen, procname, strlen(procname)); 129 if (cmp < 0) 130 node = node->rb_left; 131 else if (cmp > 0) 132 node = node->rb_right; 133 else { 134 *phead = head; 135 return entry; 136 } 137 } 138 return NULL; 139 } 140 141 static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry) 142 { 143 struct rb_node *node = &head->node[entry - head->ctl_table].node; 144 struct rb_node **p = &head->parent->root.rb_node; 145 struct rb_node *parent = NULL; 146 const char *name = entry->procname; 147 int namelen = strlen(name); 148 149 while (*p) { 150 struct ctl_table_header *parent_head; 151 struct ctl_table *parent_entry; 152 struct ctl_node *parent_node; 153 const char *parent_name; 154 int cmp; 155 156 parent = *p; 157 parent_node = rb_entry(parent, struct ctl_node, node); 158 parent_head = parent_node->header; 159 parent_entry = &parent_head->ctl_table[parent_node - parent_head->node]; 160 parent_name = parent_entry->procname; 161 162 cmp = namecmp(name, namelen, parent_name, strlen(parent_name)); 163 if (cmp < 0) 164 p = &(*p)->rb_left; 165 else if (cmp > 0) 166 p = &(*p)->rb_right; 167 else { 168 pr_err("sysctl duplicate entry: "); 169 sysctl_print_dir(head->parent); 170 pr_cont("/%s\n", entry->procname); 171 return -EEXIST; 172 } 173 } 174 175 rb_link_node(node, parent, p); 176 rb_insert_color(node, &head->parent->root); 177 return 0; 178 } 179 180 static void erase_entry(struct ctl_table_header *head, struct ctl_table *entry) 181 { 182 struct rb_node *node = &head->node[entry - head->ctl_table].node; 183 184 rb_erase(node, &head->parent->root); 185 } 186 187 static void init_header(struct ctl_table_header *head, 188 struct ctl_table_root *root, struct ctl_table_set *set, 189 struct ctl_node *node, struct ctl_table *table) 190 { 191 head->ctl_table = table; 192 head->ctl_table_arg = table; 193 head->used = 0; 194 head->count = 1; 195 head->nreg = 1; 196 head->unregistering = NULL; 197 head->root = root; 198 head->set = set; 199 head->parent = NULL; 200 head->node = node; 201 INIT_HLIST_HEAD(&head->inodes); 202 if (node) { 203 struct ctl_table *entry; 204 for (entry = table; entry->procname; entry++, node++) 205 node->header = head; 206 } 207 } 208 209 static void erase_header(struct ctl_table_header *head) 210 { 211 struct ctl_table *entry; 212 for (entry = head->ctl_table; entry->procname; entry++) 213 erase_entry(head, entry); 214 } 215 216 static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) 217 { 218 struct ctl_table *entry; 219 int err; 220 221 /* Is this a permanently empty directory? */ 222 if (is_empty_dir(&dir->header)) 223 return -EROFS; 224 225 /* Am I creating a permanently empty directory? */ 226 if (header->ctl_table == sysctl_mount_point) { 227 if (!RB_EMPTY_ROOT(&dir->root)) 228 return -EINVAL; 229 set_empty_dir(dir); 230 } 231 232 dir->header.nreg++; 233 header->parent = dir; 234 err = insert_links(header); 235 if (err) 236 goto fail_links; 237 for (entry = header->ctl_table; entry->procname; entry++) { 238 err = insert_entry(header, entry); 239 if (err) 240 goto fail; 241 } 242 return 0; 243 fail: 244 erase_header(header); 245 put_links(header); 246 fail_links: 247 if (header->ctl_table == sysctl_mount_point) 248 clear_empty_dir(dir); 249 header->parent = NULL; 250 drop_sysctl_table(&dir->header); 251 return err; 252 } 253 254 /* called under sysctl_lock */ 255 static int use_table(struct ctl_table_header *p) 256 { 257 if (unlikely(p->unregistering)) 258 return 0; 259 p->used++; 260 return 1; 261 } 262 263 /* called under sysctl_lock */ 264 static void unuse_table(struct ctl_table_header *p) 265 { 266 if (!--p->used) 267 if (unlikely(p->unregistering)) 268 complete(p->unregistering); 269 } 270 271 static void proc_sys_invalidate_dcache(struct ctl_table_header *head) 272 { 273 proc_invalidate_siblings_dcache(&head->inodes, &sysctl_lock); 274 } 275 276 /* called under sysctl_lock, will reacquire if has to wait */ 277 static void start_unregistering(struct ctl_table_header *p) 278 { 279 /* 280 * if p->used is 0, nobody will ever touch that entry again; 281 * we'll eliminate all paths to it before dropping sysctl_lock 282 */ 283 if (unlikely(p->used)) { 284 struct completion wait; 285 init_completion(&wait); 286 p->unregistering = &wait; 287 spin_unlock(&sysctl_lock); 288 wait_for_completion(&wait); 289 } else { 290 /* anything non-NULL; we'll never dereference it */ 291 p->unregistering = ERR_PTR(-EINVAL); 292 spin_unlock(&sysctl_lock); 293 } 294 /* 295 * Invalidate dentries for unregistered sysctls: namespaced sysctls 296 * can have duplicate names and contaminate dcache very badly. 297 */ 298 proc_sys_invalidate_dcache(p); 299 /* 300 * do not remove from the list until nobody holds it; walking the 301 * list in do_sysctl() relies on that. 302 */ 303 spin_lock(&sysctl_lock); 304 erase_header(p); 305 } 306 307 static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) 308 { 309 BUG_ON(!head); 310 spin_lock(&sysctl_lock); 311 if (!use_table(head)) 312 head = ERR_PTR(-ENOENT); 313 spin_unlock(&sysctl_lock); 314 return head; 315 } 316 317 static void sysctl_head_finish(struct ctl_table_header *head) 318 { 319 if (!head) 320 return; 321 spin_lock(&sysctl_lock); 322 unuse_table(head); 323 spin_unlock(&sysctl_lock); 324 } 325 326 static struct ctl_table_set * 327 lookup_header_set(struct ctl_table_root *root) 328 { 329 struct ctl_table_set *set = &root->default_set; 330 if (root->lookup) 331 set = root->lookup(root); 332 return set; 333 } 334 335 static struct ctl_table *lookup_entry(struct ctl_table_header **phead, 336 struct ctl_dir *dir, 337 const char *name, int namelen) 338 { 339 struct ctl_table_header *head; 340 struct ctl_table *entry; 341 342 spin_lock(&sysctl_lock); 343 entry = find_entry(&head, dir, name, namelen); 344 if (entry && use_table(head)) 345 *phead = head; 346 else 347 entry = NULL; 348 spin_unlock(&sysctl_lock); 349 return entry; 350 } 351 352 static struct ctl_node *first_usable_entry(struct rb_node *node) 353 { 354 struct ctl_node *ctl_node; 355 356 for (;node; node = rb_next(node)) { 357 ctl_node = rb_entry(node, struct ctl_node, node); 358 if (use_table(ctl_node->header)) 359 return ctl_node; 360 } 361 return NULL; 362 } 363 364 static void first_entry(struct ctl_dir *dir, 365 struct ctl_table_header **phead, struct ctl_table **pentry) 366 { 367 struct ctl_table_header *head = NULL; 368 struct ctl_table *entry = NULL; 369 struct ctl_node *ctl_node; 370 371 spin_lock(&sysctl_lock); 372 ctl_node = first_usable_entry(rb_first(&dir->root)); 373 spin_unlock(&sysctl_lock); 374 if (ctl_node) { 375 head = ctl_node->header; 376 entry = &head->ctl_table[ctl_node - head->node]; 377 } 378 *phead = head; 379 *pentry = entry; 380 } 381 382 static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentry) 383 { 384 struct ctl_table_header *head = *phead; 385 struct ctl_table *entry = *pentry; 386 struct ctl_node *ctl_node = &head->node[entry - head->ctl_table]; 387 388 spin_lock(&sysctl_lock); 389 unuse_table(head); 390 391 ctl_node = first_usable_entry(rb_next(&ctl_node->node)); 392 spin_unlock(&sysctl_lock); 393 head = NULL; 394 if (ctl_node) { 395 head = ctl_node->header; 396 entry = &head->ctl_table[ctl_node - head->node]; 397 } 398 *phead = head; 399 *pentry = entry; 400 } 401 402 /* 403 * sysctl_perm does NOT grant the superuser all rights automatically, because 404 * some sysctl variables are readonly even to root. 405 */ 406 407 static int test_perm(int mode, int op) 408 { 409 if (uid_eq(current_euid(), GLOBAL_ROOT_UID)) 410 mode >>= 6; 411 else if (in_egroup_p(GLOBAL_ROOT_GID)) 412 mode >>= 3; 413 if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0) 414 return 0; 415 return -EACCES; 416 } 417 418 static int sysctl_perm(struct ctl_table_header *head, struct ctl_table *table, int op) 419 { 420 struct ctl_table_root *root = head->root; 421 int mode; 422 423 if (root->permissions) 424 mode = root->permissions(head, table); 425 else 426 mode = table->mode; 427 428 return test_perm(mode, op); 429 } 430 431 static struct inode *proc_sys_make_inode(struct super_block *sb, 432 struct ctl_table_header *head, struct ctl_table *table) 433 { 434 struct ctl_table_root *root = head->root; 435 struct inode *inode; 436 struct proc_inode *ei; 437 438 inode = new_inode(sb); 439 if (!inode) 440 return ERR_PTR(-ENOMEM); 441 442 inode->i_ino = get_next_ino(); 443 444 ei = PROC_I(inode); 445 446 spin_lock(&sysctl_lock); 447 if (unlikely(head->unregistering)) { 448 spin_unlock(&sysctl_lock); 449 iput(inode); 450 return ERR_PTR(-ENOENT); 451 } 452 ei->sysctl = head; 453 ei->sysctl_entry = table; 454 hlist_add_head_rcu(&ei->sibling_inodes, &head->inodes); 455 head->count++; 456 spin_unlock(&sysctl_lock); 457 458 inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); 459 inode->i_mode = table->mode; 460 if (!S_ISDIR(table->mode)) { 461 inode->i_mode |= S_IFREG; 462 inode->i_op = &proc_sys_inode_operations; 463 inode->i_fop = &proc_sys_file_operations; 464 } else { 465 inode->i_mode |= S_IFDIR; 466 inode->i_op = &proc_sys_dir_operations; 467 inode->i_fop = &proc_sys_dir_file_operations; 468 if (is_empty_dir(head)) 469 make_empty_dir_inode(inode); 470 } 471 472 if (root->set_ownership) 473 root->set_ownership(head, table, &inode->i_uid, &inode->i_gid); 474 else { 475 inode->i_uid = GLOBAL_ROOT_UID; 476 inode->i_gid = GLOBAL_ROOT_GID; 477 } 478 479 return inode; 480 } 481 482 void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head) 483 { 484 spin_lock(&sysctl_lock); 485 hlist_del_init_rcu(&PROC_I(inode)->sibling_inodes); 486 if (!--head->count) 487 kfree_rcu(head, rcu); 488 spin_unlock(&sysctl_lock); 489 } 490 491 static struct ctl_table_header *grab_header(struct inode *inode) 492 { 493 struct ctl_table_header *head = PROC_I(inode)->sysctl; 494 if (!head) 495 head = &sysctl_table_root.default_set.dir.header; 496 return sysctl_head_grab(head); 497 } 498 499 static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, 500 unsigned int flags) 501 { 502 struct ctl_table_header *head = grab_header(dir); 503 struct ctl_table_header *h = NULL; 504 const struct qstr *name = &dentry->d_name; 505 struct ctl_table *p; 506 struct inode *inode; 507 struct dentry *err = ERR_PTR(-ENOENT); 508 struct ctl_dir *ctl_dir; 509 int ret; 510 511 if (IS_ERR(head)) 512 return ERR_CAST(head); 513 514 ctl_dir = container_of(head, struct ctl_dir, header); 515 516 p = lookup_entry(&h, ctl_dir, name->name, name->len); 517 if (!p) 518 goto out; 519 520 if (S_ISLNK(p->mode)) { 521 ret = sysctl_follow_link(&h, &p); 522 err = ERR_PTR(ret); 523 if (ret) 524 goto out; 525 } 526 527 inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); 528 if (IS_ERR(inode)) { 529 err = ERR_CAST(inode); 530 goto out; 531 } 532 533 d_set_d_op(dentry, &proc_sys_dentry_operations); 534 err = d_splice_alias(inode, dentry); 535 536 out: 537 if (h) 538 sysctl_head_finish(h); 539 sysctl_head_finish(head); 540 return err; 541 } 542 543 static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf, 544 size_t count, loff_t *ppos, int write) 545 { 546 struct inode *inode = file_inode(filp); 547 struct ctl_table_header *head = grab_header(inode); 548 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 549 void *kbuf; 550 ssize_t error; 551 552 if (IS_ERR(head)) 553 return PTR_ERR(head); 554 555 /* 556 * At this point we know that the sysctl was not unregistered 557 * and won't be until we finish. 558 */ 559 error = -EPERM; 560 if (sysctl_perm(head, table, write ? MAY_WRITE : MAY_READ)) 561 goto out; 562 563 /* if that can happen at all, it should be -EINVAL, not -EISDIR */ 564 error = -EINVAL; 565 if (!table->proc_handler) 566 goto out; 567 568 if (write) { 569 kbuf = memdup_user_nul(ubuf, count); 570 if (IS_ERR(kbuf)) { 571 error = PTR_ERR(kbuf); 572 goto out; 573 } 574 } else { 575 error = -ENOMEM; 576 kbuf = kzalloc(count, GFP_KERNEL); 577 if (!kbuf) 578 goto out; 579 } 580 581 error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, &kbuf, &count, 582 ppos); 583 if (error) 584 goto out_free_buf; 585 586 /* careful: calling conventions are nasty here */ 587 error = table->proc_handler(table, write, kbuf, &count, ppos); 588 if (error) 589 goto out_free_buf; 590 591 if (!write) { 592 error = -EFAULT; 593 if (copy_to_user(ubuf, kbuf, count)) 594 goto out_free_buf; 595 } 596 597 error = count; 598 out_free_buf: 599 kfree(kbuf); 600 out: 601 sysctl_head_finish(head); 602 603 return error; 604 } 605 606 static ssize_t proc_sys_read(struct file *filp, char __user *buf, 607 size_t count, loff_t *ppos) 608 { 609 return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 0); 610 } 611 612 static ssize_t proc_sys_write(struct file *filp, const char __user *buf, 613 size_t count, loff_t *ppos) 614 { 615 return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1); 616 } 617 618 static int proc_sys_open(struct inode *inode, struct file *filp) 619 { 620 struct ctl_table_header *head = grab_header(inode); 621 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 622 623 /* sysctl was unregistered */ 624 if (IS_ERR(head)) 625 return PTR_ERR(head); 626 627 if (table->poll) 628 filp->private_data = proc_sys_poll_event(table->poll); 629 630 sysctl_head_finish(head); 631 632 return 0; 633 } 634 635 static __poll_t proc_sys_poll(struct file *filp, poll_table *wait) 636 { 637 struct inode *inode = file_inode(filp); 638 struct ctl_table_header *head = grab_header(inode); 639 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 640 __poll_t ret = DEFAULT_POLLMASK; 641 unsigned long event; 642 643 /* sysctl was unregistered */ 644 if (IS_ERR(head)) 645 return EPOLLERR | EPOLLHUP; 646 647 if (!table->proc_handler) 648 goto out; 649 650 if (!table->poll) 651 goto out; 652 653 event = (unsigned long)filp->private_data; 654 poll_wait(filp, &table->poll->wait, wait); 655 656 if (event != atomic_read(&table->poll->event)) { 657 filp->private_data = proc_sys_poll_event(table->poll); 658 ret = EPOLLIN | EPOLLRDNORM | EPOLLERR | EPOLLPRI; 659 } 660 661 out: 662 sysctl_head_finish(head); 663 664 return ret; 665 } 666 667 static bool proc_sys_fill_cache(struct file *file, 668 struct dir_context *ctx, 669 struct ctl_table_header *head, 670 struct ctl_table *table) 671 { 672 struct dentry *child, *dir = file->f_path.dentry; 673 struct inode *inode; 674 struct qstr qname; 675 ino_t ino = 0; 676 unsigned type = DT_UNKNOWN; 677 678 qname.name = table->procname; 679 qname.len = strlen(table->procname); 680 qname.hash = full_name_hash(dir, qname.name, qname.len); 681 682 child = d_lookup(dir, &qname); 683 if (!child) { 684 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); 685 child = d_alloc_parallel(dir, &qname, &wq); 686 if (IS_ERR(child)) 687 return false; 688 if (d_in_lookup(child)) { 689 struct dentry *res; 690 inode = proc_sys_make_inode(dir->d_sb, head, table); 691 if (IS_ERR(inode)) { 692 d_lookup_done(child); 693 dput(child); 694 return false; 695 } 696 d_set_d_op(child, &proc_sys_dentry_operations); 697 res = d_splice_alias(inode, child); 698 d_lookup_done(child); 699 if (unlikely(res)) { 700 if (IS_ERR(res)) { 701 dput(child); 702 return false; 703 } 704 dput(child); 705 child = res; 706 } 707 } 708 } 709 inode = d_inode(child); 710 ino = inode->i_ino; 711 type = inode->i_mode >> 12; 712 dput(child); 713 return dir_emit(ctx, qname.name, qname.len, ino, type); 714 } 715 716 static bool proc_sys_link_fill_cache(struct file *file, 717 struct dir_context *ctx, 718 struct ctl_table_header *head, 719 struct ctl_table *table) 720 { 721 bool ret = true; 722 723 head = sysctl_head_grab(head); 724 if (IS_ERR(head)) 725 return false; 726 727 /* It is not an error if we can not follow the link ignore it */ 728 if (sysctl_follow_link(&head, &table)) 729 goto out; 730 731 ret = proc_sys_fill_cache(file, ctx, head, table); 732 out: 733 sysctl_head_finish(head); 734 return ret; 735 } 736 737 static int scan(struct ctl_table_header *head, struct ctl_table *table, 738 unsigned long *pos, struct file *file, 739 struct dir_context *ctx) 740 { 741 bool res; 742 743 if ((*pos)++ < ctx->pos) 744 return true; 745 746 if (unlikely(S_ISLNK(table->mode))) 747 res = proc_sys_link_fill_cache(file, ctx, head, table); 748 else 749 res = proc_sys_fill_cache(file, ctx, head, table); 750 751 if (res) 752 ctx->pos = *pos; 753 754 return res; 755 } 756 757 static int proc_sys_readdir(struct file *file, struct dir_context *ctx) 758 { 759 struct ctl_table_header *head = grab_header(file_inode(file)); 760 struct ctl_table_header *h = NULL; 761 struct ctl_table *entry; 762 struct ctl_dir *ctl_dir; 763 unsigned long pos; 764 765 if (IS_ERR(head)) 766 return PTR_ERR(head); 767 768 ctl_dir = container_of(head, struct ctl_dir, header); 769 770 if (!dir_emit_dots(file, ctx)) 771 goto out; 772 773 pos = 2; 774 775 for (first_entry(ctl_dir, &h, &entry); h; next_entry(&h, &entry)) { 776 if (!scan(h, entry, &pos, file, ctx)) { 777 sysctl_head_finish(h); 778 break; 779 } 780 } 781 out: 782 sysctl_head_finish(head); 783 return 0; 784 } 785 786 static int proc_sys_permission(struct inode *inode, int mask) 787 { 788 /* 789 * sysctl entries that are not writeable, 790 * are _NOT_ writeable, capabilities or not. 791 */ 792 struct ctl_table_header *head; 793 struct ctl_table *table; 794 int error; 795 796 /* Executable files are not allowed under /proc/sys/ */ 797 if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) 798 return -EACCES; 799 800 head = grab_header(inode); 801 if (IS_ERR(head)) 802 return PTR_ERR(head); 803 804 table = PROC_I(inode)->sysctl_entry; 805 if (!table) /* global root - r-xr-xr-x */ 806 error = mask & MAY_WRITE ? -EACCES : 0; 807 else /* Use the permissions on the sysctl table entry */ 808 error = sysctl_perm(head, table, mask & ~MAY_NOT_BLOCK); 809 810 sysctl_head_finish(head); 811 return error; 812 } 813 814 static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr) 815 { 816 struct inode *inode = d_inode(dentry); 817 int error; 818 819 if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)) 820 return -EPERM; 821 822 error = setattr_prepare(dentry, attr); 823 if (error) 824 return error; 825 826 setattr_copy(inode, attr); 827 mark_inode_dirty(inode); 828 return 0; 829 } 830 831 static int proc_sys_getattr(const struct path *path, struct kstat *stat, 832 u32 request_mask, unsigned int query_flags) 833 { 834 struct inode *inode = d_inode(path->dentry); 835 struct ctl_table_header *head = grab_header(inode); 836 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 837 838 if (IS_ERR(head)) 839 return PTR_ERR(head); 840 841 generic_fillattr(inode, stat); 842 if (table) 843 stat->mode = (stat->mode & S_IFMT) | table->mode; 844 845 sysctl_head_finish(head); 846 return 0; 847 } 848 849 static const struct file_operations proc_sys_file_operations = { 850 .open = proc_sys_open, 851 .poll = proc_sys_poll, 852 .read = proc_sys_read, 853 .write = proc_sys_write, 854 .llseek = default_llseek, 855 }; 856 857 static const struct file_operations proc_sys_dir_file_operations = { 858 .read = generic_read_dir, 859 .iterate_shared = proc_sys_readdir, 860 .llseek = generic_file_llseek, 861 }; 862 863 static const struct inode_operations proc_sys_inode_operations = { 864 .permission = proc_sys_permission, 865 .setattr = proc_sys_setattr, 866 .getattr = proc_sys_getattr, 867 }; 868 869 static const struct inode_operations proc_sys_dir_operations = { 870 .lookup = proc_sys_lookup, 871 .permission = proc_sys_permission, 872 .setattr = proc_sys_setattr, 873 .getattr = proc_sys_getattr, 874 }; 875 876 static int proc_sys_revalidate(struct dentry *dentry, unsigned int flags) 877 { 878 if (flags & LOOKUP_RCU) 879 return -ECHILD; 880 return !PROC_I(d_inode(dentry))->sysctl->unregistering; 881 } 882 883 static int proc_sys_delete(const struct dentry *dentry) 884 { 885 return !!PROC_I(d_inode(dentry))->sysctl->unregistering; 886 } 887 888 static int sysctl_is_seen(struct ctl_table_header *p) 889 { 890 struct ctl_table_set *set = p->set; 891 int res; 892 spin_lock(&sysctl_lock); 893 if (p->unregistering) 894 res = 0; 895 else if (!set->is_seen) 896 res = 1; 897 else 898 res = set->is_seen(set); 899 spin_unlock(&sysctl_lock); 900 return res; 901 } 902 903 static int proc_sys_compare(const struct dentry *dentry, 904 unsigned int len, const char *str, const struct qstr *name) 905 { 906 struct ctl_table_header *head; 907 struct inode *inode; 908 909 /* Although proc doesn't have negative dentries, rcu-walk means 910 * that inode here can be NULL */ 911 /* AV: can it, indeed? */ 912 inode = d_inode_rcu(dentry); 913 if (!inode) 914 return 1; 915 if (name->len != len) 916 return 1; 917 if (memcmp(name->name, str, len)) 918 return 1; 919 head = rcu_dereference(PROC_I(inode)->sysctl); 920 return !head || !sysctl_is_seen(head); 921 } 922 923 static const struct dentry_operations proc_sys_dentry_operations = { 924 .d_revalidate = proc_sys_revalidate, 925 .d_delete = proc_sys_delete, 926 .d_compare = proc_sys_compare, 927 }; 928 929 static struct ctl_dir *find_subdir(struct ctl_dir *dir, 930 const char *name, int namelen) 931 { 932 struct ctl_table_header *head; 933 struct ctl_table *entry; 934 935 entry = find_entry(&head, dir, name, namelen); 936 if (!entry) 937 return ERR_PTR(-ENOENT); 938 if (!S_ISDIR(entry->mode)) 939 return ERR_PTR(-ENOTDIR); 940 return container_of(head, struct ctl_dir, header); 941 } 942 943 static struct ctl_dir *new_dir(struct ctl_table_set *set, 944 const char *name, int namelen) 945 { 946 struct ctl_table *table; 947 struct ctl_dir *new; 948 struct ctl_node *node; 949 char *new_name; 950 951 new = kzalloc(sizeof(*new) + sizeof(struct ctl_node) + 952 sizeof(struct ctl_table)*2 + namelen + 1, 953 GFP_KERNEL); 954 if (!new) 955 return NULL; 956 957 node = (struct ctl_node *)(new + 1); 958 table = (struct ctl_table *)(node + 1); 959 new_name = (char *)(table + 2); 960 memcpy(new_name, name, namelen); 961 new_name[namelen] = '\0'; 962 table[0].procname = new_name; 963 table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO; 964 init_header(&new->header, set->dir.header.root, set, node, table); 965 966 return new; 967 } 968 969 /** 970 * get_subdir - find or create a subdir with the specified name. 971 * @dir: Directory to create the subdirectory in 972 * @name: The name of the subdirectory to find or create 973 * @namelen: The length of name 974 * 975 * Takes a directory with an elevated reference count so we know that 976 * if we drop the lock the directory will not go away. Upon success 977 * the reference is moved from @dir to the returned subdirectory. 978 * Upon error an error code is returned and the reference on @dir is 979 * simply dropped. 980 */ 981 static struct ctl_dir *get_subdir(struct ctl_dir *dir, 982 const char *name, int namelen) 983 { 984 struct ctl_table_set *set = dir->header.set; 985 struct ctl_dir *subdir, *new = NULL; 986 int err; 987 988 spin_lock(&sysctl_lock); 989 subdir = find_subdir(dir, name, namelen); 990 if (!IS_ERR(subdir)) 991 goto found; 992 if (PTR_ERR(subdir) != -ENOENT) 993 goto failed; 994 995 spin_unlock(&sysctl_lock); 996 new = new_dir(set, name, namelen); 997 spin_lock(&sysctl_lock); 998 subdir = ERR_PTR(-ENOMEM); 999 if (!new) 1000 goto failed; 1001 1002 /* Was the subdir added while we dropped the lock? */ 1003 subdir = find_subdir(dir, name, namelen); 1004 if (!IS_ERR(subdir)) 1005 goto found; 1006 if (PTR_ERR(subdir) != -ENOENT) 1007 goto failed; 1008 1009 /* Nope. Use the our freshly made directory entry. */ 1010 err = insert_header(dir, &new->header); 1011 subdir = ERR_PTR(err); 1012 if (err) 1013 goto failed; 1014 subdir = new; 1015 found: 1016 subdir->header.nreg++; 1017 failed: 1018 if (IS_ERR(subdir)) { 1019 pr_err("sysctl could not get directory: "); 1020 sysctl_print_dir(dir); 1021 pr_cont("/%*.*s %ld\n", 1022 namelen, namelen, name, PTR_ERR(subdir)); 1023 } 1024 drop_sysctl_table(&dir->header); 1025 if (new) 1026 drop_sysctl_table(&new->header); 1027 spin_unlock(&sysctl_lock); 1028 return subdir; 1029 } 1030 1031 static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir) 1032 { 1033 struct ctl_dir *parent; 1034 const char *procname; 1035 if (!dir->header.parent) 1036 return &set->dir; 1037 parent = xlate_dir(set, dir->header.parent); 1038 if (IS_ERR(parent)) 1039 return parent; 1040 procname = dir->header.ctl_table[0].procname; 1041 return find_subdir(parent, procname, strlen(procname)); 1042 } 1043 1044 static int sysctl_follow_link(struct ctl_table_header **phead, 1045 struct ctl_table **pentry) 1046 { 1047 struct ctl_table_header *head; 1048 struct ctl_table_root *root; 1049 struct ctl_table_set *set; 1050 struct ctl_table *entry; 1051 struct ctl_dir *dir; 1052 int ret; 1053 1054 ret = 0; 1055 spin_lock(&sysctl_lock); 1056 root = (*pentry)->data; 1057 set = lookup_header_set(root); 1058 dir = xlate_dir(set, (*phead)->parent); 1059 if (IS_ERR(dir)) 1060 ret = PTR_ERR(dir); 1061 else { 1062 const char *procname = (*pentry)->procname; 1063 head = NULL; 1064 entry = find_entry(&head, dir, procname, strlen(procname)); 1065 ret = -ENOENT; 1066 if (entry && use_table(head)) { 1067 unuse_table(*phead); 1068 *phead = head; 1069 *pentry = entry; 1070 ret = 0; 1071 } 1072 } 1073 1074 spin_unlock(&sysctl_lock); 1075 return ret; 1076 } 1077 1078 static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...) 1079 { 1080 struct va_format vaf; 1081 va_list args; 1082 1083 va_start(args, fmt); 1084 vaf.fmt = fmt; 1085 vaf.va = &args; 1086 1087 pr_err("sysctl table check failed: %s/%s %pV\n", 1088 path, table->procname, &vaf); 1089 1090 va_end(args); 1091 return -EINVAL; 1092 } 1093 1094 static int sysctl_check_table_array(const char *path, struct ctl_table *table) 1095 { 1096 int err = 0; 1097 1098 if ((table->proc_handler == proc_douintvec) || 1099 (table->proc_handler == proc_douintvec_minmax)) { 1100 if (table->maxlen != sizeof(unsigned int)) 1101 err |= sysctl_err(path, table, "array not allowed"); 1102 } 1103 1104 return err; 1105 } 1106 1107 static int sysctl_check_table(const char *path, struct ctl_table *table) 1108 { 1109 int err = 0; 1110 for (; table->procname; table++) { 1111 if (table->child) 1112 err |= sysctl_err(path, table, "Not a file"); 1113 1114 if ((table->proc_handler == proc_dostring) || 1115 (table->proc_handler == proc_dointvec) || 1116 (table->proc_handler == proc_douintvec) || 1117 (table->proc_handler == proc_douintvec_minmax) || 1118 (table->proc_handler == proc_dointvec_minmax) || 1119 (table->proc_handler == proc_dointvec_jiffies) || 1120 (table->proc_handler == proc_dointvec_userhz_jiffies) || 1121 (table->proc_handler == proc_dointvec_ms_jiffies) || 1122 (table->proc_handler == proc_doulongvec_minmax) || 1123 (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { 1124 if (!table->data) 1125 err |= sysctl_err(path, table, "No data"); 1126 if (!table->maxlen) 1127 err |= sysctl_err(path, table, "No maxlen"); 1128 else 1129 err |= sysctl_check_table_array(path, table); 1130 } 1131 if (!table->proc_handler) 1132 err |= sysctl_err(path, table, "No proc_handler"); 1133 1134 if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode) 1135 err |= sysctl_err(path, table, "bogus .mode 0%o", 1136 table->mode); 1137 } 1138 return err; 1139 } 1140 1141 static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table *table, 1142 struct ctl_table_root *link_root) 1143 { 1144 struct ctl_table *link_table, *entry, *link; 1145 struct ctl_table_header *links; 1146 struct ctl_node *node; 1147 char *link_name; 1148 int nr_entries, name_bytes; 1149 1150 name_bytes = 0; 1151 nr_entries = 0; 1152 for (entry = table; entry->procname; entry++) { 1153 nr_entries++; 1154 name_bytes += strlen(entry->procname) + 1; 1155 } 1156 1157 links = kzalloc(sizeof(struct ctl_table_header) + 1158 sizeof(struct ctl_node)*nr_entries + 1159 sizeof(struct ctl_table)*(nr_entries + 1) + 1160 name_bytes, 1161 GFP_KERNEL); 1162 1163 if (!links) 1164 return NULL; 1165 1166 node = (struct ctl_node *)(links + 1); 1167 link_table = (struct ctl_table *)(node + nr_entries); 1168 link_name = (char *)&link_table[nr_entries + 1]; 1169 1170 for (link = link_table, entry = table; entry->procname; link++, entry++) { 1171 int len = strlen(entry->procname) + 1; 1172 memcpy(link_name, entry->procname, len); 1173 link->procname = link_name; 1174 link->mode = S_IFLNK|S_IRWXUGO; 1175 link->data = link_root; 1176 link_name += len; 1177 } 1178 init_header(links, dir->header.root, dir->header.set, node, link_table); 1179 links->nreg = nr_entries; 1180 1181 return links; 1182 } 1183 1184 static bool get_links(struct ctl_dir *dir, 1185 struct ctl_table *table, struct ctl_table_root *link_root) 1186 { 1187 struct ctl_table_header *head; 1188 struct ctl_table *entry, *link; 1189 1190 /* Are there links available for every entry in table? */ 1191 for (entry = table; entry->procname; entry++) { 1192 const char *procname = entry->procname; 1193 link = find_entry(&head, dir, procname, strlen(procname)); 1194 if (!link) 1195 return false; 1196 if (S_ISDIR(link->mode) && S_ISDIR(entry->mode)) 1197 continue; 1198 if (S_ISLNK(link->mode) && (link->data == link_root)) 1199 continue; 1200 return false; 1201 } 1202 1203 /* The checks passed. Increase the registration count on the links */ 1204 for (entry = table; entry->procname; entry++) { 1205 const char *procname = entry->procname; 1206 link = find_entry(&head, dir, procname, strlen(procname)); 1207 head->nreg++; 1208 } 1209 return true; 1210 } 1211 1212 static int insert_links(struct ctl_table_header *head) 1213 { 1214 struct ctl_table_set *root_set = &sysctl_table_root.default_set; 1215 struct ctl_dir *core_parent = NULL; 1216 struct ctl_table_header *links; 1217 int err; 1218 1219 if (head->set == root_set) 1220 return 0; 1221 1222 core_parent = xlate_dir(root_set, head->parent); 1223 if (IS_ERR(core_parent)) 1224 return 0; 1225 1226 if (get_links(core_parent, head->ctl_table, head->root)) 1227 return 0; 1228 1229 core_parent->header.nreg++; 1230 spin_unlock(&sysctl_lock); 1231 1232 links = new_links(core_parent, head->ctl_table, head->root); 1233 1234 spin_lock(&sysctl_lock); 1235 err = -ENOMEM; 1236 if (!links) 1237 goto out; 1238 1239 err = 0; 1240 if (get_links(core_parent, head->ctl_table, head->root)) { 1241 kfree(links); 1242 goto out; 1243 } 1244 1245 err = insert_header(core_parent, links); 1246 if (err) 1247 kfree(links); 1248 out: 1249 drop_sysctl_table(&core_parent->header); 1250 return err; 1251 } 1252 1253 /** 1254 * __register_sysctl_table - register a leaf sysctl table 1255 * @set: Sysctl tree to register on 1256 * @path: The path to the directory the sysctl table is in. 1257 * @table: the top-level table structure 1258 * 1259 * Register a sysctl table hierarchy. @table should be a filled in ctl_table 1260 * array. A completely 0 filled entry terminates the table. 1261 * 1262 * The members of the &struct ctl_table structure are used as follows: 1263 * 1264 * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not 1265 * enter a sysctl file 1266 * 1267 * data - a pointer to data for use by proc_handler 1268 * 1269 * maxlen - the maximum size in bytes of the data 1270 * 1271 * mode - the file permissions for the /proc/sys file 1272 * 1273 * child - must be %NULL. 1274 * 1275 * proc_handler - the text handler routine (described below) 1276 * 1277 * extra1, extra2 - extra pointers usable by the proc handler routines 1278 * 1279 * Leaf nodes in the sysctl tree will be represented by a single file 1280 * under /proc; non-leaf nodes will be represented by directories. 1281 * 1282 * There must be a proc_handler routine for any terminal nodes. 1283 * Several default handlers are available to cover common cases - 1284 * 1285 * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), 1286 * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), 1287 * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax() 1288 * 1289 * It is the handler's job to read the input buffer from user memory 1290 * and process it. The handler should return 0 on success. 1291 * 1292 * This routine returns %NULL on a failure to register, and a pointer 1293 * to the table header on success. 1294 */ 1295 struct ctl_table_header *__register_sysctl_table( 1296 struct ctl_table_set *set, 1297 const char *path, struct ctl_table *table) 1298 { 1299 struct ctl_table_root *root = set->dir.header.root; 1300 struct ctl_table_header *header; 1301 const char *name, *nextname; 1302 struct ctl_dir *dir; 1303 struct ctl_table *entry; 1304 struct ctl_node *node; 1305 int nr_entries = 0; 1306 1307 for (entry = table; entry->procname; entry++) 1308 nr_entries++; 1309 1310 header = kzalloc(sizeof(struct ctl_table_header) + 1311 sizeof(struct ctl_node)*nr_entries, GFP_KERNEL); 1312 if (!header) 1313 return NULL; 1314 1315 node = (struct ctl_node *)(header + 1); 1316 init_header(header, root, set, node, table); 1317 if (sysctl_check_table(path, table)) 1318 goto fail; 1319 1320 spin_lock(&sysctl_lock); 1321 dir = &set->dir; 1322 /* Reference moved down the diretory tree get_subdir */ 1323 dir->header.nreg++; 1324 spin_unlock(&sysctl_lock); 1325 1326 /* Find the directory for the ctl_table */ 1327 for (name = path; name; name = nextname) { 1328 int namelen; 1329 nextname = strchr(name, '/'); 1330 if (nextname) { 1331 namelen = nextname - name; 1332 nextname++; 1333 } else { 1334 namelen = strlen(name); 1335 } 1336 if (namelen == 0) 1337 continue; 1338 1339 dir = get_subdir(dir, name, namelen); 1340 if (IS_ERR(dir)) 1341 goto fail; 1342 } 1343 1344 spin_lock(&sysctl_lock); 1345 if (insert_header(dir, header)) 1346 goto fail_put_dir_locked; 1347 1348 drop_sysctl_table(&dir->header); 1349 spin_unlock(&sysctl_lock); 1350 1351 return header; 1352 1353 fail_put_dir_locked: 1354 drop_sysctl_table(&dir->header); 1355 spin_unlock(&sysctl_lock); 1356 fail: 1357 kfree(header); 1358 dump_stack(); 1359 return NULL; 1360 } 1361 1362 /** 1363 * register_sysctl - register a sysctl table 1364 * @path: The path to the directory the sysctl table is in. 1365 * @table: the table structure 1366 * 1367 * Register a sysctl table. @table should be a filled in ctl_table 1368 * array. A completely 0 filled entry terminates the table. 1369 * 1370 * See __register_sysctl_table for more details. 1371 */ 1372 struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table) 1373 { 1374 return __register_sysctl_table(&sysctl_table_root.default_set, 1375 path, table); 1376 } 1377 EXPORT_SYMBOL(register_sysctl); 1378 1379 static char *append_path(const char *path, char *pos, const char *name) 1380 { 1381 int namelen; 1382 namelen = strlen(name); 1383 if (((pos - path) + namelen + 2) >= PATH_MAX) 1384 return NULL; 1385 memcpy(pos, name, namelen); 1386 pos[namelen] = '/'; 1387 pos[namelen + 1] = '\0'; 1388 pos += namelen + 1; 1389 return pos; 1390 } 1391 1392 static int count_subheaders(struct ctl_table *table) 1393 { 1394 int has_files = 0; 1395 int nr_subheaders = 0; 1396 struct ctl_table *entry; 1397 1398 /* special case: no directory and empty directory */ 1399 if (!table || !table->procname) 1400 return 1; 1401 1402 for (entry = table; entry->procname; entry++) { 1403 if (entry->child) 1404 nr_subheaders += count_subheaders(entry->child); 1405 else 1406 has_files = 1; 1407 } 1408 return nr_subheaders + has_files; 1409 } 1410 1411 static int register_leaf_sysctl_tables(const char *path, char *pos, 1412 struct ctl_table_header ***subheader, struct ctl_table_set *set, 1413 struct ctl_table *table) 1414 { 1415 struct ctl_table *ctl_table_arg = NULL; 1416 struct ctl_table *entry, *files; 1417 int nr_files = 0; 1418 int nr_dirs = 0; 1419 int err = -ENOMEM; 1420 1421 for (entry = table; entry->procname; entry++) { 1422 if (entry->child) 1423 nr_dirs++; 1424 else 1425 nr_files++; 1426 } 1427 1428 files = table; 1429 /* If there are mixed files and directories we need a new table */ 1430 if (nr_dirs && nr_files) { 1431 struct ctl_table *new; 1432 files = kcalloc(nr_files + 1, sizeof(struct ctl_table), 1433 GFP_KERNEL); 1434 if (!files) 1435 goto out; 1436 1437 ctl_table_arg = files; 1438 for (new = files, entry = table; entry->procname; entry++) { 1439 if (entry->child) 1440 continue; 1441 *new = *entry; 1442 new++; 1443 } 1444 } 1445 1446 /* Register everything except a directory full of subdirectories */ 1447 if (nr_files || !nr_dirs) { 1448 struct ctl_table_header *header; 1449 header = __register_sysctl_table(set, path, files); 1450 if (!header) { 1451 kfree(ctl_table_arg); 1452 goto out; 1453 } 1454 1455 /* Remember if we need to free the file table */ 1456 header->ctl_table_arg = ctl_table_arg; 1457 **subheader = header; 1458 (*subheader)++; 1459 } 1460 1461 /* Recurse into the subdirectories. */ 1462 for (entry = table; entry->procname; entry++) { 1463 char *child_pos; 1464 1465 if (!entry->child) 1466 continue; 1467 1468 err = -ENAMETOOLONG; 1469 child_pos = append_path(path, pos, entry->procname); 1470 if (!child_pos) 1471 goto out; 1472 1473 err = register_leaf_sysctl_tables(path, child_pos, subheader, 1474 set, entry->child); 1475 pos[0] = '\0'; 1476 if (err) 1477 goto out; 1478 } 1479 err = 0; 1480 out: 1481 /* On failure our caller will unregister all registered subheaders */ 1482 return err; 1483 } 1484 1485 /** 1486 * __register_sysctl_paths - register a sysctl table hierarchy 1487 * @set: Sysctl tree to register on 1488 * @path: The path to the directory the sysctl table is in. 1489 * @table: the top-level table structure 1490 * 1491 * Register a sysctl table hierarchy. @table should be a filled in ctl_table 1492 * array. A completely 0 filled entry terminates the table. 1493 * 1494 * See __register_sysctl_table for more details. 1495 */ 1496 struct ctl_table_header *__register_sysctl_paths( 1497 struct ctl_table_set *set, 1498 const struct ctl_path *path, struct ctl_table *table) 1499 { 1500 struct ctl_table *ctl_table_arg = table; 1501 int nr_subheaders = count_subheaders(table); 1502 struct ctl_table_header *header = NULL, **subheaders, **subheader; 1503 const struct ctl_path *component; 1504 char *new_path, *pos; 1505 1506 pos = new_path = kmalloc(PATH_MAX, GFP_KERNEL); 1507 if (!new_path) 1508 return NULL; 1509 1510 pos[0] = '\0'; 1511 for (component = path; component->procname; component++) { 1512 pos = append_path(new_path, pos, component->procname); 1513 if (!pos) 1514 goto out; 1515 } 1516 while (table->procname && table->child && !table[1].procname) { 1517 pos = append_path(new_path, pos, table->procname); 1518 if (!pos) 1519 goto out; 1520 table = table->child; 1521 } 1522 if (nr_subheaders == 1) { 1523 header = __register_sysctl_table(set, new_path, table); 1524 if (header) 1525 header->ctl_table_arg = ctl_table_arg; 1526 } else { 1527 header = kzalloc(sizeof(*header) + 1528 sizeof(*subheaders)*nr_subheaders, GFP_KERNEL); 1529 if (!header) 1530 goto out; 1531 1532 subheaders = (struct ctl_table_header **) (header + 1); 1533 subheader = subheaders; 1534 header->ctl_table_arg = ctl_table_arg; 1535 1536 if (register_leaf_sysctl_tables(new_path, pos, &subheader, 1537 set, table)) 1538 goto err_register_leaves; 1539 } 1540 1541 out: 1542 kfree(new_path); 1543 return header; 1544 1545 err_register_leaves: 1546 while (subheader > subheaders) { 1547 struct ctl_table_header *subh = *(--subheader); 1548 struct ctl_table *table = subh->ctl_table_arg; 1549 unregister_sysctl_table(subh); 1550 kfree(table); 1551 } 1552 kfree(header); 1553 header = NULL; 1554 goto out; 1555 } 1556 1557 /** 1558 * register_sysctl_table_path - register a sysctl table hierarchy 1559 * @path: The path to the directory the sysctl table is in. 1560 * @table: the top-level table structure 1561 * 1562 * Register a sysctl table hierarchy. @table should be a filled in ctl_table 1563 * array. A completely 0 filled entry terminates the table. 1564 * 1565 * See __register_sysctl_paths for more details. 1566 */ 1567 struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, 1568 struct ctl_table *table) 1569 { 1570 return __register_sysctl_paths(&sysctl_table_root.default_set, 1571 path, table); 1572 } 1573 EXPORT_SYMBOL(register_sysctl_paths); 1574 1575 /** 1576 * register_sysctl_table - register a sysctl table hierarchy 1577 * @table: the top-level table structure 1578 * 1579 * Register a sysctl table hierarchy. @table should be a filled in ctl_table 1580 * array. A completely 0 filled entry terminates the table. 1581 * 1582 * See register_sysctl_paths for more details. 1583 */ 1584 struct ctl_table_header *register_sysctl_table(struct ctl_table *table) 1585 { 1586 static const struct ctl_path null_path[] = { {} }; 1587 1588 return register_sysctl_paths(null_path, table); 1589 } 1590 EXPORT_SYMBOL(register_sysctl_table); 1591 1592 static void put_links(struct ctl_table_header *header) 1593 { 1594 struct ctl_table_set *root_set = &sysctl_table_root.default_set; 1595 struct ctl_table_root *root = header->root; 1596 struct ctl_dir *parent = header->parent; 1597 struct ctl_dir *core_parent; 1598 struct ctl_table *entry; 1599 1600 if (header->set == root_set) 1601 return; 1602 1603 core_parent = xlate_dir(root_set, parent); 1604 if (IS_ERR(core_parent)) 1605 return; 1606 1607 for (entry = header->ctl_table; entry->procname; entry++) { 1608 struct ctl_table_header *link_head; 1609 struct ctl_table *link; 1610 const char *name = entry->procname; 1611 1612 link = find_entry(&link_head, core_parent, name, strlen(name)); 1613 if (link && 1614 ((S_ISDIR(link->mode) && S_ISDIR(entry->mode)) || 1615 (S_ISLNK(link->mode) && (link->data == root)))) { 1616 drop_sysctl_table(link_head); 1617 } 1618 else { 1619 pr_err("sysctl link missing during unregister: "); 1620 sysctl_print_dir(parent); 1621 pr_cont("/%s\n", name); 1622 } 1623 } 1624 } 1625 1626 static void drop_sysctl_table(struct ctl_table_header *header) 1627 { 1628 struct ctl_dir *parent = header->parent; 1629 1630 if (--header->nreg) 1631 return; 1632 1633 if (parent) { 1634 put_links(header); 1635 start_unregistering(header); 1636 } 1637 1638 if (!--header->count) 1639 kfree_rcu(header, rcu); 1640 1641 if (parent) 1642 drop_sysctl_table(&parent->header); 1643 } 1644 1645 /** 1646 * unregister_sysctl_table - unregister a sysctl table hierarchy 1647 * @header: the header returned from register_sysctl_table 1648 * 1649 * Unregisters the sysctl table and all children. proc entries may not 1650 * actually be removed until they are no longer used by anyone. 1651 */ 1652 void unregister_sysctl_table(struct ctl_table_header * header) 1653 { 1654 int nr_subheaders; 1655 might_sleep(); 1656 1657 if (header == NULL) 1658 return; 1659 1660 nr_subheaders = count_subheaders(header->ctl_table_arg); 1661 if (unlikely(nr_subheaders > 1)) { 1662 struct ctl_table_header **subheaders; 1663 int i; 1664 1665 subheaders = (struct ctl_table_header **)(header + 1); 1666 for (i = nr_subheaders -1; i >= 0; i--) { 1667 struct ctl_table_header *subh = subheaders[i]; 1668 struct ctl_table *table = subh->ctl_table_arg; 1669 unregister_sysctl_table(subh); 1670 kfree(table); 1671 } 1672 kfree(header); 1673 return; 1674 } 1675 1676 spin_lock(&sysctl_lock); 1677 drop_sysctl_table(header); 1678 spin_unlock(&sysctl_lock); 1679 } 1680 EXPORT_SYMBOL(unregister_sysctl_table); 1681 1682 void setup_sysctl_set(struct ctl_table_set *set, 1683 struct ctl_table_root *root, 1684 int (*is_seen)(struct ctl_table_set *)) 1685 { 1686 memset(set, 0, sizeof(*set)); 1687 set->is_seen = is_seen; 1688 init_header(&set->dir.header, root, set, NULL, root_table); 1689 } 1690 1691 void retire_sysctl_set(struct ctl_table_set *set) 1692 { 1693 WARN_ON(!RB_EMPTY_ROOT(&set->dir.root)); 1694 } 1695 1696 int __init proc_sys_init(void) 1697 { 1698 struct proc_dir_entry *proc_sys_root; 1699 1700 proc_sys_root = proc_mkdir("sys", NULL); 1701 proc_sys_root->proc_iops = &proc_sys_dir_operations; 1702 proc_sys_root->proc_dir_ops = &proc_sys_dir_file_operations; 1703 proc_sys_root->nlink = 0; 1704 1705 return sysctl_init(); 1706 } 1707 1708 struct sysctl_alias { 1709 const char *kernel_param; 1710 const char *sysctl_param; 1711 }; 1712 1713 /* 1714 * Historically some settings had both sysctl and a command line parameter. 1715 * With the generic sysctl. parameter support, we can handle them at a single 1716 * place and only keep the historical name for compatibility. This is not meant 1717 * to add brand new aliases. When adding existing aliases, consider whether 1718 * the possibly different moment of changing the value (e.g. from early_param 1719 * to the moment do_sysctl_args() is called) is an issue for the specific 1720 * parameter. 1721 */ 1722 static const struct sysctl_alias sysctl_aliases[] = { 1723 {"hardlockup_all_cpu_backtrace", "kernel.hardlockup_all_cpu_backtrace" }, 1724 {"hung_task_panic", "kernel.hung_task_panic" }, 1725 {"numa_zonelist_order", "vm.numa_zonelist_order" }, 1726 {"softlockup_all_cpu_backtrace", "kernel.softlockup_all_cpu_backtrace" }, 1727 {"softlockup_panic", "kernel.softlockup_panic" }, 1728 { } 1729 }; 1730 1731 static const char *sysctl_find_alias(char *param) 1732 { 1733 const struct sysctl_alias *alias; 1734 1735 for (alias = &sysctl_aliases[0]; alias->kernel_param != NULL; alias++) { 1736 if (strcmp(alias->kernel_param, param) == 0) 1737 return alias->sysctl_param; 1738 } 1739 1740 return NULL; 1741 } 1742 1743 /* Set sysctl value passed on kernel command line. */ 1744 static int process_sysctl_arg(char *param, char *val, 1745 const char *unused, void *arg) 1746 { 1747 char *path; 1748 struct vfsmount **proc_mnt = arg; 1749 struct file_system_type *proc_fs_type; 1750 struct file *file; 1751 int len; 1752 int err; 1753 loff_t pos = 0; 1754 ssize_t wret; 1755 1756 if (strncmp(param, "sysctl", sizeof("sysctl") - 1) == 0) { 1757 param += sizeof("sysctl") - 1; 1758 1759 if (param[0] != '/' && param[0] != '.') 1760 return 0; 1761 1762 param++; 1763 } else { 1764 param = (char *) sysctl_find_alias(param); 1765 if (!param) 1766 return 0; 1767 } 1768 1769 /* 1770 * To set sysctl options, we use a temporary mount of proc, look up the 1771 * respective sys/ file and write to it. To avoid mounting it when no 1772 * options were given, we mount it only when the first sysctl option is 1773 * found. Why not a persistent mount? There are problems with a 1774 * persistent mount of proc in that it forces userspace not to use any 1775 * proc mount options. 1776 */ 1777 if (!*proc_mnt) { 1778 proc_fs_type = get_fs_type("proc"); 1779 if (!proc_fs_type) { 1780 pr_err("Failed to find procfs to set sysctl from command line\n"); 1781 return 0; 1782 } 1783 *proc_mnt = kern_mount(proc_fs_type); 1784 put_filesystem(proc_fs_type); 1785 if (IS_ERR(*proc_mnt)) { 1786 pr_err("Failed to mount procfs to set sysctl from command line\n"); 1787 return 0; 1788 } 1789 } 1790 1791 path = kasprintf(GFP_KERNEL, "sys/%s", param); 1792 if (!path) 1793 panic("%s: Failed to allocate path for %s\n", __func__, param); 1794 strreplace(path, '.', '/'); 1795 1796 file = file_open_root((*proc_mnt)->mnt_root, *proc_mnt, path, O_WRONLY, 0); 1797 if (IS_ERR(file)) { 1798 err = PTR_ERR(file); 1799 if (err == -ENOENT) 1800 pr_err("Failed to set sysctl parameter '%s=%s': parameter not found\n", 1801 param, val); 1802 else if (err == -EACCES) 1803 pr_err("Failed to set sysctl parameter '%s=%s': permission denied (read-only?)\n", 1804 param, val); 1805 else 1806 pr_err("Error %pe opening proc file to set sysctl parameter '%s=%s'\n", 1807 file, param, val); 1808 goto out; 1809 } 1810 len = strlen(val); 1811 wret = kernel_write(file, val, len, &pos); 1812 if (wret < 0) { 1813 err = wret; 1814 if (err == -EINVAL) 1815 pr_err("Failed to set sysctl parameter '%s=%s': invalid value\n", 1816 param, val); 1817 else 1818 pr_err("Error %pe writing to proc file to set sysctl parameter '%s=%s'\n", 1819 ERR_PTR(err), param, val); 1820 } else if (wret != len) { 1821 pr_err("Wrote only %zd bytes of %d writing to proc file %s to set sysctl parameter '%s=%s\n", 1822 wret, len, path, param, val); 1823 } 1824 1825 err = filp_close(file, NULL); 1826 if (err) 1827 pr_err("Error %pe closing proc file to set sysctl parameter '%s=%s\n", 1828 ERR_PTR(err), param, val); 1829 out: 1830 kfree(path); 1831 return 0; 1832 } 1833 1834 void do_sysctl_args(void) 1835 { 1836 char *command_line; 1837 struct vfsmount *proc_mnt = NULL; 1838 1839 command_line = kstrdup(saved_command_line, GFP_KERNEL); 1840 if (!command_line) 1841 panic("%s: Failed to allocate copy of command line\n", __func__); 1842 1843 parse_args("Setting sysctl args", command_line, 1844 NULL, 0, -1, -1, &proc_mnt, process_sysctl_arg); 1845 1846 if (proc_mnt) 1847 kern_unmount(proc_mnt); 1848 1849 kfree(command_line); 1850 } 1851