/*
 * /proc/sys support
 */
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/poll.h>
#include <linux/proc_fs.h>
#include <linux/security.h>
#include <linux/namei.h>
#include <linux/module.h>
#include "internal.h"

/* Operation tables are defined further down; declared here for early users. */
static const struct dentry_operations proc_sys_dentry_operations;
static const struct file_operations proc_sys_file_operations;
static const struct inode_operations proc_sys_inode_operations;
static const struct file_operations proc_sys_dir_file_operations;
static const struct inode_operations proc_sys_dir_operations;

/*
 * Signal a change on a pollable sysctl: bump the event counter and wake
 * interruptible sleepers on the poll wait queue.  A NULL @poll is ignored.
 */
void proc_sys_poll_notify(struct ctl_table_poll *poll)
{
	if (!poll)
		return;

	atomic_inc(&poll->event);
	wake_up_interruptible(&poll->wait);
}

/*
 * The root directory: a single unnamed, read/execute-only entry whose child
 * table is the (empty) sentinel slot that follows it in the same array.
 */
static struct ctl_table root_table[] = {
	{
		.procname = "",
		.mode = S_IRUGO|S_IXUGO,
		.child = &root_table[1],
	},
	{ }
};
static struct ctl_table_root sysctl_table_root;
/*
 * Header for root_table.  It and sysctl_table_root.default_set.list are
 * statically initialised to point at each other, so the default set's list
 * starts out containing exactly this header.
 */
static struct ctl_table_header root_table_header = {
	{{.count = 1,
	  .nreg = 1,
	  .ctl_table = root_table,
	  .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}},
	.root = &sysctl_table_root,
	.set = &sysctl_table_root.default_set,
};
static struct ctl_table_root sysctl_table_root = {
	.root_list = LIST_HEAD_INIT(sysctl_table_root.root_list),
	.default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry),
	.default_set.root = &sysctl_table_root,
};

/* Taken around the header use/reference counts and the header/root lists. */
static DEFINE_SPINLOCK(sysctl_lock);

/*
 * Compare two length-delimited names.  The common prefix is compared with
 * memcmp(); if it is equal the shorter name sorts first.  Returns <0, 0 or >0.
 */
static int namecmp(const char *name1, int len1, const char *name2, int len2)
{
	int minlen;
	int cmp;

	minlen = len1;
	if (minlen > len2)
		minlen = len2;

	cmp = memcmp(name1, name2, minlen);
	if (cmp == 0)
		cmp = len1 - len2;
	return cmp;
}

/*
 * Find the entry called @name (@namelen bytes) in directory @dir as seen
 * through @set.
 *
 * The directory's own table is searched only when @dir_head belongs to @set;
 * after that every header on @set's list that is attached to @dir and is not
 * being unregistered is searched.  On a hit *@phead is set to the header that
 * owns the entry and the entry is returned; otherwise NULL is returned and
 * *@phead is left untouched.
 */
static struct ctl_table *find_entry(struct ctl_table_header **phead,
	struct ctl_table_set *set,
	struct ctl_table_header *dir_head, struct ctl_table *dir,
	const char *name, int namelen)
{
	struct ctl_table_header *head;
	struct ctl_table *entry;

	if (dir_head->set == set) {
		for (entry = dir; entry->procname; entry++) {
			const char *procname = entry->procname;
			if (namecmp(procname, strlen(procname), name, namelen) == 0) {
				*phead = dir_head;
				return entry;
			}
		}
	}

	list_for_each_entry(head, &set->list, ctl_entry) {
		if (head->unregistering)
			continue;
		if (head->attached_to != dir)
			continue;
		for (entry = head->attached_by; entry->procname; entry++) {
			const char *procname = entry->procname;
			if (namecmp(procname, strlen(procname), name, namelen) == 0) {
				*phead = head;
				return entry;
			}
		}
	}
	return NULL;
}

/*
 * Put a freshly allocated header into its initial state: one reference
 * (count), one registration (nreg), no users, not on any list, no parent.
 */
static void init_header(struct ctl_table_header *head,
	struct ctl_table_root *root, struct ctl_table_set *set,
	struct ctl_table *table)
{
	head->ctl_table_arg = table;
	INIT_LIST_HEAD(&head->ctl_entry);
	head->used = 0;
	head->count = 1;
	head->nreg = 1;
	head->unregistering = NULL;
	head->root = root;
	head->set = set;
	head->parent = NULL;
}

/* Unlink @head from its set's list, leaving its list node self-linked. */
static void erase_header(struct ctl_table_header *head)
{
	list_del_init(&head->ctl_entry);
}

/*
 * Append @header to its set's list and take a reference (count) on its
 * parent.  header->parent and header->set must already be filled in.
 */
static void insert_header(struct ctl_table_header *header)
{
	header->parent->count++;
	list_add_tail(&header->ctl_entry, &header->set->list);
}

/* called under sysctl_lock */
/* Returns 1 and bumps the use count, or 0 if @p is being unregistered. */
static int use_table(struct ctl_table_header *p)
{
	if (unlikely(p->unregistering))
		return 0;
	p->used++;
	return 1;
}

/* called under sysctl_lock */
/* Drops one use; the last user completes a pending unregistration wait. */
static void unuse_table(struct ctl_table_header *p)
{
	if (!--p->used)
		if (unlikely(p->unregistering))
			complete(p->unregistering);
}

/* called under sysctl_lock, will reacquire if has to wait */
static void start_unregistering(struct ctl_table_header *p)
{
	/*
	 * if p->used is 0, nobody will ever touch that entry again;
	 * we'll eliminate all paths to it before dropping sysctl_lock
	 */
	if (unlikely(p->used)) {
		/* on-stack completion; signalled by the last unuse_table() */
		struct completion wait;
		init_completion(&wait);
		p->unregistering = &wait;
		spin_unlock(&sysctl_lock);
		wait_for_completion(&wait);
		spin_lock(&sysctl_lock);
	} else {
		/* anything non-NULL; we'll never dereference it */
		p->unregistering = ERR_PTR(-EINVAL);
	}
	/*
	 * do not remove from the list until nobody holds it; walking the
	 * list in do_sysctl() relies on that.
	 */
	erase_header(p);
}

/* Take a reference (count) on @head under sysctl_lock. */
static void sysctl_head_get(struct ctl_table_header *head)
{
	spin_lock(&sysctl_lock);
	head->count++;
	spin_unlock(&sysctl_lock);
}

/* Drop a reference (count) on @head; the last one frees it via kfree_rcu(). */
void sysctl_head_put(struct ctl_table_header *head)
{
	spin_lock(&sysctl_lock);
	if (!--head->count)
		kfree_rcu(head, rcu);
	spin_unlock(&sysctl_lock);
}

/*
 * Mark @head as in use.  Returns @head on success or ERR_PTR(-ENOENT) if the
 * header is being unregistered.  A NULL @head is a BUG().
 */
static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
{
	if (!head)
		BUG();
	spin_lock(&sysctl_lock);
	if (!use_table(head))
		head = ERR_PTR(-ENOENT);
	spin_unlock(&sysctl_lock);
	return head;
}

/* Release a use taken on @head; a NULL @head is ignored. */
static void sysctl_head_finish(struct ctl_table_header *head)
{
	if (!head)
		return;
	spin_lock(&sysctl_lock);
	unuse_table(head);
	spin_unlock(&sysctl_lock);
}

/*
 * Return the table set of @root that applies to @namespaces: the result of
 * root->lookup() when the root provides one, the root's default set otherwise.
 */
static struct ctl_table_set *
lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
{
	struct ctl_table_set *set = &root->default_set;
	if (root->lookup)
		set = root->lookup(root, namespaces);
	return set;
}

/* Like lookup_header_set(), but return the set's list of headers. */
static struct list_head *
lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces)
{
	struct ctl_table_set *set = lookup_header_set(root, namespaces);
	return &set->list;
}

/*
 * Look @name up in directory @dir across every registered root, using the
 * current task's namespaces to pick each root's set.
 *
 * On success the owning header has been marked used (caller must finish it),
 * *@phead points at it and the entry is returned.  Returns NULL if no root
 * yields a usable match.
 */
static struct ctl_table *lookup_entry(struct ctl_table_header **phead,
				      struct ctl_table_header *dir_head,
				      struct ctl_table *dir,
				      const char *name, int namelen)
{
	struct ctl_table_header *head;
	struct ctl_table *entry;
	struct ctl_table_root *root;
	struct ctl_table_set *set;

	spin_lock(&sysctl_lock);
	root = &sysctl_table_root;
	do {
		set = lookup_header_set(root, current->nsproxy);
		entry = find_entry(&head, set, dir_head, dir, name, namelen);
		/* a match whose header cannot be used counts as a miss */
		if (entry && use_table(head))
			*phead = head;
		else
			entry = NULL;
		root = list_entry(root->root_list.next,
				  struct ctl_table_root, root_list);
	} while (!entry && root != &sysctl_table_root);
	spin_unlock(&sysctl_lock);
	return entry;
}

/*
 * Continue a directory walk: starting with the list node after @tmp, scan the
 * header list of @root and then of each following root for a header that is
 * attached to @dir, has a non-empty attached_by table and can be marked used.
 *
 * Returns that header (with its use count taken) or NULL once the root list
 * wraps back to sysctl_table_root.  The function enters the loop at the
 * "next" label so that @tmp itself is never examined.  Callers in this file
 * hold sysctl_lock.
 */
static struct ctl_table_header *next_usable_entry(struct ctl_table *dir,
	struct ctl_table_root *root, struct list_head *tmp)
{
	struct nsproxy *namespaces = current->nsproxy;
	struct list_head *header_list;
	struct ctl_table_header *head;

	goto next;
	for (;;) {
		head = list_entry(tmp, struct ctl_table_header, ctl_entry);
		root = head->root;

		if (head->attached_to != dir ||
		    !head->attached_by->procname ||
		    !use_table(head))
			goto next;

		return head;
	next:
		tmp = tmp->next;
		header_list = lookup_header_list(root, namespaces);
		if (tmp != header_list)
			continue;

		/* end of this root's list: advance to the next non-empty one */
		do {
			root = list_entry(root->root_list.next,
					struct ctl_table_root, root_list);
			if (root == &sysctl_table_root)
				goto out;
			header_list = lookup_header_list(root, namespaces);
		} while (list_empty(header_list));
		tmp = header_list->next;
	}
out:
	return NULL;
}

/*
 * Start iterating directory @dir (owned by @dir_head).
 *
 * If @dir itself has entries the iteration starts there and @dir_head is
 * marked used; otherwise it starts at the first usable header attached to
 * @dir.  *@phead is NULL when there is nothing to iterate.
 *
 * NOTE(review): the return value of use_table(head) is not checked here, yet
 * the caller later releases the use unconditionally - confirm @dir_head can
 * never be unregistering at this point.
 */
static void first_entry(
	struct ctl_table_header *dir_head, struct ctl_table *dir,
	struct ctl_table_header **phead, struct ctl_table **pentry)
{
	struct ctl_table_header *head = dir_head;
	struct ctl_table *entry = dir;

	spin_lock(&sysctl_lock);
	if (entry->procname) {
		use_table(head);
	} else {
		head = next_usable_entry(dir, &sysctl_table_root,
					 &sysctl_table_root.default_set.list);
		if (head)
			entry = head->attached_by;
	}
	spin_unlock(&sysctl_lock);
	*phead = head;
	*pentry = entry;
}

/*
 * Advance the iteration started by first_entry().
 *
 * Steps to the next entry of the current table; at the table's sentinel the
 * current header's use is dropped and the walk moves on to the next usable
 * header attached to @dir.  If the exhausted header was not itself attached
 * to @dir, the header walk restarts from the head of the default set's list.
 * *@phead becomes NULL when the iteration is finished.
 */
static void next_entry(struct ctl_table *dir,
	struct ctl_table_header **phead, struct ctl_table **pentry)
{
	struct ctl_table_header *head = *phead;
	struct ctl_table *entry = *pentry;

	entry++;
	if (!entry->procname) {
		struct ctl_table_root *root = head->root;
		struct list_head *tmp = &head->ctl_entry;
		if (head->attached_to != dir) {
			root = &sysctl_table_root;
			tmp = &sysctl_table_root.default_set.list;
		}
		spin_lock(&sysctl_lock);
		unuse_table(head);
		head = next_usable_entry(dir, root, tmp);
		spin_unlock(&sysctl_lock);
		if (head)
			entry = head->attached_by;
	}
	*phead = head;
	*pentry = entry;
}

/* Add @root to the tail of the global list of sysctl roots. */
void register_sysctl_root(struct ctl_table_root *root)
{
	spin_lock(&sysctl_lock);
	list_add_tail(&root->root_list, &sysctl_table_root.root_list);
	spin_unlock(&sysctl_lock);
}

/*
 * sysctl_perm does NOT grant the superuser all rights automatically, because
 * some sysctl variables are readonly even to root.
340 */ 341 342 static int test_perm(int mode, int op) 343 { 344 if (!current_euid()) 345 mode >>= 6; 346 else if (in_egroup_p(0)) 347 mode >>= 3; 348 if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0) 349 return 0; 350 return -EACCES; 351 } 352 353 static int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) 354 { 355 int mode; 356 357 if (root->permissions) 358 mode = root->permissions(root, current->nsproxy, table); 359 else 360 mode = table->mode; 361 362 return test_perm(mode, op); 363 } 364 365 static struct inode *proc_sys_make_inode(struct super_block *sb, 366 struct ctl_table_header *head, struct ctl_table *table) 367 { 368 struct inode *inode; 369 struct proc_inode *ei; 370 371 inode = new_inode(sb); 372 if (!inode) 373 goto out; 374 375 inode->i_ino = get_next_ino(); 376 377 sysctl_head_get(head); 378 ei = PROC_I(inode); 379 ei->sysctl = head; 380 ei->sysctl_entry = table; 381 382 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 383 inode->i_mode = table->mode; 384 if (!table->child) { 385 inode->i_mode |= S_IFREG; 386 inode->i_op = &proc_sys_inode_operations; 387 inode->i_fop = &proc_sys_file_operations; 388 } else { 389 inode->i_mode |= S_IFDIR; 390 inode->i_op = &proc_sys_dir_operations; 391 inode->i_fop = &proc_sys_dir_file_operations; 392 } 393 out: 394 return inode; 395 } 396 397 static struct ctl_table_header *grab_header(struct inode *inode) 398 { 399 struct ctl_table_header *head = PROC_I(inode)->sysctl; 400 if (!head) 401 head = &root_table_header; 402 return sysctl_head_grab(head); 403 } 404 405 static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, 406 struct nameidata *nd) 407 { 408 struct ctl_table_header *head = grab_header(dir); 409 struct ctl_table *table = PROC_I(dir)->sysctl_entry; 410 struct ctl_table_header *h = NULL; 411 struct qstr *name = &dentry->d_name; 412 struct ctl_table *p; 413 struct inode *inode; 414 struct dentry *err = ERR_PTR(-ENOENT); 415 416 if 
(IS_ERR(head)) 417 return ERR_CAST(head); 418 419 if (table && !table->child) { 420 WARN_ON(1); 421 goto out; 422 } 423 424 table = table ? table->child : &head->ctl_table[1]; 425 426 p = lookup_entry(&h, head, table, name->name, name->len); 427 if (!p) 428 goto out; 429 430 err = ERR_PTR(-ENOMEM); 431 inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); 432 if (h) 433 sysctl_head_finish(h); 434 435 if (!inode) 436 goto out; 437 438 err = NULL; 439 d_set_d_op(dentry, &proc_sys_dentry_operations); 440 d_add(dentry, inode); 441 442 out: 443 sysctl_head_finish(head); 444 return err; 445 } 446 447 static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, 448 size_t count, loff_t *ppos, int write) 449 { 450 struct inode *inode = filp->f_path.dentry->d_inode; 451 struct ctl_table_header *head = grab_header(inode); 452 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 453 ssize_t error; 454 size_t res; 455 456 if (IS_ERR(head)) 457 return PTR_ERR(head); 458 459 /* 460 * At this point we know that the sysctl was not unregistered 461 * and won't be until we finish. 462 */ 463 error = -EPERM; 464 if (sysctl_perm(head->root, table, write ? 
MAY_WRITE : MAY_READ)) 465 goto out; 466 467 /* if that can happen at all, it should be -EINVAL, not -EISDIR */ 468 error = -EINVAL; 469 if (!table->proc_handler) 470 goto out; 471 472 /* careful: calling conventions are nasty here */ 473 res = count; 474 error = table->proc_handler(table, write, buf, &res, ppos); 475 if (!error) 476 error = res; 477 out: 478 sysctl_head_finish(head); 479 480 return error; 481 } 482 483 static ssize_t proc_sys_read(struct file *filp, char __user *buf, 484 size_t count, loff_t *ppos) 485 { 486 return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 0); 487 } 488 489 static ssize_t proc_sys_write(struct file *filp, const char __user *buf, 490 size_t count, loff_t *ppos) 491 { 492 return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1); 493 } 494 495 static int proc_sys_open(struct inode *inode, struct file *filp) 496 { 497 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 498 499 if (table->poll) 500 filp->private_data = proc_sys_poll_event(table->poll); 501 502 return 0; 503 } 504 505 static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) 506 { 507 struct inode *inode = filp->f_path.dentry->d_inode; 508 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 509 unsigned long event = (unsigned long)filp->private_data; 510 unsigned int ret = DEFAULT_POLLMASK; 511 512 if (!table->proc_handler) 513 goto out; 514 515 if (!table->poll) 516 goto out; 517 518 poll_wait(filp, &table->poll->wait, wait); 519 520 if (event != atomic_read(&table->poll->event)) { 521 filp->private_data = proc_sys_poll_event(table->poll); 522 ret = POLLIN | POLLRDNORM | POLLERR | POLLPRI; 523 } 524 525 out: 526 return ret; 527 } 528 529 static int proc_sys_fill_cache(struct file *filp, void *dirent, 530 filldir_t filldir, 531 struct ctl_table_header *head, 532 struct ctl_table *table) 533 { 534 struct dentry *child, *dir = filp->f_path.dentry; 535 struct inode *inode; 536 struct qstr qname; 537 ino_t ino = 0; 538 
unsigned type = DT_UNKNOWN; 539 540 qname.name = table->procname; 541 qname.len = strlen(table->procname); 542 qname.hash = full_name_hash(qname.name, qname.len); 543 544 child = d_lookup(dir, &qname); 545 if (!child) { 546 child = d_alloc(dir, &qname); 547 if (child) { 548 inode = proc_sys_make_inode(dir->d_sb, head, table); 549 if (!inode) { 550 dput(child); 551 return -ENOMEM; 552 } else { 553 d_set_d_op(child, &proc_sys_dentry_operations); 554 d_add(child, inode); 555 } 556 } else { 557 return -ENOMEM; 558 } 559 } 560 inode = child->d_inode; 561 ino = inode->i_ino; 562 type = inode->i_mode >> 12; 563 dput(child); 564 return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); 565 } 566 567 static int scan(struct ctl_table_header *head, ctl_table *table, 568 unsigned long *pos, struct file *file, 569 void *dirent, filldir_t filldir) 570 { 571 int res; 572 573 if ((*pos)++ < file->f_pos) 574 return 0; 575 576 res = proc_sys_fill_cache(file, dirent, filldir, head, table); 577 578 if (res == 0) 579 file->f_pos = *pos; 580 581 return res; 582 } 583 584 static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) 585 { 586 struct dentry *dentry = filp->f_path.dentry; 587 struct inode *inode = dentry->d_inode; 588 struct ctl_table_header *head = grab_header(inode); 589 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 590 struct ctl_table_header *h = NULL; 591 struct ctl_table *entry; 592 unsigned long pos; 593 int ret = -EINVAL; 594 595 if (IS_ERR(head)) 596 return PTR_ERR(head); 597 598 if (table && !table->child) { 599 WARN_ON(1); 600 goto out; 601 } 602 603 table = table ? 
table->child : &head->ctl_table[1]; 604 605 ret = 0; 606 /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ 607 if (filp->f_pos == 0) { 608 if (filldir(dirent, ".", 1, filp->f_pos, 609 inode->i_ino, DT_DIR) < 0) 610 goto out; 611 filp->f_pos++; 612 } 613 if (filp->f_pos == 1) { 614 if (filldir(dirent, "..", 2, filp->f_pos, 615 parent_ino(dentry), DT_DIR) < 0) 616 goto out; 617 filp->f_pos++; 618 } 619 pos = 2; 620 621 for (first_entry(head, table, &h, &entry); h; next_entry(table, &h, &entry)) { 622 ret = scan(h, entry, &pos, filp, dirent, filldir); 623 if (ret) { 624 sysctl_head_finish(h); 625 break; 626 } 627 } 628 ret = 1; 629 out: 630 sysctl_head_finish(head); 631 return ret; 632 } 633 634 static int proc_sys_permission(struct inode *inode, int mask) 635 { 636 /* 637 * sysctl entries that are not writeable, 638 * are _NOT_ writeable, capabilities or not. 639 */ 640 struct ctl_table_header *head; 641 struct ctl_table *table; 642 int error; 643 644 /* Executable files are not allowed under /proc/sys/ */ 645 if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) 646 return -EACCES; 647 648 head = grab_header(inode); 649 if (IS_ERR(head)) 650 return PTR_ERR(head); 651 652 table = PROC_I(inode)->sysctl_entry; 653 if (!table) /* global root - r-xr-xr-x */ 654 error = mask & MAY_WRITE ? 
-EACCES : 0; 655 else /* Use the permissions on the sysctl table entry */ 656 error = sysctl_perm(head->root, table, mask & ~MAY_NOT_BLOCK); 657 658 sysctl_head_finish(head); 659 return error; 660 } 661 662 static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr) 663 { 664 struct inode *inode = dentry->d_inode; 665 int error; 666 667 if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)) 668 return -EPERM; 669 670 error = inode_change_ok(inode, attr); 671 if (error) 672 return error; 673 674 if ((attr->ia_valid & ATTR_SIZE) && 675 attr->ia_size != i_size_read(inode)) { 676 error = vmtruncate(inode, attr->ia_size); 677 if (error) 678 return error; 679 } 680 681 setattr_copy(inode, attr); 682 mark_inode_dirty(inode); 683 return 0; 684 } 685 686 static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 687 { 688 struct inode *inode = dentry->d_inode; 689 struct ctl_table_header *head = grab_header(inode); 690 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 691 692 if (IS_ERR(head)) 693 return PTR_ERR(head); 694 695 generic_fillattr(inode, stat); 696 if (table) 697 stat->mode = (stat->mode & S_IFMT) | table->mode; 698 699 sysctl_head_finish(head); 700 return 0; 701 } 702 703 static const struct file_operations proc_sys_file_operations = { 704 .open = proc_sys_open, 705 .poll = proc_sys_poll, 706 .read = proc_sys_read, 707 .write = proc_sys_write, 708 .llseek = default_llseek, 709 }; 710 711 static const struct file_operations proc_sys_dir_file_operations = { 712 .read = generic_read_dir, 713 .readdir = proc_sys_readdir, 714 .llseek = generic_file_llseek, 715 }; 716 717 static const struct inode_operations proc_sys_inode_operations = { 718 .permission = proc_sys_permission, 719 .setattr = proc_sys_setattr, 720 .getattr = proc_sys_getattr, 721 }; 722 723 static const struct inode_operations proc_sys_dir_operations = { 724 .lookup = proc_sys_lookup, 725 .permission = proc_sys_permission, 726 .setattr = 
proc_sys_setattr, 727 .getattr = proc_sys_getattr, 728 }; 729 730 static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd) 731 { 732 if (nd->flags & LOOKUP_RCU) 733 return -ECHILD; 734 return !PROC_I(dentry->d_inode)->sysctl->unregistering; 735 } 736 737 static int proc_sys_delete(const struct dentry *dentry) 738 { 739 return !!PROC_I(dentry->d_inode)->sysctl->unregistering; 740 } 741 742 static int sysctl_is_seen(struct ctl_table_header *p) 743 { 744 struct ctl_table_set *set = p->set; 745 int res; 746 spin_lock(&sysctl_lock); 747 if (p->unregistering) 748 res = 0; 749 else if (!set->is_seen) 750 res = 1; 751 else 752 res = set->is_seen(set); 753 spin_unlock(&sysctl_lock); 754 return res; 755 } 756 757 static int proc_sys_compare(const struct dentry *parent, 758 const struct inode *pinode, 759 const struct dentry *dentry, const struct inode *inode, 760 unsigned int len, const char *str, const struct qstr *name) 761 { 762 struct ctl_table_header *head; 763 /* Although proc doesn't have negative dentries, rcu-walk means 764 * that inode here can be NULL */ 765 /* AV: can it, indeed? */ 766 if (!inode) 767 return 1; 768 if (name->len != len) 769 return 1; 770 if (memcmp(name->name, str, len)) 771 return 1; 772 head = rcu_dereference(PROC_I(inode)->sysctl); 773 return !head || !sysctl_is_seen(head); 774 } 775 776 static const struct dentry_operations proc_sys_dentry_operations = { 777 .d_revalidate = proc_sys_revalidate, 778 .d_delete = proc_sys_delete, 779 .d_compare = proc_sys_compare, 780 }; 781 782 static struct ctl_table *is_branch_in(struct ctl_table *branch, 783 struct ctl_table *table) 784 { 785 struct ctl_table *p; 786 const char *s = branch->procname; 787 788 /* branch should have named subdirectory as its first element */ 789 if (!s || !branch->child) 790 return NULL; 791 792 /* ... 
and nothing else */ 793 if (branch[1].procname) 794 return NULL; 795 796 /* table should contain subdirectory with the same name */ 797 for (p = table; p->procname; p++) { 798 if (!p->child) 799 continue; 800 if (p->procname && strcmp(p->procname, s) == 0) 801 return p; 802 } 803 return NULL; 804 } 805 806 /* see if attaching q to p would be an improvement */ 807 static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) 808 { 809 struct ctl_table *to = p->ctl_table, *by = q->ctl_table; 810 struct ctl_table *next; 811 int is_better = 0; 812 int not_in_parent = !p->attached_by; 813 814 while ((next = is_branch_in(by, to)) != NULL) { 815 if (by == q->attached_by) 816 is_better = 1; 817 if (to == p->attached_by) 818 not_in_parent = 1; 819 by = by->child; 820 to = next->child; 821 } 822 823 if (is_better && not_in_parent) { 824 q->attached_by = by; 825 q->attached_to = to; 826 q->parent = p; 827 } 828 } 829 830 static int sysctl_check_table_dups(const char *path, struct ctl_table *old, 831 struct ctl_table *table) 832 { 833 struct ctl_table *entry, *test; 834 int error = 0; 835 836 for (entry = old; entry->procname; entry++) { 837 for (test = table; test->procname; test++) { 838 if (strcmp(entry->procname, test->procname) == 0) { 839 printk(KERN_ERR "sysctl duplicate entry: %s/%s\n", 840 path, test->procname); 841 error = -EEXIST; 842 } 843 } 844 } 845 return error; 846 } 847 848 static int sysctl_check_dups(struct nsproxy *namespaces, 849 struct ctl_table_header *header, 850 const char *path, struct ctl_table *table) 851 { 852 struct ctl_table_root *root; 853 struct ctl_table_set *set; 854 struct ctl_table_header *dir_head, *head; 855 struct ctl_table *dir_table; 856 int error = 0; 857 858 /* No dups if we are the only member of our directory */ 859 if (header->attached_by != table) 860 return 0; 861 862 dir_head = header->parent; 863 dir_table = header->attached_to; 864 865 error = sysctl_check_table_dups(path, dir_table, table); 866 867 root = 
&sysctl_table_root; 868 do { 869 set = lookup_header_set(root, namespaces); 870 871 list_for_each_entry(head, &set->list, ctl_entry) { 872 if (head->unregistering) 873 continue; 874 if (head->attached_to != dir_table) 875 continue; 876 error = sysctl_check_table_dups(path, head->attached_by, 877 table); 878 } 879 root = list_entry(root->root_list.next, 880 struct ctl_table_root, root_list); 881 } while (root != &sysctl_table_root); 882 return error; 883 } 884 885 static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...) 886 { 887 struct va_format vaf; 888 va_list args; 889 890 va_start(args, fmt); 891 vaf.fmt = fmt; 892 vaf.va = &args; 893 894 printk(KERN_ERR "sysctl table check failed: %s/%s %pV\n", 895 path, table->procname, &vaf); 896 897 va_end(args); 898 return -EINVAL; 899 } 900 901 static int sysctl_check_table(const char *path, struct ctl_table *table) 902 { 903 int err = 0; 904 for (; table->procname; table++) { 905 if (table->child) 906 err = sysctl_err(path, table, "Not a file"); 907 908 if ((table->proc_handler == proc_dostring) || 909 (table->proc_handler == proc_dointvec) || 910 (table->proc_handler == proc_dointvec_minmax) || 911 (table->proc_handler == proc_dointvec_jiffies) || 912 (table->proc_handler == proc_dointvec_userhz_jiffies) || 913 (table->proc_handler == proc_dointvec_ms_jiffies) || 914 (table->proc_handler == proc_doulongvec_minmax) || 915 (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { 916 if (!table->data) 917 err = sysctl_err(path, table, "No data"); 918 if (!table->maxlen) 919 err = sysctl_err(path, table, "No maxlen"); 920 } 921 if (!table->proc_handler) 922 err = sysctl_err(path, table, "No proc_handler"); 923 924 if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode) 925 err = sysctl_err(path, table, "bogus .mode 0%o", 926 table->mode); 927 } 928 return err; 929 } 930 931 /** 932 * __register_sysctl_table - register a leaf sysctl table 933 * @root: List of sysctl headers to register on 934 * 
@namespaces: Data to compute which lists of sysctl entries are visible 935 * @path: The path to the directory the sysctl table is in. 936 * @table: the top-level table structure 937 * 938 * Register a sysctl table hierarchy. @table should be a filled in ctl_table 939 * array. A completely 0 filled entry terminates the table. 940 * 941 * The members of the &struct ctl_table structure are used as follows: 942 * 943 * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not 944 * enter a sysctl file 945 * 946 * data - a pointer to data for use by proc_handler 947 * 948 * maxlen - the maximum size in bytes of the data 949 * 950 * mode - the file permissions for the /proc/sys file 951 * 952 * child - must be %NULL. 953 * 954 * proc_handler - the text handler routine (described below) 955 * 956 * extra1, extra2 - extra pointers usable by the proc handler routines 957 * 958 * Leaf nodes in the sysctl tree will be represented by a single file 959 * under /proc; non-leaf nodes will be represented by directories. 960 * 961 * There must be a proc_handler routine for any terminal nodes. 962 * Several default handlers are available to cover common cases - 963 * 964 * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), 965 * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), 966 * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax() 967 * 968 * It is the handler's job to read the input buffer from user memory 969 * and process it. The handler should return 0 on success. 970 * 971 * This routine returns %NULL on a failure to register, and a pointer 972 * to the table header on success. 
 */
struct ctl_table_header *__register_sysctl_table(
	struct ctl_table_root *root,
	struct nsproxy *namespaces,
	const char *path, struct ctl_table *table)
{
	struct ctl_table_header *header;
	struct ctl_table *new, **prevp;
	const char *name, *nextname;
	unsigned int npath = 0;
	struct ctl_table_set *set;
	size_t path_bytes = 0;
	char *new_name;

	/* Count the path components */
	for (name = path; name; name = nextname) {
		int namelen;
		nextname = strchr(name, '/');
		if (nextname) {
			namelen = nextname - name;
			nextname++;
		} else {
			namelen = strlen(name);
		}
		/* empty components (leading, trailing or doubled '/') are skipped */
		if (namelen == 0)
			continue;
		path_bytes += namelen + 1;
		npath++;
	}

	/*
	 * For each path component, allocate a 2-element ctl_table array.
	 * The first array element will be filled with the sysctl entry
	 * for this, the second will be the sentinel (procname == 0).
	 *
	 * We allocate everything in one go so that we don't have to
	 * worry about freeing additional memory in unregister_sysctl_table.
	 */
	header = kzalloc(sizeof(struct ctl_table_header) + path_bytes +
			 (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL);
	if (!header)
		return NULL;

	/* layout: header, then 2 * npath ctl_table slots, then the name bytes */
	new = (struct ctl_table *) (header + 1);
	new_name = (char *)(new + (2 * npath));

	/* Now connect the dots */
	prevp = &header->ctl_table;
	for (name = path; name; name = nextname) {
		int namelen;
		nextname = strchr(name, '/');
		if (nextname) {
			namelen = nextname - name;
			nextname++;
		} else {
			namelen = strlen(name);
		}
		if (namelen == 0)
			continue;
		memcpy(new_name, name, namelen);
		new_name[namelen] = '\0';

		new->procname = new_name;
		new->mode = 0555;

		*prevp = new;
		prevp = &new->child;

		new += 2;
		new_name += namelen + 1;
	}
	/* the caller's table hangs off the last directory entry */
	*prevp = table;

	init_header(header, root, NULL, table);
	if (sysctl_check_table(path, table))
		goto fail;

	spin_lock(&sysctl_lock);
	header->set = lookup_header_set(root, namespaces);
	/* start out attached to the root directory ... */
	header->attached_by = header->ctl_table;
	header->attached_to = &root_table[1];
	header->parent = &root_table_header;
	set = header->set;
	root = header->root;
	/*
	 * ... then let try_attach() move the attachment point, offering it
	 * every live header of this root's set and of each preceding root's
	 * set, back to and including sysctl_table_root.
	 */
	for (;;) {
		struct ctl_table_header *p;
		list_for_each_entry(p, &set->list, ctl_entry) {
			if (p->unregistering)
				continue;
			try_attach(p, header);
		}
		if (root == &sysctl_table_root)
			break;
		root = list_entry(root->root_list.prev,
				  struct ctl_table_root, root_list);
		set = lookup_header_set(root, namespaces);
	}
	if (sysctl_check_dups(namespaces, header, path, table))
		goto fail_locked;
	insert_header(header);
	spin_unlock(&sysctl_lock);

	return header;
fail_locked:
	spin_unlock(&sysctl_lock);
fail:
	kfree(header);
	dump_stack();
	return NULL;
}

/*
 * Append "@name/" at @pos inside the buffer starting at @path and return the
 * new end position (pointing at the terminating NUL).  Returns NULL, leaving
 * the buffer untouched, if the result would not fit within PATH_MAX.
 */
static char *append_path(const char *path, char *pos, const char *name)
{
	int namelen;
	namelen = strlen(name);
	if (((pos - path) + namelen + 2) >= PATH_MAX)
		return NULL;
	memcpy(pos, name, namelen);
	pos[namelen] = '/';
	pos[namelen + 1] = '\0';
	pos += namelen + 1;
	return pos;
}

/*
 * Count the headers needed to register @table and everything below it: one
 * for each directory that directly contains files, and one for each missing
 * or empty table.
 */
static int count_subheaders(struct ctl_table *table)
{
	int has_files = 0;
	int nr_subheaders = 0;
	struct ctl_table *entry;

	/* special case: no directory and empty directory */
	if (!table || !table->procname)
		return 1;

	for (entry = table; entry->procname; entry++) {
		if (entry->child)
			nr_subheaders += count_subheaders(entry->child);
		else
			has_files = 1;
	}
	return nr_subheaders + has_files;
}

/*
 * Register the files of @table under @path, then recurse into each of its
 * subdirectories.
 *
 * @path is the start of a PATH_MAX buffer and @pos the current end of the
 * path inside it; the buffer is extended for each subdirectory and cut back
 * to @pos afterwards.  Every header that gets registered is stored through
 * **@subheader, which is advanced past it.  When a table mixes files and
 * directories, the files are copied into a newly allocated table that is
 * recorded in the header's ctl_table_arg.
 *
 * Returns 0 on success, -ENOMEM or -ENAMETOOLONG on failure.
 */
static int register_leaf_sysctl_tables(const char *path, char *pos,
	struct ctl_table_header ***subheader,
	struct ctl_table_root *root, struct nsproxy *namespaces,
	struct ctl_table *table)
{
	struct ctl_table *ctl_table_arg = NULL;
	struct ctl_table *entry, *files;
	int nr_files = 0;
	int nr_dirs = 0;
	int err = -ENOMEM;

	for (entry = table; entry->procname; entry++) {
		if (entry->child)
			nr_dirs++;
		else
			nr_files++;
	}

	files = table;
	/* If there are mixed files and directories we need a new table */
	if (nr_dirs && nr_files) {
		struct ctl_table *new;
		files = kzalloc(sizeof(struct ctl_table) * (nr_files + 1),
				GFP_KERNEL);
		if (!files)
			goto out;

		ctl_table_arg = files;
		for (new = files, entry = table; entry->procname; entry++) {
			if (entry->child)
				continue;
			*new = *entry;
			new++;
		}
	}

	/* Register everything except a directory full of subdirectories */
	if (nr_files || !nr_dirs) {
		struct ctl_table_header *header;
		header = __register_sysctl_table(root, namespaces, path, files);
		if (!header) {
			kfree(ctl_table_arg);
			goto out;
		}

		/* Remember if we need to free the file table */
		header->ctl_table_arg = ctl_table_arg;
		**subheader = header;
		(*subheader)++;
	}

	/* Recurse into the subdirectories. */
	for (entry = table; entry->procname; entry++) {
		char *child_pos;

		if (!entry->child)
			continue;

		err = -ENAMETOOLONG;
		child_pos = append_path(path, pos, entry->procname);
		if (!child_pos)
			goto out;

		err = register_leaf_sysctl_tables(path, child_pos, subheader,
						  root, namespaces, entry->child);
		/* cut the path back to this directory */
		pos[0] = '\0';
		if (err)
			goto out;
	}
	err = 0;
out:
	/* On failure our caller will unregister all registered subheaders */
	return err;
}

/**
 * __register_sysctl_paths - register a sysctl table hierarchy
 * @root: List of sysctl headers to register on
 * @namespaces: Data to compute which lists of sysctl entries are visible
 * @path: The path to the directory the sysctl table is in.
 * @table: the top-level table structure
 *
 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
 * array. A completely 0 filled entry terminates the table.
 *
 * See __register_sysctl_table for more details.
1202 */ 1203 struct ctl_table_header *__register_sysctl_paths( 1204 struct ctl_table_root *root, 1205 struct nsproxy *namespaces, 1206 const struct ctl_path *path, struct ctl_table *table) 1207 { 1208 struct ctl_table *ctl_table_arg = table; 1209 int nr_subheaders = count_subheaders(table); 1210 struct ctl_table_header *header = NULL, **subheaders, **subheader; 1211 const struct ctl_path *component; 1212 char *new_path, *pos; 1213 1214 pos = new_path = kmalloc(PATH_MAX, GFP_KERNEL); 1215 if (!new_path) 1216 return NULL; 1217 1218 pos[0] = '\0'; 1219 for (component = path; component->procname; component++) { 1220 pos = append_path(new_path, pos, component->procname); 1221 if (!pos) 1222 goto out; 1223 } 1224 while (table->procname && table->child && !table[1].procname) { 1225 pos = append_path(new_path, pos, table->procname); 1226 if (!pos) 1227 goto out; 1228 table = table->child; 1229 } 1230 if (nr_subheaders == 1) { 1231 header = __register_sysctl_table(root, namespaces, new_path, table); 1232 if (header) 1233 header->ctl_table_arg = ctl_table_arg; 1234 } else { 1235 header = kzalloc(sizeof(*header) + 1236 sizeof(*subheaders)*nr_subheaders, GFP_KERNEL); 1237 if (!header) 1238 goto out; 1239 1240 subheaders = (struct ctl_table_header **) (header + 1); 1241 subheader = subheaders; 1242 header->ctl_table_arg = ctl_table_arg; 1243 1244 if (register_leaf_sysctl_tables(new_path, pos, &subheader, 1245 root, namespaces, table)) 1246 goto err_register_leaves; 1247 } 1248 1249 out: 1250 kfree(new_path); 1251 return header; 1252 1253 err_register_leaves: 1254 while (subheader > subheaders) { 1255 struct ctl_table_header *subh = *(--subheader); 1256 struct ctl_table *table = subh->ctl_table_arg; 1257 unregister_sysctl_table(subh); 1258 kfree(table); 1259 } 1260 kfree(header); 1261 header = NULL; 1262 goto out; 1263 } 1264 1265 /** 1266 * register_sysctl_table_path - register a sysctl table hierarchy 1267 * @path: The path to the directory the sysctl table is in. 
1268 * @table: the top-level table structure 1269 * 1270 * Register a sysctl table hierarchy. @table should be a filled in ctl_table 1271 * array. A completely 0 filled entry terminates the table. 1272 * 1273 * See __register_sysctl_paths for more details. 1274 */ 1275 struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, 1276 struct ctl_table *table) 1277 { 1278 return __register_sysctl_paths(&sysctl_table_root, current->nsproxy, 1279 path, table); 1280 } 1281 EXPORT_SYMBOL(register_sysctl_paths); 1282 1283 /** 1284 * register_sysctl_table - register a sysctl table hierarchy 1285 * @table: the top-level table structure 1286 * 1287 * Register a sysctl table hierarchy. @table should be a filled in ctl_table 1288 * array. A completely 0 filled entry terminates the table. 1289 * 1290 * See register_sysctl_paths for more details. 1291 */ 1292 struct ctl_table_header *register_sysctl_table(struct ctl_table *table) 1293 { 1294 static const struct ctl_path null_path[] = { {} }; 1295 1296 return register_sysctl_paths(null_path, table); 1297 } 1298 EXPORT_SYMBOL(register_sysctl_table); 1299 1300 static void drop_sysctl_table(struct ctl_table_header *header) 1301 { 1302 if (--header->nreg) 1303 return; 1304 1305 start_unregistering(header); 1306 if (!--header->parent->count) { 1307 WARN_ON(1); 1308 kfree_rcu(header->parent, rcu); 1309 } 1310 if (!--header->count) 1311 kfree_rcu(header, rcu); 1312 } 1313 1314 /** 1315 * unregister_sysctl_table - unregister a sysctl table hierarchy 1316 * @header: the header returned from register_sysctl_table 1317 * 1318 * Unregisters the sysctl table and all children. proc entries may not 1319 * actually be removed until they are no longer used by anyone. 
1320 */ 1321 void unregister_sysctl_table(struct ctl_table_header * header) 1322 { 1323 int nr_subheaders; 1324 might_sleep(); 1325 1326 if (header == NULL) 1327 return; 1328 1329 nr_subheaders = count_subheaders(header->ctl_table_arg); 1330 if (unlikely(nr_subheaders > 1)) { 1331 struct ctl_table_header **subheaders; 1332 int i; 1333 1334 subheaders = (struct ctl_table_header **)(header + 1); 1335 for (i = nr_subheaders -1; i >= 0; i--) { 1336 struct ctl_table_header *subh = subheaders[i]; 1337 struct ctl_table *table = subh->ctl_table_arg; 1338 unregister_sysctl_table(subh); 1339 kfree(table); 1340 } 1341 kfree(header); 1342 return; 1343 } 1344 1345 spin_lock(&sysctl_lock); 1346 drop_sysctl_table(header); 1347 spin_unlock(&sysctl_lock); 1348 } 1349 EXPORT_SYMBOL(unregister_sysctl_table); 1350 1351 void setup_sysctl_set(struct ctl_table_set *p, 1352 struct ctl_table_root *root, 1353 int (*is_seen)(struct ctl_table_set *)) 1354 { 1355 INIT_LIST_HEAD(&p->list); 1356 p->root = root; 1357 p->is_seen = is_seen; 1358 } 1359 1360 void retire_sysctl_set(struct ctl_table_set *set) 1361 { 1362 WARN_ON(!list_empty(&set->list)); 1363 } 1364 1365 int __init proc_sys_init(void) 1366 { 1367 struct proc_dir_entry *proc_sys_root; 1368 1369 proc_sys_root = proc_mkdir("sys", NULL); 1370 proc_sys_root->proc_iops = &proc_sys_dir_operations; 1371 proc_sys_root->proc_fops = &proc_sys_dir_file_operations; 1372 proc_sys_root->nlink = 0; 1373 1374 return sysctl_init(); 1375 } 1376