1 /* 2 * /proc/sys support 3 */ 4 #include <linux/init.h> 5 #include <linux/sysctl.h> 6 #include <linux/poll.h> 7 #include <linux/proc_fs.h> 8 #include <linux/security.h> 9 #include <linux/namei.h> 10 #include <linux/module.h> 11 #include "internal.h" 12 13 static const struct dentry_operations proc_sys_dentry_operations; 14 static const struct file_operations proc_sys_file_operations; 15 static const struct inode_operations proc_sys_inode_operations; 16 static const struct file_operations proc_sys_dir_file_operations; 17 static const struct inode_operations proc_sys_dir_operations; 18 19 void proc_sys_poll_notify(struct ctl_table_poll *poll) 20 { 21 if (!poll) 22 return; 23 24 atomic_inc(&poll->event); 25 wake_up_interruptible(&poll->wait); 26 } 27 28 static struct ctl_table root_table[1]; 29 static struct ctl_table_root sysctl_table_root; 30 static struct ctl_table_header root_table_header = { 31 {{.count = 1, 32 .nreg = 1, 33 .ctl_table = root_table, 34 .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, 35 .root = &sysctl_table_root, 36 .set = &sysctl_table_root.default_set, 37 }; 38 static struct ctl_table_root sysctl_table_root = { 39 .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list), 40 .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry), 41 }; 42 43 static DEFINE_SPINLOCK(sysctl_lock); 44 45 static void init_header(struct ctl_table_header *head, 46 struct ctl_table_root *root, struct ctl_table_set *set, 47 struct ctl_table *table) 48 { 49 head->ctl_table_arg = table; 50 INIT_LIST_HEAD(&head->ctl_entry); 51 head->used = 0; 52 head->count = 1; 53 head->nreg = 1; 54 head->unregistering = NULL; 55 head->root = root; 56 head->set = set; 57 head->parent = NULL; 58 } 59 60 /* called under sysctl_lock */ 61 static int use_table(struct ctl_table_header *p) 62 { 63 if (unlikely(p->unregistering)) 64 return 0; 65 p->used++; 66 return 1; 67 } 68 69 /* called under sysctl_lock */ 70 static void unuse_table(struct ctl_table_header *p) 71 { 72 if (!--p->used) 73 if (unlikely(p->unregistering)) 74 complete(p->unregistering); 75 } 76 77 /* called under sysctl_lock, will reacquire if has to wait */ 78 static void start_unregistering(struct ctl_table_header *p) 79 { 80 /* 81 * if p->used is 0, nobody will ever touch that entry again; 82 * we'll eliminate all paths to it before dropping sysctl_lock 83 */ 84 if (unlikely(p->used)) { 85 struct completion wait; 86 init_completion(&wait); 87 p->unregistering = &wait; 88 spin_unlock(&sysctl_lock); 89 wait_for_completion(&wait); 90 spin_lock(&sysctl_lock); 91 } else { 92 /* anything non-NULL; we'll never dereference it */ 93 p->unregistering = ERR_PTR(-EINVAL); 94 } 95 /* 96 * do not remove from the list until nobody holds it; walking the 97 * list in do_sysctl() relies on that. 98 */ 99 list_del_init(&p->ctl_entry); 100 } 101 102 static void sysctl_head_get(struct ctl_table_header *head) 103 { 104 spin_lock(&sysctl_lock); 105 head->count++; 106 spin_unlock(&sysctl_lock); 107 } 108 109 void sysctl_head_put(struct ctl_table_header *head) 110 { 111 spin_lock(&sysctl_lock); 112 if (!--head->count) 113 kfree_rcu(head, rcu); 114 spin_unlock(&sysctl_lock); 115 } 116 117 static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) 118 { 119 if (!head) 120 BUG(); 121 spin_lock(&sysctl_lock); 122 if (!use_table(head)) 123 head = ERR_PTR(-ENOENT); 124 spin_unlock(&sysctl_lock); 125 return head; 126 } 127 128 static void sysctl_head_finish(struct ctl_table_header *head) 129 { 130 if (!head) 131 return; 132 spin_lock(&sysctl_lock); 133 unuse_table(head); 134 spin_unlock(&sysctl_lock); 135 } 136 137 static struct ctl_table_set * 138 lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces) 139 { 140 struct ctl_table_set *set = &root->default_set; 141 if (root->lookup) 142 set = root->lookup(root, namespaces); 143 return set; 144 } 145 146 static struct list_head * 147 lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces) 148 { 149 struct ctl_table_set *set = lookup_header_set(root, namespaces); 150 return &set->list; 151 } 152 153 static struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, 154 struct ctl_table_header *prev) 155 { 156 struct ctl_table_root *root; 157 struct list_head *header_list; 158 struct ctl_table_header *head; 159 struct list_head *tmp; 160 161 spin_lock(&sysctl_lock); 162 if (prev) { 163 head = prev; 164 tmp = &prev->ctl_entry; 165 unuse_table(prev); 166 goto next; 167 } 168 tmp = &root_table_header.ctl_entry; 169 for (;;) { 170 head = list_entry(tmp, struct ctl_table_header, ctl_entry); 171 172 if (!use_table(head)) 173 goto next; 174 spin_unlock(&sysctl_lock); 175 return head; 176 next: 177 root = head->root; 178 tmp = tmp->next; 179 header_list = lookup_header_list(root, namespaces); 180 if (tmp != header_list) 181 continue; 182 183 do { 184 root = list_entry(root->root_list.next, 185 struct ctl_table_root, root_list); 186 if (root == &sysctl_table_root) 187 goto out; 188 header_list = lookup_header_list(root, namespaces); 189 } while (list_empty(header_list)); 190 tmp = header_list->next; 191 } 192 out: 193 spin_unlock(&sysctl_lock); 194 return NULL; 195 } 196 197 static struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev) 198 { 199 return __sysctl_head_next(current->nsproxy, prev); 200 } 201 202 void register_sysctl_root(struct ctl_table_root *root) 203 { 204 spin_lock(&sysctl_lock); 205 list_add_tail(&root->root_list, &sysctl_table_root.root_list); 206 spin_unlock(&sysctl_lock); 207 } 208 209 /* 210 * sysctl_perm does NOT grant the superuser all rights automatically, because 211 * some sysctl variables are readonly even to root. 212 */ 213 214 static int test_perm(int mode, int op) 215 { 216 if (!current_euid()) 217 mode >>= 6; 218 else if (in_egroup_p(0)) 219 mode >>= 3; 220 if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0) 221 return 0; 222 return -EACCES; 223 } 224 225 static int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) 226 { 227 int mode; 228 229 if (root->permissions) 230 mode = root->permissions(root, current->nsproxy, table); 231 else 232 mode = table->mode; 233 234 return test_perm(mode, op); 235 } 236 237 static struct inode *proc_sys_make_inode(struct super_block *sb, 238 struct ctl_table_header *head, struct ctl_table *table) 239 { 240 struct inode *inode; 241 struct proc_inode *ei; 242 243 inode = new_inode(sb); 244 if (!inode) 245 goto out; 246 247 inode->i_ino = get_next_ino(); 248 249 sysctl_head_get(head); 250 ei = PROC_I(inode); 251 ei->sysctl = head; 252 ei->sysctl_entry = table; 253 254 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 255 inode->i_mode = table->mode; 256 if (!table->child) { 257 inode->i_mode |= S_IFREG; 258 inode->i_op = &proc_sys_inode_operations; 259 inode->i_fop = &proc_sys_file_operations; 260 } else { 261 inode->i_mode |= S_IFDIR; 262 inode->i_op = &proc_sys_dir_operations; 263 inode->i_fop = &proc_sys_dir_file_operations; 264 } 265 out: 266 return inode; 267 } 268 269 static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name) 270 { 271 for ( ; p->procname; p++) { 272 if (strlen(p->procname) != name->len) 273 continue; 274 275 if (memcmp(p->procname, name->name, name->len) != 0) 276 continue; 277 278 /* I have a match */ 279 return p; 280 } 281 return NULL; 282 } 283 284 static struct ctl_table_header *grab_header(struct inode *inode) 285 { 286 struct ctl_table_header *head = PROC_I(inode)->sysctl; 287 if (!head) 288 head = &root_table_header; 289 return sysctl_head_grab(head); 290 } 291 292 static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, 293 struct nameidata *nd) 294 { 295 struct ctl_table_header *head = grab_header(dir); 296 struct ctl_table *table = PROC_I(dir)->sysctl_entry; 297 struct ctl_table_header *h = NULL; 298 struct qstr *name = &dentry->d_name; 299 struct ctl_table *p; 300 struct inode *inode; 301 struct dentry *err = ERR_PTR(-ENOENT); 302 303 if (IS_ERR(head)) 304 return ERR_CAST(head); 305 306 if (table && !table->child) { 307 WARN_ON(1); 308 goto out; 309 } 310 311 table = table ? table->child : head->ctl_table; 312 313 p = find_in_table(table, name); 314 if (!p) { 315 for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) { 316 if (h->attached_to != table) 317 continue; 318 p = find_in_table(h->attached_by, name); 319 if (p) 320 break; 321 } 322 } 323 324 if (!p) 325 goto out; 326 327 err = ERR_PTR(-ENOMEM); 328 inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); 329 if (h) 330 sysctl_head_finish(h); 331 332 if (!inode) 333 goto out; 334 335 err = NULL; 336 d_set_d_op(dentry, &proc_sys_dentry_operations); 337 d_add(dentry, inode); 338 339 out: 340 sysctl_head_finish(head); 341 return err; 342 } 343 344 static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, 345 size_t count, loff_t *ppos, int write) 346 { 347 struct inode *inode = filp->f_path.dentry->d_inode; 348 struct ctl_table_header *head = grab_header(inode); 349 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 350 ssize_t error; 351 size_t res; 352 353 if (IS_ERR(head)) 354 return PTR_ERR(head); 355 356 /* 357 * At this point we know that the sysctl was not unregistered 358 * and won't be until we finish. 359 */ 360 error = -EPERM; 361 if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ)) 362 goto out; 363 364 /* if that can happen at all, it should be -EINVAL, not -EISDIR */ 365 error = -EINVAL; 366 if (!table->proc_handler) 367 goto out; 368 369 /* careful: calling conventions are nasty here */ 370 res = count; 371 error = table->proc_handler(table, write, buf, &res, ppos); 372 if (!error) 373 error = res; 374 out: 375 sysctl_head_finish(head); 376 377 return error; 378 } 379 380 static ssize_t proc_sys_read(struct file *filp, char __user *buf, 381 size_t count, loff_t *ppos) 382 { 383 return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 0); 384 } 385 386 static ssize_t proc_sys_write(struct file *filp, const char __user *buf, 387 size_t count, loff_t *ppos) 388 { 389 return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1); 390 } 391 392 static int proc_sys_open(struct inode *inode, struct file *filp) 393 { 394 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 395 396 if (table->poll) 397 filp->private_data = proc_sys_poll_event(table->poll); 398 399 return 0; 400 } 401 402 static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) 403 { 404 struct inode *inode = filp->f_path.dentry->d_inode; 405 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 406 unsigned long event = (unsigned long)filp->private_data; 407 unsigned int ret = DEFAULT_POLLMASK; 408 409 if (!table->proc_handler) 410 goto out; 411 412 if (!table->poll) 413 goto out; 414 415 poll_wait(filp, &table->poll->wait, wait); 416 417 if (event != atomic_read(&table->poll->event)) { 418 filp->private_data = proc_sys_poll_event(table->poll); 419 ret = POLLIN | POLLRDNORM | POLLERR | POLLPRI; 420 } 421 422 out: 423 return ret; 424 } 425 426 static int proc_sys_fill_cache(struct file *filp, void *dirent, 427 filldir_t filldir, 428 struct ctl_table_header *head, 429 struct ctl_table *table) 430 { 431 struct dentry *child, *dir = filp->f_path.dentry; 432 struct inode *inode; 433 struct qstr qname; 434 ino_t ino = 0; 435 unsigned type = DT_UNKNOWN; 436 437 qname.name = table->procname; 438 qname.len = strlen(table->procname); 439 qname.hash = full_name_hash(qname.name, qname.len); 440 441 child = d_lookup(dir, &qname); 442 if (!child) { 443 child = d_alloc(dir, &qname); 444 if (child) { 445 inode = proc_sys_make_inode(dir->d_sb, head, table); 446 if (!inode) { 447 dput(child); 448 return -ENOMEM; 449 } else { 450 d_set_d_op(child, &proc_sys_dentry_operations); 451 d_add(child, inode); 452 } 453 } else { 454 return -ENOMEM; 455 } 456 } 457 inode = child->d_inode; 458 ino = inode->i_ino; 459 type = inode->i_mode >> 12; 460 dput(child); 461 return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); 462 } 463 464 static int scan(struct ctl_table_header *head, ctl_table *table, 465 unsigned long *pos, struct file *file, 466 void *dirent, filldir_t filldir) 467 { 468 469 for (; table->procname; table++, (*pos)++) { 470 int res; 471 472 if (*pos < file->f_pos) 473 continue; 474 475 res = proc_sys_fill_cache(file, dirent, filldir, head, table); 476 if (res) 477 return res; 478 479 file->f_pos = *pos + 1; 480 } 481 return 0; 482 } 483 484 static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) 485 { 486 struct dentry *dentry = filp->f_path.dentry; 487 struct inode *inode = dentry->d_inode; 488 struct ctl_table_header *head = grab_header(inode); 489 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 490 struct ctl_table_header *h = NULL; 491 unsigned long pos; 492 int ret = -EINVAL; 493 494 if (IS_ERR(head)) 495 return PTR_ERR(head); 496 497 if (table && !table->child) { 498 WARN_ON(1); 499 goto out; 500 } 501 502 table = table ? table->child : head->ctl_table; 503 504 ret = 0; 505 /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ 506 if (filp->f_pos == 0) { 507 if (filldir(dirent, ".", 1, filp->f_pos, 508 inode->i_ino, DT_DIR) < 0) 509 goto out; 510 filp->f_pos++; 511 } 512 if (filp->f_pos == 1) { 513 if (filldir(dirent, "..", 2, filp->f_pos, 514 parent_ino(dentry), DT_DIR) < 0) 515 goto out; 516 filp->f_pos++; 517 } 518 pos = 2; 519 520 ret = scan(head, table, &pos, filp, dirent, filldir); 521 if (ret) 522 goto out; 523 524 for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) { 525 if (h->attached_to != table) 526 continue; 527 ret = scan(h, h->attached_by, &pos, filp, dirent, filldir); 528 if (ret) { 529 sysctl_head_finish(h); 530 break; 531 } 532 } 533 ret = 1; 534 out: 535 sysctl_head_finish(head); 536 return ret; 537 } 538 539 static int proc_sys_permission(struct inode *inode, int mask) 540 { 541 /* 542 * sysctl entries that are not writeable, 543 * are _NOT_ writeable, capabilities or not. 544 */ 545 struct ctl_table_header *head; 546 struct ctl_table *table; 547 int error; 548 549 /* Executable files are not allowed under /proc/sys/ */ 550 if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) 551 return -EACCES; 552 553 head = grab_header(inode); 554 if (IS_ERR(head)) 555 return PTR_ERR(head); 556 557 table = PROC_I(inode)->sysctl_entry; 558 if (!table) /* global root - r-xr-xr-x */ 559 error = mask & MAY_WRITE ? -EACCES : 0; 560 else /* Use the permissions on the sysctl table entry */ 561 error = sysctl_perm(head->root, table, mask & ~MAY_NOT_BLOCK); 562 563 sysctl_head_finish(head); 564 return error; 565 } 566 567 static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr) 568 { 569 struct inode *inode = dentry->d_inode; 570 int error; 571 572 if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)) 573 return -EPERM; 574 575 error = inode_change_ok(inode, attr); 576 if (error) 577 return error; 578 579 if ((attr->ia_valid & ATTR_SIZE) && 580 attr->ia_size != i_size_read(inode)) { 581 error = vmtruncate(inode, attr->ia_size); 582 if (error) 583 return error; 584 } 585 586 setattr_copy(inode, attr); 587 mark_inode_dirty(inode); 588 return 0; 589 } 590 591 static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 592 { 593 struct inode *inode = dentry->d_inode; 594 struct ctl_table_header *head = grab_header(inode); 595 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 596 597 if (IS_ERR(head)) 598 return PTR_ERR(head); 599 600 generic_fillattr(inode, stat); 601 if (table) 602 stat->mode = (stat->mode & S_IFMT) | table->mode; 603 604 sysctl_head_finish(head); 605 return 0; 606 } 607 608 static const struct file_operations proc_sys_file_operations = { 609 .open = proc_sys_open, 610 .poll = proc_sys_poll, 611 .read = proc_sys_read, 612 .write = proc_sys_write, 613 .llseek = default_llseek, 614 }; 615 616 static const struct file_operations proc_sys_dir_file_operations = { 617 .read = generic_read_dir, 618 .readdir = proc_sys_readdir, 619 .llseek = generic_file_llseek, 620 }; 621 622 static const struct inode_operations proc_sys_inode_operations = { 623 .permission = proc_sys_permission, 624 .setattr = proc_sys_setattr, 625 .getattr = proc_sys_getattr, 626 }; 627 628 static const struct inode_operations proc_sys_dir_operations = { 629 .lookup = proc_sys_lookup, 630 .permission = proc_sys_permission, 631 .setattr = proc_sys_setattr, 632 .getattr = proc_sys_getattr, 633 }; 634 635 static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd) 636 { 637 if (nd->flags & LOOKUP_RCU) 638 return -ECHILD; 639 return !PROC_I(dentry->d_inode)->sysctl->unregistering; 640 } 641 642 static int proc_sys_delete(const struct dentry *dentry) 643 { 644 return !!PROC_I(dentry->d_inode)->sysctl->unregistering; 645 } 646 647 static int sysctl_is_seen(struct ctl_table_header *p) 648 { 649 struct ctl_table_set *set = p->set; 650 int res; 651 spin_lock(&sysctl_lock); 652 if (p->unregistering) 653 res = 0; 654 else if (!set->is_seen) 655 res = 1; 656 else 657 res = set->is_seen(set); 658 spin_unlock(&sysctl_lock); 659 return res; 660 } 661 662 static int proc_sys_compare(const struct dentry *parent, 663 const struct inode *pinode, 664 const struct dentry *dentry, const struct inode *inode, 665 unsigned int len, const char *str, const struct qstr *name) 666 { 667 struct ctl_table_header *head; 668 /* Although proc doesn't have negative dentries, rcu-walk means 669 * that inode here can be NULL */ 670 /* AV: can it, indeed? */ 671 if (!inode) 672 return 1; 673 if (name->len != len) 674 return 1; 675 if (memcmp(name->name, str, len)) 676 return 1; 677 head = rcu_dereference(PROC_I(inode)->sysctl); 678 return !head || !sysctl_is_seen(head); 679 } 680 681 static const struct dentry_operations proc_sys_dentry_operations = { 682 .d_revalidate = proc_sys_revalidate, 683 .d_delete = proc_sys_delete, 684 .d_compare = proc_sys_compare, 685 }; 686 687 static struct ctl_table *is_branch_in(struct ctl_table *branch, 688 struct ctl_table *table) 689 { 690 struct ctl_table *p; 691 const char *s = branch->procname; 692 693 /* branch should have named subdirectory as its first element */ 694 if (!s || !branch->child) 695 return NULL; 696 697 /* ... and nothing else */ 698 if (branch[1].procname) 699 return NULL; 700 701 /* table should contain subdirectory with the same name */ 702 for (p = table; p->procname; p++) { 703 if (!p->child) 704 continue; 705 if (p->procname && strcmp(p->procname, s) == 0) 706 return p; 707 } 708 return NULL; 709 } 710 711 /* see if attaching q to p would be an improvement */ 712 static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) 713 { 714 struct ctl_table *to = p->ctl_table, *by = q->ctl_table; 715 struct ctl_table *next; 716 int is_better = 0; 717 int not_in_parent = !p->attached_by; 718 719 while ((next = is_branch_in(by, to)) != NULL) { 720 if (by == q->attached_by) 721 is_better = 1; 722 if (to == p->attached_by) 723 not_in_parent = 1; 724 by = by->child; 725 to = next->child; 726 } 727 728 if (is_better && not_in_parent) { 729 q->attached_by = by; 730 q->attached_to = to; 731 q->parent = p; 732 } 733 } 734 735 static int sysctl_check_table_dups(const char *path, struct ctl_table *old, 736 struct ctl_table *table) 737 { 738 struct ctl_table *entry, *test; 739 int error = 0; 740 741 for (entry = old; entry->procname; entry++) { 742 for (test = table; test->procname; test++) { 743 if (strcmp(entry->procname, test->procname) == 0) { 744 printk(KERN_ERR "sysctl duplicate entry: %s/%s\n", 745 path, test->procname); 746 error = -EEXIST; 747 } 748 } 749 } 750 return error; 751 } 752 753 static int sysctl_check_dups(struct nsproxy *namespaces, 754 struct ctl_table_header *header, 755 const char *path, struct ctl_table *table) 756 { 757 struct ctl_table_root *root; 758 struct ctl_table_set *set; 759 struct ctl_table_header *dir_head, *head; 760 struct ctl_table *dir_table; 761 int error = 0; 762 763 /* No dups if we are the only member of our directory */ 764 if (header->attached_by != table) 765 return 0; 766 767 dir_head = header->parent; 768 dir_table = header->attached_to; 769 770 error = sysctl_check_table_dups(path, dir_table, table); 771 772 root = &sysctl_table_root; 773 do { 774 set = lookup_header_set(root, namespaces); 775 776 list_for_each_entry(head, &set->list, ctl_entry) { 777 if (head->unregistering) 778 continue; 779 if (head->attached_to != dir_table) 780 continue; 781 error = sysctl_check_table_dups(path, head->attached_by, 782 table); 783 } 784 root = list_entry(root->root_list.next, 785 struct ctl_table_root, root_list); 786 } while (root != &sysctl_table_root); 787 return error; 788 } 789 790 static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...) 791 { 792 struct va_format vaf; 793 va_list args; 794 795 va_start(args, fmt); 796 vaf.fmt = fmt; 797 vaf.va = &args; 798 799 printk(KERN_ERR "sysctl table check failed: %s/%s %pV\n", 800 path, table->procname, &vaf); 801 802 va_end(args); 803 return -EINVAL; 804 } 805 806 static int sysctl_check_table(const char *path, struct ctl_table *table) 807 { 808 int err = 0; 809 for (; table->procname; table++) { 810 if (table->child) 811 err = sysctl_err(path, table, "Not a file"); 812 813 if ((table->proc_handler == proc_dostring) || 814 (table->proc_handler == proc_dointvec) || 815 (table->proc_handler == proc_dointvec_minmax) || 816 (table->proc_handler == proc_dointvec_jiffies) || 817 (table->proc_handler == proc_dointvec_userhz_jiffies) || 818 (table->proc_handler == proc_dointvec_ms_jiffies) || 819 (table->proc_handler == proc_doulongvec_minmax) || 820 (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { 821 if (!table->data) 822 err = sysctl_err(path, table, "No data"); 823 if (!table->maxlen) 824 err = sysctl_err(path, table, "No maxlen"); 825 } 826 if (!table->proc_handler) 827 err = sysctl_err(path, table, "No proc_handler"); 828 829 if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode) 830 err = sysctl_err(path, table, "bogus .mode 0%o", 831 table->mode); 832 } 833 return err; 834 } 835 836 /** 837 * __register_sysctl_table - register a leaf sysctl table 838 * @root: List of sysctl headers to register on 839 * @namespaces: Data to compute which lists of sysctl entries are visible 840 * @path: The path to the directory the sysctl table is in. 841 * @table: the top-level table structure 842 * 843 * Register a sysctl table hierarchy. @table should be a filled in ctl_table 844 * array. A completely 0 filled entry terminates the table. 845 * 846 * The members of the &struct ctl_table structure are used as follows: 847 * 848 * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not 849 * enter a sysctl file 850 * 851 * data - a pointer to data for use by proc_handler 852 * 853 * maxlen - the maximum size in bytes of the data 854 * 855 * mode - the file permissions for the /proc/sys file 856 * 857 * child - must be %NULL. 858 * 859 * proc_handler - the text handler routine (described below) 860 * 861 * extra1, extra2 - extra pointers usable by the proc handler routines 862 * 863 * Leaf nodes in the sysctl tree will be represented by a single file 864 * under /proc; non-leaf nodes will be represented by directories. 865 * 866 * There must be a proc_handler routine for any terminal nodes. 867 * Several default handlers are available to cover common cases - 868 * 869 * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), 870 * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), 871 * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax() 872 * 873 * It is the handler's job to read the input buffer from user memory 874 * and process it. The handler should return 0 on success. 875 * 876 * This routine returns %NULL on a failure to register, and a pointer 877 * to the table header on success. 878 */ 879 struct ctl_table_header *__register_sysctl_table( 880 struct ctl_table_root *root, 881 struct nsproxy *namespaces, 882 const char *path, struct ctl_table *table) 883 { 884 struct ctl_table_header *header; 885 struct ctl_table *new, **prevp; 886 const char *name, *nextname; 887 unsigned int npath = 0; 888 struct ctl_table_set *set; 889 size_t path_bytes = 0; 890 char *new_name; 891 892 /* Count the path components */ 893 for (name = path; name; name = nextname) { 894 int namelen; 895 nextname = strchr(name, '/'); 896 if (nextname) { 897 namelen = nextname - name; 898 nextname++; 899 } else { 900 namelen = strlen(name); 901 } 902 if (namelen == 0) 903 continue; 904 path_bytes += namelen + 1; 905 npath++; 906 } 907 908 /* 909 * For each path component, allocate a 2-element ctl_table array. 910 * The first array element will be filled with the sysctl entry 911 * for this, the second will be the sentinel (procname == 0). 912 * 913 * We allocate everything in one go so that we don't have to 914 * worry about freeing additional memory in unregister_sysctl_table. 915 */ 916 header = kzalloc(sizeof(struct ctl_table_header) + path_bytes + 917 (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL); 918 if (!header) 919 return NULL; 920 921 new = (struct ctl_table *) (header + 1); 922 new_name = (char *)(new + (2 * npath)); 923 924 /* Now connect the dots */ 925 prevp = &header->ctl_table; 926 for (name = path; name; name = nextname) { 927 int namelen; 928 nextname = strchr(name, '/'); 929 if (nextname) { 930 namelen = nextname - name; 931 nextname++; 932 } else { 933 namelen = strlen(name); 934 } 935 if (namelen == 0) 936 continue; 937 memcpy(new_name, name, namelen); 938 new_name[namelen] = '\0'; 939 940 new->procname = new_name; 941 new->mode = 0555; 942 943 *prevp = new; 944 prevp = &new->child; 945 946 new += 2; 947 new_name += namelen + 1; 948 } 949 *prevp = table; 950 951 init_header(header, root, NULL, table); 952 if (sysctl_check_table(path, table)) 953 goto fail; 954 955 spin_lock(&sysctl_lock); 956 header->set = lookup_header_set(root, namespaces); 957 header->attached_by = header->ctl_table; 958 header->attached_to = root_table; 959 header->parent = &root_table_header; 960 set = header->set; 961 root = header->root; 962 for (;;) { 963 struct ctl_table_header *p; 964 list_for_each_entry(p, &set->list, ctl_entry) { 965 if (p->unregistering) 966 continue; 967 try_attach(p, header); 968 } 969 if (root == &sysctl_table_root) 970 break; 971 root = list_entry(root->root_list.prev, 972 struct ctl_table_root, root_list); 973 set = lookup_header_set(root, namespaces); 974 } 975 if (sysctl_check_dups(namespaces, header, path, table)) 976 goto fail_locked; 977 header->parent->count++; 978 list_add_tail(&header->ctl_entry, &header->set->list); 979 spin_unlock(&sysctl_lock); 980 981 return header; 982 fail_locked: 983 spin_unlock(&sysctl_lock); 984 fail: 985 kfree(header); 986 dump_stack(); 987 return NULL; 988 } 989 990 static char *append_path(const char *path, char *pos, const char *name) 991 { 992 int namelen; 993 namelen = strlen(name); 994 if (((pos - path) + namelen + 2) >= PATH_MAX) 995 return NULL; 996 memcpy(pos, name, namelen); 997 pos[namelen] = '/'; 998 pos[namelen + 1] = '\0'; 999 pos += namelen + 1; 1000 return pos; 1001 } 1002 1003 static int count_subheaders(struct ctl_table *table) 1004 { 1005 int has_files = 0; 1006 int nr_subheaders = 0; 1007 struct ctl_table *entry; 1008 1009 /* special case: no directory and empty directory */ 1010 if (!table || !table->procname) 1011 return 1; 1012 1013 for (entry = table; entry->procname; entry++) { 1014 if (entry->child) 1015 nr_subheaders += count_subheaders(entry->child); 1016 else 1017 has_files = 1; 1018 } 1019 return nr_subheaders + has_files; 1020 } 1021 1022 static int register_leaf_sysctl_tables(const char *path, char *pos, 1023 struct ctl_table_header ***subheader, 1024 struct ctl_table_root *root, struct nsproxy *namespaces, 1025 struct ctl_table *table) 1026 { 1027 struct ctl_table *ctl_table_arg = NULL; 1028 struct ctl_table *entry, *files; 1029 int nr_files = 0; 1030 int nr_dirs = 0; 1031 int err = -ENOMEM; 1032 1033 for (entry = table; entry->procname; entry++) { 1034 if (entry->child) 1035 nr_dirs++; 1036 else 1037 nr_files++; 1038 } 1039 1040 files = table; 1041 /* If there are mixed files and directories we need a new table */ 1042 if (nr_dirs && nr_files) { 1043 struct ctl_table *new; 1044 files = kzalloc(sizeof(struct ctl_table) * (nr_files + 1), 1045 GFP_KERNEL); 1046 if (!files) 1047 goto out; 1048 1049 ctl_table_arg = files; 1050 for (new = files, entry = table; entry->procname; entry++) { 1051 if (entry->child) 1052 continue; 1053 *new = *entry; 1054 new++; 1055 } 1056 } 1057 1058 /* Register everything except a directory full of subdirectories */ 1059 if (nr_files || !nr_dirs) { 1060 struct ctl_table_header *header; 1061 header = __register_sysctl_table(root, namespaces, path, files); 1062 if (!header) { 1063 kfree(ctl_table_arg); 1064 goto out; 1065 } 1066 1067 /* Remember if we need to free the file table */ 1068 header->ctl_table_arg = ctl_table_arg; 1069 **subheader = header; 1070 (*subheader)++; 1071 } 1072 1073 /* Recurse into the subdirectories. */ 1074 for (entry = table; entry->procname; entry++) { 1075 char *child_pos; 1076 1077 if (!entry->child) 1078 continue; 1079 1080 err = -ENAMETOOLONG; 1081 child_pos = append_path(path, pos, entry->procname); 1082 if (!child_pos) 1083 goto out; 1084 1085 err = register_leaf_sysctl_tables(path, child_pos, subheader, 1086 root, namespaces, entry->child); 1087 pos[0] = '\0'; 1088 if (err) 1089 goto out; 1090 } 1091 err = 0; 1092 out: 1093 /* On failure our caller will unregister all registered subheaders */ 1094 return err; 1095 } 1096 1097 /** 1098 * __register_sysctl_paths - register a sysctl table hierarchy 1099 * @root: List of sysctl headers to register on 1100 * @namespaces: Data to compute which lists of sysctl entries are visible 1101 * @path: The path to the directory the sysctl table is in. 1102 * @table: the top-level table structure 1103 * 1104 * Register a sysctl table hierarchy. @table should be a filled in ctl_table 1105 * array. A completely 0 filled entry terminates the table. 1106 * 1107 * See __register_sysctl_table for more details. 1108 */ 1109 struct ctl_table_header *__register_sysctl_paths( 1110 struct ctl_table_root *root, 1111 struct nsproxy *namespaces, 1112 const struct ctl_path *path, struct ctl_table *table) 1113 { 1114 struct ctl_table *ctl_table_arg = table; 1115 int nr_subheaders = count_subheaders(table); 1116 struct ctl_table_header *header = NULL, **subheaders, **subheader; 1117 const struct ctl_path *component; 1118 char *new_path, *pos; 1119 1120 pos = new_path = kmalloc(PATH_MAX, GFP_KERNEL); 1121 if (!new_path) 1122 return NULL; 1123 1124 pos[0] = '\0'; 1125 for (component = path; component->procname; component++) { 1126 pos = append_path(new_path, pos, component->procname); 1127 if (!pos) 1128 goto out; 1129 } 1130 while (table->procname && table->child && !table[1].procname) { 1131 pos = append_path(new_path, pos, table->procname); 1132 if (!pos) 1133 goto out; 1134 table = table->child; 1135 } 1136 if (nr_subheaders == 1) { 1137 header = __register_sysctl_table(root, namespaces, new_path, table); 1138 if (header) 1139 header->ctl_table_arg = ctl_table_arg; 1140 } else { 1141 header = kzalloc(sizeof(*header) + 1142 sizeof(*subheaders)*nr_subheaders, GFP_KERNEL); 1143 if (!header) 1144 goto out; 1145 1146 subheaders = (struct ctl_table_header **) (header + 1); 1147 subheader = subheaders; 1148 header->ctl_table_arg = ctl_table_arg; 1149 1150 if (register_leaf_sysctl_tables(new_path, pos, &subheader, 1151 root, namespaces, table)) 1152 goto err_register_leaves; 1153 } 1154 1155 out: 1156 kfree(new_path); 1157 return header; 1158 1159 err_register_leaves: 1160 while (subheader > subheaders) { 1161 struct ctl_table_header *subh = *(--subheader); 1162 struct ctl_table *table = subh->ctl_table_arg; 1163 unregister_sysctl_table(subh); 1164 kfree(table); 1165 } 1166 kfree(header); 1167 header = NULL; 1168 goto out; 1169 } 1170 1171 /** 1172 * register_sysctl_table_path - register a sysctl table hierarchy 1173 * @path: The path to the directory the sysctl table is in. 1174 * @table: the top-level table structure 1175 * 1176 * Register a sysctl table hierarchy. @table should be a filled in ctl_table 1177 * array. A completely 0 filled entry terminates the table. 1178 * 1179 * See __register_sysctl_paths for more details. 1180 */ 1181 struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, 1182 struct ctl_table *table) 1183 { 1184 return __register_sysctl_paths(&sysctl_table_root, current->nsproxy, 1185 path, table); 1186 } 1187 EXPORT_SYMBOL(register_sysctl_paths); 1188 1189 /** 1190 * register_sysctl_table - register a sysctl table hierarchy 1191 * @table: the top-level table structure 1192 * 1193 * Register a sysctl table hierarchy. @table should be a filled in ctl_table 1194 * array. A completely 0 filled entry terminates the table. 1195 * 1196 * See register_sysctl_paths for more details. 1197 */ 1198 struct ctl_table_header *register_sysctl_table(struct ctl_table *table) 1199 { 1200 static const struct ctl_path null_path[] = { {} }; 1201 1202 return register_sysctl_paths(null_path, table); 1203 } 1204 EXPORT_SYMBOL(register_sysctl_table); 1205 1206 static void drop_sysctl_table(struct ctl_table_header *header) 1207 { 1208 if (--header->nreg) 1209 return; 1210 1211 start_unregistering(header); 1212 if (!--header->parent->count) { 1213 WARN_ON(1); 1214 kfree_rcu(header->parent, rcu); 1215 } 1216 if (!--header->count) 1217 kfree_rcu(header, rcu); 1218 } 1219 1220 /** 1221 * unregister_sysctl_table - unregister a sysctl table hierarchy 1222 * @header: the header returned from register_sysctl_table 1223 * 1224 * Unregisters the sysctl table and all children. proc entries may not 1225 * actually be removed until they are no longer used by anyone. 1226 */ 1227 void unregister_sysctl_table(struct ctl_table_header * header) 1228 { 1229 int nr_subheaders; 1230 might_sleep(); 1231 1232 if (header == NULL) 1233 return; 1234 1235 nr_subheaders = count_subheaders(header->ctl_table_arg); 1236 if (unlikely(nr_subheaders > 1)) { 1237 struct ctl_table_header **subheaders; 1238 int i; 1239 1240 subheaders = (struct ctl_table_header **)(header + 1); 1241 for (i = nr_subheaders -1; i >= 0; i--) { 1242 struct ctl_table_header *subh = subheaders[i]; 1243 struct ctl_table *table = subh->ctl_table_arg; 1244 unregister_sysctl_table(subh); 1245 kfree(table); 1246 } 1247 kfree(header); 1248 return; 1249 } 1250 1251 spin_lock(&sysctl_lock); 1252 drop_sysctl_table(header); 1253 spin_unlock(&sysctl_lock); 1254 } 1255 EXPORT_SYMBOL(unregister_sysctl_table); 1256 1257 void setup_sysctl_set(struct ctl_table_set *p, 1258 int (*is_seen)(struct ctl_table_set *)) 1259 { 1260 INIT_LIST_HEAD(&p->list); 1261 p->is_seen = is_seen; 1262 } 1263 1264 void retire_sysctl_set(struct ctl_table_set *set) 1265 { 1266 WARN_ON(!list_empty(&set->list)); 1267 } 1268 1269 int __init proc_sys_init(void) 1270 { 1271 struct proc_dir_entry *proc_sys_root; 1272 1273 proc_sys_root = proc_mkdir("sys", NULL); 1274 proc_sys_root->proc_iops = &proc_sys_dir_operations; 1275 proc_sys_root->proc_fops = &proc_sys_dir_file_operations; 1276 proc_sys_root->nlink = 0; 1277 1278 return sysctl_init(); 1279 } 1280