1 /* 2 * /proc/sys support 3 */ 4 #include <linux/init.h> 5 #include <linux/sysctl.h> 6 #include <linux/poll.h> 7 #include <linux/proc_fs.h> 8 #include <linux/security.h> 9 #include <linux/namei.h> 10 #include <linux/module.h> 11 #include "internal.h" 12 13 static const struct dentry_operations proc_sys_dentry_operations; 14 static const struct file_operations proc_sys_file_operations; 15 static const struct inode_operations proc_sys_inode_operations; 16 static const struct file_operations proc_sys_dir_file_operations; 17 static const struct inode_operations proc_sys_dir_operations; 18 19 void proc_sys_poll_notify(struct ctl_table_poll *poll) 20 { 21 if (!poll) 22 return; 23 24 atomic_inc(&poll->event); 25 wake_up_interruptible(&poll->wait); 26 } 27 28 static struct ctl_table root_table[1]; 29 static struct ctl_table_root sysctl_table_root; 30 static struct ctl_table_header root_table_header = { 31 {{.count = 1, 32 .ctl_table = root_table, 33 .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, 34 .root = &sysctl_table_root, 35 .set = &sysctl_table_root.default_set, 36 }; 37 static struct ctl_table_root sysctl_table_root = { 38 .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list), 39 .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry), 40 }; 41 42 static DEFINE_SPINLOCK(sysctl_lock); 43 44 /* called under sysctl_lock */ 45 static int use_table(struct ctl_table_header *p) 46 { 47 if (unlikely(p->unregistering)) 48 return 0; 49 p->used++; 50 return 1; 51 } 52 53 /* called under sysctl_lock */ 54 static void unuse_table(struct ctl_table_header *p) 55 { 56 if (!--p->used) 57 if (unlikely(p->unregistering)) 58 complete(p->unregistering); 59 } 60 61 /* called under sysctl_lock, will reacquire if has to wait */ 62 static void start_unregistering(struct ctl_table_header *p) 63 { 64 /* 65 * if p->used is 0, nobody will ever touch that entry again; 66 * we'll eliminate all paths to it before dropping sysctl_lock 67 */ 68 if (unlikely(p->used)) 
{ 69 struct completion wait; 70 init_completion(&wait); 71 p->unregistering = &wait; 72 spin_unlock(&sysctl_lock); 73 wait_for_completion(&wait); 74 spin_lock(&sysctl_lock); 75 } else { 76 /* anything non-NULL; we'll never dereference it */ 77 p->unregistering = ERR_PTR(-EINVAL); 78 } 79 /* 80 * do not remove from the list until nobody holds it; walking the 81 * list in do_sysctl() relies on that. 82 */ 83 list_del_init(&p->ctl_entry); 84 } 85 86 static void sysctl_head_get(struct ctl_table_header *head) 87 { 88 spin_lock(&sysctl_lock); 89 head->count++; 90 spin_unlock(&sysctl_lock); 91 } 92 93 void sysctl_head_put(struct ctl_table_header *head) 94 { 95 spin_lock(&sysctl_lock); 96 if (!--head->count) 97 kfree_rcu(head, rcu); 98 spin_unlock(&sysctl_lock); 99 } 100 101 static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) 102 { 103 if (!head) 104 BUG(); 105 spin_lock(&sysctl_lock); 106 if (!use_table(head)) 107 head = ERR_PTR(-ENOENT); 108 spin_unlock(&sysctl_lock); 109 return head; 110 } 111 112 static void sysctl_head_finish(struct ctl_table_header *head) 113 { 114 if (!head) 115 return; 116 spin_lock(&sysctl_lock); 117 unuse_table(head); 118 spin_unlock(&sysctl_lock); 119 } 120 121 static struct ctl_table_set * 122 lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces) 123 { 124 struct ctl_table_set *set = &root->default_set; 125 if (root->lookup) 126 set = root->lookup(root, namespaces); 127 return set; 128 } 129 130 static struct list_head * 131 lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces) 132 { 133 struct ctl_table_set *set = lookup_header_set(root, namespaces); 134 return &set->list; 135 } 136 137 static struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, 138 struct ctl_table_header *prev) 139 { 140 struct ctl_table_root *root; 141 struct list_head *header_list; 142 struct ctl_table_header *head; 143 struct list_head *tmp; 144 145 spin_lock(&sysctl_lock); 146 if 
(prev) { 147 head = prev; 148 tmp = &prev->ctl_entry; 149 unuse_table(prev); 150 goto next; 151 } 152 tmp = &root_table_header.ctl_entry; 153 for (;;) { 154 head = list_entry(tmp, struct ctl_table_header, ctl_entry); 155 156 if (!use_table(head)) 157 goto next; 158 spin_unlock(&sysctl_lock); 159 return head; 160 next: 161 root = head->root; 162 tmp = tmp->next; 163 header_list = lookup_header_list(root, namespaces); 164 if (tmp != header_list) 165 continue; 166 167 do { 168 root = list_entry(root->root_list.next, 169 struct ctl_table_root, root_list); 170 if (root == &sysctl_table_root) 171 goto out; 172 header_list = lookup_header_list(root, namespaces); 173 } while (list_empty(header_list)); 174 tmp = header_list->next; 175 } 176 out: 177 spin_unlock(&sysctl_lock); 178 return NULL; 179 } 180 181 static struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev) 182 { 183 return __sysctl_head_next(current->nsproxy, prev); 184 } 185 186 void register_sysctl_root(struct ctl_table_root *root) 187 { 188 spin_lock(&sysctl_lock); 189 list_add_tail(&root->root_list, &sysctl_table_root.root_list); 190 spin_unlock(&sysctl_lock); 191 } 192 193 /* 194 * sysctl_perm does NOT grant the superuser all rights automatically, because 195 * some sysctl variables are readonly even to root. 
196 */ 197 198 static int test_perm(int mode, int op) 199 { 200 if (!current_euid()) 201 mode >>= 6; 202 else if (in_egroup_p(0)) 203 mode >>= 3; 204 if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0) 205 return 0; 206 return -EACCES; 207 } 208 209 static int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) 210 { 211 int mode; 212 213 if (root->permissions) 214 mode = root->permissions(root, current->nsproxy, table); 215 else 216 mode = table->mode; 217 218 return test_perm(mode, op); 219 } 220 221 static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table) 222 { 223 for (; table->procname; table++) { 224 table->parent = parent; 225 if (table->child) 226 sysctl_set_parent(table, table->child); 227 } 228 } 229 230 231 static struct inode *proc_sys_make_inode(struct super_block *sb, 232 struct ctl_table_header *head, struct ctl_table *table) 233 { 234 struct inode *inode; 235 struct proc_inode *ei; 236 237 inode = new_inode(sb); 238 if (!inode) 239 goto out; 240 241 inode->i_ino = get_next_ino(); 242 243 sysctl_head_get(head); 244 ei = PROC_I(inode); 245 ei->sysctl = head; 246 ei->sysctl_entry = table; 247 248 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 249 inode->i_mode = table->mode; 250 if (!table->child) { 251 inode->i_mode |= S_IFREG; 252 inode->i_op = &proc_sys_inode_operations; 253 inode->i_fop = &proc_sys_file_operations; 254 } else { 255 inode->i_mode |= S_IFDIR; 256 inode->i_op = &proc_sys_dir_operations; 257 inode->i_fop = &proc_sys_dir_file_operations; 258 } 259 out: 260 return inode; 261 } 262 263 static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name) 264 { 265 for ( ; p->procname; p++) { 266 if (strlen(p->procname) != name->len) 267 continue; 268 269 if (memcmp(p->procname, name->name, name->len) != 0) 270 continue; 271 272 /* I have a match */ 273 return p; 274 } 275 return NULL; 276 } 277 278 static struct ctl_table_header *grab_header(struct inode *inode) 
279 { 280 if (PROC_I(inode)->sysctl) 281 return sysctl_head_grab(PROC_I(inode)->sysctl); 282 else 283 return sysctl_head_next(NULL); 284 } 285 286 static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, 287 struct nameidata *nd) 288 { 289 struct ctl_table_header *head = grab_header(dir); 290 struct ctl_table *table = PROC_I(dir)->sysctl_entry; 291 struct ctl_table_header *h = NULL; 292 struct qstr *name = &dentry->d_name; 293 struct ctl_table *p; 294 struct inode *inode; 295 struct dentry *err = ERR_PTR(-ENOENT); 296 297 if (IS_ERR(head)) 298 return ERR_CAST(head); 299 300 if (table && !table->child) { 301 WARN_ON(1); 302 goto out; 303 } 304 305 table = table ? table->child : head->ctl_table; 306 307 p = find_in_table(table, name); 308 if (!p) { 309 for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) { 310 if (h->attached_to != table) 311 continue; 312 p = find_in_table(h->attached_by, name); 313 if (p) 314 break; 315 } 316 } 317 318 if (!p) 319 goto out; 320 321 err = ERR_PTR(-ENOMEM); 322 inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); 323 if (h) 324 sysctl_head_finish(h); 325 326 if (!inode) 327 goto out; 328 329 err = NULL; 330 d_set_d_op(dentry, &proc_sys_dentry_operations); 331 d_add(dentry, inode); 332 333 out: 334 sysctl_head_finish(head); 335 return err; 336 } 337 338 static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, 339 size_t count, loff_t *ppos, int write) 340 { 341 struct inode *inode = filp->f_path.dentry->d_inode; 342 struct ctl_table_header *head = grab_header(inode); 343 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 344 ssize_t error; 345 size_t res; 346 347 if (IS_ERR(head)) 348 return PTR_ERR(head); 349 350 /* 351 * At this point we know that the sysctl was not unregistered 352 * and won't be until we finish. 353 */ 354 error = -EPERM; 355 if (sysctl_perm(head->root, table, write ? 
MAY_WRITE : MAY_READ)) 356 goto out; 357 358 /* if that can happen at all, it should be -EINVAL, not -EISDIR */ 359 error = -EINVAL; 360 if (!table->proc_handler) 361 goto out; 362 363 /* careful: calling conventions are nasty here */ 364 res = count; 365 error = table->proc_handler(table, write, buf, &res, ppos); 366 if (!error) 367 error = res; 368 out: 369 sysctl_head_finish(head); 370 371 return error; 372 } 373 374 static ssize_t proc_sys_read(struct file *filp, char __user *buf, 375 size_t count, loff_t *ppos) 376 { 377 return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 0); 378 } 379 380 static ssize_t proc_sys_write(struct file *filp, const char __user *buf, 381 size_t count, loff_t *ppos) 382 { 383 return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1); 384 } 385 386 static int proc_sys_open(struct inode *inode, struct file *filp) 387 { 388 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 389 390 if (table->poll) 391 filp->private_data = proc_sys_poll_event(table->poll); 392 393 return 0; 394 } 395 396 static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) 397 { 398 struct inode *inode = filp->f_path.dentry->d_inode; 399 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 400 unsigned long event = (unsigned long)filp->private_data; 401 unsigned int ret = DEFAULT_POLLMASK; 402 403 if (!table->proc_handler) 404 goto out; 405 406 if (!table->poll) 407 goto out; 408 409 poll_wait(filp, &table->poll->wait, wait); 410 411 if (event != atomic_read(&table->poll->event)) { 412 filp->private_data = proc_sys_poll_event(table->poll); 413 ret = POLLIN | POLLRDNORM | POLLERR | POLLPRI; 414 } 415 416 out: 417 return ret; 418 } 419 420 static int proc_sys_fill_cache(struct file *filp, void *dirent, 421 filldir_t filldir, 422 struct ctl_table_header *head, 423 struct ctl_table *table) 424 { 425 struct dentry *child, *dir = filp->f_path.dentry; 426 struct inode *inode; 427 struct qstr qname; 428 ino_t ino = 0; 429 
unsigned type = DT_UNKNOWN; 430 431 qname.name = table->procname; 432 qname.len = strlen(table->procname); 433 qname.hash = full_name_hash(qname.name, qname.len); 434 435 child = d_lookup(dir, &qname); 436 if (!child) { 437 child = d_alloc(dir, &qname); 438 if (child) { 439 inode = proc_sys_make_inode(dir->d_sb, head, table); 440 if (!inode) { 441 dput(child); 442 return -ENOMEM; 443 } else { 444 d_set_d_op(child, &proc_sys_dentry_operations); 445 d_add(child, inode); 446 } 447 } else { 448 return -ENOMEM; 449 } 450 } 451 inode = child->d_inode; 452 ino = inode->i_ino; 453 type = inode->i_mode >> 12; 454 dput(child); 455 return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); 456 } 457 458 static int scan(struct ctl_table_header *head, ctl_table *table, 459 unsigned long *pos, struct file *file, 460 void *dirent, filldir_t filldir) 461 { 462 463 for (; table->procname; table++, (*pos)++) { 464 int res; 465 466 if (*pos < file->f_pos) 467 continue; 468 469 res = proc_sys_fill_cache(file, dirent, filldir, head, table); 470 if (res) 471 return res; 472 473 file->f_pos = *pos + 1; 474 } 475 return 0; 476 } 477 478 static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) 479 { 480 struct dentry *dentry = filp->f_path.dentry; 481 struct inode *inode = dentry->d_inode; 482 struct ctl_table_header *head = grab_header(inode); 483 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 484 struct ctl_table_header *h = NULL; 485 unsigned long pos; 486 int ret = -EINVAL; 487 488 if (IS_ERR(head)) 489 return PTR_ERR(head); 490 491 if (table && !table->child) { 492 WARN_ON(1); 493 goto out; 494 } 495 496 table = table ? 
table->child : head->ctl_table; 497 498 ret = 0; 499 /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ 500 if (filp->f_pos == 0) { 501 if (filldir(dirent, ".", 1, filp->f_pos, 502 inode->i_ino, DT_DIR) < 0) 503 goto out; 504 filp->f_pos++; 505 } 506 if (filp->f_pos == 1) { 507 if (filldir(dirent, "..", 2, filp->f_pos, 508 parent_ino(dentry), DT_DIR) < 0) 509 goto out; 510 filp->f_pos++; 511 } 512 pos = 2; 513 514 ret = scan(head, table, &pos, filp, dirent, filldir); 515 if (ret) 516 goto out; 517 518 for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) { 519 if (h->attached_to != table) 520 continue; 521 ret = scan(h, h->attached_by, &pos, filp, dirent, filldir); 522 if (ret) { 523 sysctl_head_finish(h); 524 break; 525 } 526 } 527 ret = 1; 528 out: 529 sysctl_head_finish(head); 530 return ret; 531 } 532 533 static int proc_sys_permission(struct inode *inode, int mask) 534 { 535 /* 536 * sysctl entries that are not writeable, 537 * are _NOT_ writeable, capabilities or not. 538 */ 539 struct ctl_table_header *head; 540 struct ctl_table *table; 541 int error; 542 543 /* Executable files are not allowed under /proc/sys/ */ 544 if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) 545 return -EACCES; 546 547 head = grab_header(inode); 548 if (IS_ERR(head)) 549 return PTR_ERR(head); 550 551 table = PROC_I(inode)->sysctl_entry; 552 if (!table) /* global root - r-xr-xr-x */ 553 error = mask & MAY_WRITE ? 
-EACCES : 0; 554 else /* Use the permissions on the sysctl table entry */ 555 error = sysctl_perm(head->root, table, mask & ~MAY_NOT_BLOCK); 556 557 sysctl_head_finish(head); 558 return error; 559 } 560 561 static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr) 562 { 563 struct inode *inode = dentry->d_inode; 564 int error; 565 566 if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)) 567 return -EPERM; 568 569 error = inode_change_ok(inode, attr); 570 if (error) 571 return error; 572 573 if ((attr->ia_valid & ATTR_SIZE) && 574 attr->ia_size != i_size_read(inode)) { 575 error = vmtruncate(inode, attr->ia_size); 576 if (error) 577 return error; 578 } 579 580 setattr_copy(inode, attr); 581 mark_inode_dirty(inode); 582 return 0; 583 } 584 585 static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 586 { 587 struct inode *inode = dentry->d_inode; 588 struct ctl_table_header *head = grab_header(inode); 589 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 590 591 if (IS_ERR(head)) 592 return PTR_ERR(head); 593 594 generic_fillattr(inode, stat); 595 if (table) 596 stat->mode = (stat->mode & S_IFMT) | table->mode; 597 598 sysctl_head_finish(head); 599 return 0; 600 } 601 602 static const struct file_operations proc_sys_file_operations = { 603 .open = proc_sys_open, 604 .poll = proc_sys_poll, 605 .read = proc_sys_read, 606 .write = proc_sys_write, 607 .llseek = default_llseek, 608 }; 609 610 static const struct file_operations proc_sys_dir_file_operations = { 611 .read = generic_read_dir, 612 .readdir = proc_sys_readdir, 613 .llseek = generic_file_llseek, 614 }; 615 616 static const struct inode_operations proc_sys_inode_operations = { 617 .permission = proc_sys_permission, 618 .setattr = proc_sys_setattr, 619 .getattr = proc_sys_getattr, 620 }; 621 622 static const struct inode_operations proc_sys_dir_operations = { 623 .lookup = proc_sys_lookup, 624 .permission = proc_sys_permission, 625 .setattr = 
proc_sys_setattr, 626 .getattr = proc_sys_getattr, 627 }; 628 629 static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd) 630 { 631 if (nd->flags & LOOKUP_RCU) 632 return -ECHILD; 633 return !PROC_I(dentry->d_inode)->sysctl->unregistering; 634 } 635 636 static int proc_sys_delete(const struct dentry *dentry) 637 { 638 return !!PROC_I(dentry->d_inode)->sysctl->unregistering; 639 } 640 641 static int sysctl_is_seen(struct ctl_table_header *p) 642 { 643 struct ctl_table_set *set = p->set; 644 int res; 645 spin_lock(&sysctl_lock); 646 if (p->unregistering) 647 res = 0; 648 else if (!set->is_seen) 649 res = 1; 650 else 651 res = set->is_seen(set); 652 spin_unlock(&sysctl_lock); 653 return res; 654 } 655 656 static int proc_sys_compare(const struct dentry *parent, 657 const struct inode *pinode, 658 const struct dentry *dentry, const struct inode *inode, 659 unsigned int len, const char *str, const struct qstr *name) 660 { 661 struct ctl_table_header *head; 662 /* Although proc doesn't have negative dentries, rcu-walk means 663 * that inode here can be NULL */ 664 /* AV: can it, indeed? */ 665 if (!inode) 666 return 1; 667 if (name->len != len) 668 return 1; 669 if (memcmp(name->name, str, len)) 670 return 1; 671 head = rcu_dereference(PROC_I(inode)->sysctl); 672 return !head || !sysctl_is_seen(head); 673 } 674 675 static const struct dentry_operations proc_sys_dentry_operations = { 676 .d_revalidate = proc_sys_revalidate, 677 .d_delete = proc_sys_delete, 678 .d_compare = proc_sys_compare, 679 }; 680 681 static struct ctl_table *is_branch_in(struct ctl_table *branch, 682 struct ctl_table *table) 683 { 684 struct ctl_table *p; 685 const char *s = branch->procname; 686 687 /* branch should have named subdirectory as its first element */ 688 if (!s || !branch->child) 689 return NULL; 690 691 /* ... 
and nothing else */ 692 if (branch[1].procname) 693 return NULL; 694 695 /* table should contain subdirectory with the same name */ 696 for (p = table; p->procname; p++) { 697 if (!p->child) 698 continue; 699 if (p->procname && strcmp(p->procname, s) == 0) 700 return p; 701 } 702 return NULL; 703 } 704 705 /* see if attaching q to p would be an improvement */ 706 static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) 707 { 708 struct ctl_table *to = p->ctl_table, *by = q->ctl_table; 709 struct ctl_table *next; 710 int is_better = 0; 711 int not_in_parent = !p->attached_by; 712 713 while ((next = is_branch_in(by, to)) != NULL) { 714 if (by == q->attached_by) 715 is_better = 1; 716 if (to == p->attached_by) 717 not_in_parent = 1; 718 by = by->child; 719 to = next->child; 720 } 721 722 if (is_better && not_in_parent) { 723 q->attached_by = by; 724 q->attached_to = to; 725 q->parent = p; 726 } 727 } 728 729 #ifdef CONFIG_SYSCTL_SYSCALL_CHECK 730 static int sysctl_depth(struct ctl_table *table) 731 { 732 struct ctl_table *tmp; 733 int depth; 734 735 depth = 0; 736 for (tmp = table; tmp->parent; tmp = tmp->parent) 737 depth++; 738 739 return depth; 740 } 741 742 static struct ctl_table *sysctl_parent(struct ctl_table *table, int n) 743 { 744 int i; 745 746 for (i = 0; table && i < n; i++) 747 table = table->parent; 748 749 return table; 750 } 751 752 753 static void sysctl_print_path(struct ctl_table *table) 754 { 755 struct ctl_table *tmp; 756 int depth, i; 757 depth = sysctl_depth(table); 758 if (table->procname) { 759 for (i = depth; i >= 0; i--) { 760 tmp = sysctl_parent(table, i); 761 printk("/%s", tmp->procname?tmp->procname:""); 762 } 763 } 764 printk(" "); 765 } 766 767 static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces, 768 struct ctl_table *table) 769 { 770 struct ctl_table_header *head; 771 struct ctl_table *ref, *test; 772 int depth, cur_depth; 773 774 depth = sysctl_depth(table); 775 776 for (head = 
__sysctl_head_next(namespaces, NULL); head; 777 head = __sysctl_head_next(namespaces, head)) { 778 cur_depth = depth; 779 ref = head->ctl_table; 780 repeat: 781 test = sysctl_parent(table, cur_depth); 782 for (; ref->procname; ref++) { 783 int match = 0; 784 if (cur_depth && !ref->child) 785 continue; 786 787 if (test->procname && ref->procname && 788 (strcmp(test->procname, ref->procname) == 0)) 789 match++; 790 791 if (match) { 792 if (cur_depth != 0) { 793 cur_depth--; 794 ref = ref->child; 795 goto repeat; 796 } 797 goto out; 798 } 799 } 800 } 801 ref = NULL; 802 out: 803 sysctl_head_finish(head); 804 return ref; 805 } 806 807 static void set_fail(const char **fail, struct ctl_table *table, const char *str) 808 { 809 if (*fail) { 810 printk(KERN_ERR "sysctl table check failed: "); 811 sysctl_print_path(table); 812 printk(" %s\n", *fail); 813 dump_stack(); 814 } 815 *fail = str; 816 } 817 818 static void sysctl_check_leaf(struct nsproxy *namespaces, 819 struct ctl_table *table, const char **fail) 820 { 821 struct ctl_table *ref; 822 823 ref = sysctl_check_lookup(namespaces, table); 824 if (ref && (ref != table)) 825 set_fail(fail, table, "Sysctl already exists"); 826 } 827 828 static int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table) 829 { 830 int error = 0; 831 for (; table->procname; table++) { 832 const char *fail = NULL; 833 834 if (table->parent) { 835 if (!table->parent->procname) 836 set_fail(&fail, table, "Parent without procname"); 837 } 838 if (table->child) { 839 if (table->data) 840 set_fail(&fail, table, "Directory with data?"); 841 if (table->maxlen) 842 set_fail(&fail, table, "Directory with maxlen?"); 843 if ((table->mode & (S_IRUGO|S_IXUGO)) != table->mode) 844 set_fail(&fail, table, "Writable sysctl directory"); 845 if (table->proc_handler) 846 set_fail(&fail, table, "Directory with proc_handler"); 847 if (table->extra1) 848 set_fail(&fail, table, "Directory with extra1"); 849 if (table->extra2) 850 set_fail(&fail, 
table, "Directory with extra2"); 851 } else { 852 if ((table->proc_handler == proc_dostring) || 853 (table->proc_handler == proc_dointvec) || 854 (table->proc_handler == proc_dointvec_minmax) || 855 (table->proc_handler == proc_dointvec_jiffies) || 856 (table->proc_handler == proc_dointvec_userhz_jiffies) || 857 (table->proc_handler == proc_dointvec_ms_jiffies) || 858 (table->proc_handler == proc_doulongvec_minmax) || 859 (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { 860 if (!table->data) 861 set_fail(&fail, table, "No data"); 862 if (!table->maxlen) 863 set_fail(&fail, table, "No maxlen"); 864 } 865 #ifdef CONFIG_PROC_SYSCTL 866 if (!table->proc_handler) 867 set_fail(&fail, table, "No proc_handler"); 868 #endif 869 sysctl_check_leaf(namespaces, table, &fail); 870 } 871 if (table->mode > 0777) 872 set_fail(&fail, table, "bogus .mode"); 873 if (fail) { 874 set_fail(&fail, table, NULL); 875 error = -EINVAL; 876 } 877 if (table->child) 878 error |= sysctl_check_table(namespaces, table->child); 879 } 880 return error; 881 } 882 #endif /* CONFIG_SYSCTL_SYSCALL_CHECK */ 883 884 /** 885 * __register_sysctl_paths - register a sysctl hierarchy 886 * @root: List of sysctl headers to register on 887 * @namespaces: Data to compute which lists of sysctl entries are visible 888 * @path: The path to the directory the sysctl table is in. 889 * @table: the top-level table structure 890 * 891 * Register a sysctl table hierarchy. @table should be a filled in ctl_table 892 * array. A completely 0 filled entry terminates the table. 893 * 894 * The members of the &struct ctl_table structure are used as follows: 895 * 896 * procname - the name of the sysctl file under /proc/sys. 
 * Set to %NULL to not enter a sysctl file
 *
 * data - a pointer to data for use by proc_handler
 *
 * maxlen - the maximum size in bytes of the data
 *
 * mode - the file permissions for the /proc/sys file, and for sysctl(2)
 *
 * child - a pointer to the child sysctl table if this entry is a directory, or
 *         %NULL.
 *
 * proc_handler - the text handler routine (described below)
 *
 * de - for internal use by the sysctl routines
 *
 * extra1, extra2 - extra pointers usable by the proc handler routines
 *
 * Leaf nodes in the sysctl tree will be represented by a single file
 * under /proc; non-leaf nodes will be represented by directories.
 *
 * sysctl(2) can automatically manage read and write requests through
 * the sysctl table.  The data and maxlen fields of the ctl_table
 * struct enable minimal validation of the values being written to be
 * performed, and the mode field allows minimal authentication.
 *
 * There must be a proc_handler routine for any terminal nodes
 * mirrored under /proc/sys (non-terminals are handled by a built-in
 * directory handler).  Several default handlers are available to
 * cover common cases -
 *
 * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
 * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
 * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
 *
 * It is the handler's job to read the input buffer from user memory
 * and process it. The handler should return 0 on success.
 *
 * This routine returns %NULL on a failure to register, and a pointer
 * to the table header on success.
936 */ 937 struct ctl_table_header *__register_sysctl_paths( 938 struct ctl_table_root *root, 939 struct nsproxy *namespaces, 940 const struct ctl_path *path, struct ctl_table *table) 941 { 942 struct ctl_table_header *header; 943 struct ctl_table *new, **prevp; 944 unsigned int n, npath; 945 struct ctl_table_set *set; 946 947 /* Count the path components */ 948 for (npath = 0; path[npath].procname; ++npath) 949 ; 950 951 /* 952 * For each path component, allocate a 2-element ctl_table array. 953 * The first array element will be filled with the sysctl entry 954 * for this, the second will be the sentinel (procname == 0). 955 * 956 * We allocate everything in one go so that we don't have to 957 * worry about freeing additional memory in unregister_sysctl_table. 958 */ 959 header = kzalloc(sizeof(struct ctl_table_header) + 960 (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL); 961 if (!header) 962 return NULL; 963 964 new = (struct ctl_table *) (header + 1); 965 966 /* Now connect the dots */ 967 prevp = &header->ctl_table; 968 for (n = 0; n < npath; ++n, ++path) { 969 /* Copy the procname */ 970 new->procname = path->procname; 971 new->mode = 0555; 972 973 *prevp = new; 974 prevp = &new->child; 975 976 new += 2; 977 } 978 *prevp = table; 979 header->ctl_table_arg = table; 980 981 INIT_LIST_HEAD(&header->ctl_entry); 982 header->used = 0; 983 header->unregistering = NULL; 984 header->root = root; 985 sysctl_set_parent(NULL, header->ctl_table); 986 header->count = 1; 987 #ifdef CONFIG_SYSCTL_SYSCALL_CHECK 988 if (sysctl_check_table(namespaces, header->ctl_table)) { 989 kfree(header); 990 return NULL; 991 } 992 #endif 993 spin_lock(&sysctl_lock); 994 header->set = lookup_header_set(root, namespaces); 995 header->attached_by = header->ctl_table; 996 header->attached_to = root_table; 997 header->parent = &root_table_header; 998 for (set = header->set; set; set = set->parent) { 999 struct ctl_table_header *p; 1000 list_for_each_entry(p, &set->list, ctl_entry) { 1001 if 
(p->unregistering) 1002 continue; 1003 try_attach(p, header); 1004 } 1005 } 1006 header->parent->count++; 1007 list_add_tail(&header->ctl_entry, &header->set->list); 1008 spin_unlock(&sysctl_lock); 1009 1010 return header; 1011 } 1012 1013 /** 1014 * register_sysctl_table_path - register a sysctl table hierarchy 1015 * @path: The path to the directory the sysctl table is in. 1016 * @table: the top-level table structure 1017 * 1018 * Register a sysctl table hierarchy. @table should be a filled in ctl_table 1019 * array. A completely 0 filled entry terminates the table. 1020 * 1021 * See __register_sysctl_paths for more details. 1022 */ 1023 struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, 1024 struct ctl_table *table) 1025 { 1026 return __register_sysctl_paths(&sysctl_table_root, current->nsproxy, 1027 path, table); 1028 } 1029 EXPORT_SYMBOL(register_sysctl_paths); 1030 1031 /** 1032 * register_sysctl_table - register a sysctl table hierarchy 1033 * @table: the top-level table structure 1034 * 1035 * Register a sysctl table hierarchy. @table should be a filled in ctl_table 1036 * array. A completely 0 filled entry terminates the table. 1037 * 1038 * See register_sysctl_paths for more details. 1039 */ 1040 struct ctl_table_header *register_sysctl_table(struct ctl_table *table) 1041 { 1042 static const struct ctl_path null_path[] = { {} }; 1043 1044 return register_sysctl_paths(null_path, table); 1045 } 1046 EXPORT_SYMBOL(register_sysctl_table); 1047 1048 /** 1049 * unregister_sysctl_table - unregister a sysctl table hierarchy 1050 * @header: the header returned from register_sysctl_table 1051 * 1052 * Unregisters the sysctl table and all children. proc entries may not 1053 * actually be removed until they are no longer used by anyone. 
1054 */ 1055 void unregister_sysctl_table(struct ctl_table_header * header) 1056 { 1057 might_sleep(); 1058 1059 if (header == NULL) 1060 return; 1061 1062 spin_lock(&sysctl_lock); 1063 start_unregistering(header); 1064 if (!--header->parent->count) { 1065 WARN_ON(1); 1066 kfree_rcu(header->parent, rcu); 1067 } 1068 if (!--header->count) 1069 kfree_rcu(header, rcu); 1070 spin_unlock(&sysctl_lock); 1071 } 1072 EXPORT_SYMBOL(unregister_sysctl_table); 1073 1074 void setup_sysctl_set(struct ctl_table_set *p, 1075 struct ctl_table_set *parent, 1076 int (*is_seen)(struct ctl_table_set *)) 1077 { 1078 INIT_LIST_HEAD(&p->list); 1079 p->parent = parent ? parent : &sysctl_table_root.default_set; 1080 p->is_seen = is_seen; 1081 } 1082 1083 1084 int __init proc_sys_init(void) 1085 { 1086 struct proc_dir_entry *proc_sys_root; 1087 1088 proc_sys_root = proc_mkdir("sys", NULL); 1089 proc_sys_root->proc_iops = &proc_sys_dir_operations; 1090 proc_sys_root->proc_fops = &proc_sys_dir_file_operations; 1091 proc_sys_root->nlink = 0; 1092 1093 return sysctl_init(); 1094 } 1095