/*
 *  linux/fs/namespace.c
 *
 * (C) Copyright Al Viro 2000, 2001
 *	Released under GPL v2.
 *
 * Based on code from fs/super.c, copyright Linus Torvalds and others.
 * Heavily rewritten.
 */

#include <linux/config.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/quotaops.h>
#include <linux/acct.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/namespace.h>
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/mount.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>

extern int __init init_rootfs(void);

#ifdef CONFIG_SYSFS
extern int __init sysfs_init(void);
#else
static inline int sysfs_init(void)
{
	return 0;
}
#endif

/* spinlock for vfsmount related operations, in place of dcache_lock */
__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);

static struct list_head *mount_hashtable;
static int hash_mask __read_mostly, hash_bits __read_mostly;
static kmem_cache_t *mnt_cache;

/*
 * Fold the (mnt, dentry) pointer pair into a hash table index; dividing
 * by L1_CACHE_BYTES first discards the low bits, which are identical
 * for all cache-aligned allocations.
 */
static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
{
	unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
	tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
	tmp = tmp + (tmp >> hash_bits);
	return tmp & hash_mask;
}

struct vfsmount *alloc_vfsmnt(const char *name)
{
	struct vfsmount *mnt = kmem_cache_alloc(mnt_cache, GFP_KERNEL);
	if (mnt) {
		memset(mnt, 0, sizeof(struct vfsmount));
		atomic_set(&mnt->mnt_count, 1);
		INIT_LIST_HEAD(&mnt->mnt_hash);
		INIT_LIST_HEAD(&mnt->mnt_child);
		INIT_LIST_HEAD(&mnt->mnt_mounts);
		INIT_LIST_HEAD(&mnt->mnt_list);
		INIT_LIST_HEAD(&mnt->mnt_expire);
		if (name) {
			int size = strlen(name) + 1;
			char *newname = kmalloc(size, GFP_KERNEL);
			if (newname) {
				memcpy(newname, name, size);
				mnt->mnt_devname = newname;
			}
		}
	}
	return mnt;
}

void free_vfsmnt(struct vfsmount *mnt)
{
	kfree(mnt->mnt_devname);
	kmem_cache_free(mnt_cache, mnt);
}

/*
 * Now, lookup_mnt increments the ref count before returning
 * the vfsmount struct.
 */
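/*
 * Typical caller pattern (cf. follow_mount() in fs/namei.c): walk down
 * through stacked mounts by repeatedly looking up the child mounted on
 * the current (mnt, dentry) pair, dropping the references to the old
 * pair once the child has been acquired.
 */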
struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
{
	struct list_head *head = mount_hashtable + hash(mnt, dentry);
	struct list_head *tmp = head;
	struct vfsmount *p, *found = NULL;

	spin_lock(&vfsmount_lock);
	for (;;) {
		tmp = tmp->next;
		p = NULL;
		if (tmp == head)
			break;
		p = list_entry(tmp, struct vfsmount, mnt_hash);
		if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) {
			found = mntget(p);
			break;
		}
	}
	spin_unlock(&vfsmount_lock);
	return found;
}

static inline int check_mnt(struct vfsmount *mnt)
{
	return mnt->mnt_namespace == current->namespace;
}

/*
 * detach_mnt() unhooks a mount from its parent, returning the old
 * location in @old_nd; attach_mnt() hooks one in at @nd. Callers hold
 * vfsmount_lock.
 */
static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd)
{
	old_nd->dentry = mnt->mnt_mountpoint;
	old_nd->mnt = mnt->mnt_parent;
	mnt->mnt_parent = mnt;
	mnt->mnt_mountpoint = mnt->mnt_root;
	list_del_init(&mnt->mnt_child);
	list_del_init(&mnt->mnt_hash);
	old_nd->dentry->d_mounted--;
}

static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd)
{
	mnt->mnt_parent = mntget(nd->mnt);
	mnt->mnt_mountpoint = dget(nd->dentry);
	list_add(&mnt->mnt_hash, mount_hashtable + hash(nd->mnt, nd->dentry));
	list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts);
	nd->dentry->d_mounted++;
}

/* depth-first traversal of the mount tree rooted at @root */
static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
{
	struct list_head *next = p->mnt_mounts.next;
	if (next == &p->mnt_mounts) {
		while (1) {
			if (p == root)
				return NULL;
			next = p->mnt_child.next;
			if (next != &p->mnt_parent->mnt_mounts)
				break;
			p = p->mnt_parent;
		}
	}
	return list_entry(next, struct vfsmount, mnt_child);
}

static struct vfsmount *
clone_mnt(struct vfsmount *old, struct dentry *root)
{
	struct super_block *sb = old->mnt_sb;
	struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname);

	if (mnt) {
		mnt->mnt_flags = old->mnt_flags;
		atomic_inc(&sb->s_active);
		mnt->mnt_sb = sb;
		mnt->mnt_root = dget(root);
		mnt->mnt_mountpoint = mnt->mnt_root;
		mnt->mnt_parent = mnt;
		mnt->mnt_namespace = current->namespace;

		/* stick the duplicate mount on the same expiry list
		 * as the original if that was on one */
		spin_lock(&vfsmount_lock);
		if (!list_empty(&old->mnt_expire))
			list_add(&mnt->mnt_expire, &old->mnt_expire);
		spin_unlock(&vfsmount_lock);
	}
	return mnt;
}

void __mntput(struct vfsmount *mnt)
{
	struct super_block *sb = mnt->mnt_sb;
	dput(mnt->mnt_root);
	free_vfsmnt(mnt);
	deactivate_super(sb);
}

EXPORT_SYMBOL(__mntput);
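/*
 * /proc/mounts is built on the seq_file iterator below: m_start() takes
 * the namespace semaphore shared and seeks to *pos, m_next() steps
 * through namespace->list, and m_stop() drops the semaphore again.
 */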
/* iterator */
static void *m_start(struct seq_file *m, loff_t *pos)
{
	struct namespace *n = m->private;
	struct list_head *p;
	loff_t l = *pos;

	down_read(&n->sem);
	list_for_each(p, &n->list)
		if (!l--)
			return list_entry(p, struct vfsmount, mnt_list);
	return NULL;
}

static void *m_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct namespace *n = m->private;
	struct list_head *p = ((struct vfsmount *)v)->mnt_list.next;
	(*pos)++;
	return p == &n->list ? NULL : list_entry(p, struct vfsmount, mnt_list);
}

static void m_stop(struct seq_file *m, void *v)
{
	struct namespace *n = m->private;
	up_read(&n->sem);
}

static inline void mangle(struct seq_file *m, const char *s)
{
	seq_escape(m, s, " \t\n\\");
}

static int show_vfsmnt(struct seq_file *m, void *v)
{
	struct vfsmount *mnt = v;
	int err = 0;
	static struct proc_fs_info {
		int flag;
		char *str;
	} fs_info[] = {
		{ MS_SYNCHRONOUS, ",sync" },
		{ MS_DIRSYNC, ",dirsync" },
		{ MS_MANDLOCK, ",mand" },
		{ MS_NOATIME, ",noatime" },
		{ MS_NODIRATIME, ",nodiratime" },
		{ 0, NULL }
	};
	static struct proc_fs_info mnt_info[] = {
		{ MNT_NOSUID, ",nosuid" },
		{ MNT_NODEV, ",nodev" },
		{ MNT_NOEXEC, ",noexec" },
		{ 0, NULL }
	};
	struct proc_fs_info *fs_infop;

	mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
	seq_putc(m, ' ');
	seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
	seq_putc(m, ' ');
	mangle(m, mnt->mnt_sb->s_type->name);
	seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw");
	for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
		if (mnt->mnt_sb->s_flags & fs_infop->flag)
			seq_puts(m, fs_infop->str);
	}
	for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
		if (mnt->mnt_flags & fs_infop->flag)
			seq_puts(m, fs_infop->str);
	}
	if (mnt->mnt_sb->s_op->show_options)
		err = mnt->mnt_sb->s_op->show_options(m, mnt);
	seq_puts(m, " 0 0\n");
	return err;
}

struct seq_operations mounts_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_vfsmnt
};

/**
 * may_umount_tree - check if a mount tree is busy
 * @mnt: root of mount tree
 *
 * This is called to check if a tree of mounts has any
 * open files, pwds, chroots or sub mounts that are
 * busy.
 */
int may_umount_tree(struct vfsmount *mnt)
{
	struct list_head *next;
	struct vfsmount *this_parent = mnt;
	int actual_refs;
	int minimum_refs;

	spin_lock(&vfsmount_lock);
	actual_refs = atomic_read(&mnt->mnt_count);
	minimum_refs = 2;
repeat:
	next = this_parent->mnt_mounts.next;
resume:
	while (next != &this_parent->mnt_mounts) {
		struct vfsmount *p = list_entry(next, struct vfsmount, mnt_child);

		next = next->next;

		actual_refs += atomic_read(&p->mnt_count);
		minimum_refs += 2;

		if (!list_empty(&p->mnt_mounts)) {
			this_parent = p;
			goto repeat;
		}
	}

	if (this_parent != mnt) {
		next = this_parent->mnt_child.next;
		this_parent = this_parent->mnt_parent;
		goto resume;
	}
	spin_unlock(&vfsmount_lock);

	if (actual_refs > minimum_refs)
		return -EBUSY;

	return 0;
}

EXPORT_SYMBOL(may_umount_tree);
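/*
 * The "2" used above and below is the quiescent reference count of an
 * attached mount: one reference for being attached in the mount tree
 * and one for the caller's own pin on the mount (cf. the
 * "1 [parent vfsmount] + 1 [sys_umount]" comment in do_umount()).
 */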
/**
 * may_umount - check if a mount point is busy
 * @mnt: root of mount
 *
 * This is called to check if a mount point has any
 * open files, pwds, chroots or sub mounts. If the
 * mount has sub mounts this will return busy
 * regardless of whether the sub mounts are busy.
 *
 * Doesn't take quota and stuff into account. IOW, in some cases it will
 * give false negatives. The main reason why it's here is that we need
 * a non-destructive way to look for easily umountable filesystems.
 */
int may_umount(struct vfsmount *mnt)
{
	if (atomic_read(&mnt->mnt_count) > 2)
		return -EBUSY;
	return 0;
}

EXPORT_SYMBOL(may_umount);

/*
 * umount_tree() must be called with vfsmount_lock held; it drops and
 * retakes the lock around the final mntput() of each victim mount.
 */
static void umount_tree(struct vfsmount *mnt)
{
	struct vfsmount *p;
	LIST_HEAD(kill);

	for (p = mnt; p; p = next_mnt(p, mnt)) {
		list_del(&p->mnt_list);
		list_add(&p->mnt_list, &kill);
		p->mnt_namespace = NULL;
	}

	while (!list_empty(&kill)) {
		mnt = list_entry(kill.next, struct vfsmount, mnt_list);
		list_del_init(&mnt->mnt_list);
		list_del_init(&mnt->mnt_expire);
		if (mnt->mnt_parent == mnt) {
			spin_unlock(&vfsmount_lock);
		} else {
			struct nameidata old_nd;
			detach_mnt(mnt, &old_nd);
			spin_unlock(&vfsmount_lock);
			path_release(&old_nd);
		}
		mntput(mnt);
		spin_lock(&vfsmount_lock);
	}
}
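/*
 * do_umount() does the real work of umount(2): it drives the MNT_EXPIRE
 * mark-and-retry protocol, gives MNT_FORCE filesystems a chance to abort
 * in-flight requests via ->umount_begin(), remounts the root filesystem
 * read-only instead of unmounting it, and otherwise detaches the tree
 * under namespace->sem and vfsmount_lock.
 */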
static int do_umount(struct vfsmount *mnt, int flags)
{
	struct super_block *sb = mnt->mnt_sb;
	int retval;

	retval = security_sb_umount(mnt, flags);
	if (retval)
		return retval;

	/*
	 * Allow userspace to request a mountpoint be expired rather than
	 * unmounting unconditionally. Unmount only happens if:
	 *  (1) the mark is already set (the mark is cleared by mntput())
	 *  (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount]
	 */
	if (flags & MNT_EXPIRE) {
		if (mnt == current->fs->rootmnt ||
		    flags & (MNT_FORCE | MNT_DETACH))
			return -EINVAL;

		if (atomic_read(&mnt->mnt_count) != 2)
			return -EBUSY;

		if (!xchg(&mnt->mnt_expiry_mark, 1))
			return -EAGAIN;
	}

	/*
	 * If we may have to abort operations to get out of this
	 * mount, and they will themselves hold resources we must
	 * allow the fs to do things. In the Unix tradition of
	 * 'Gee, that's tricky, let's do it in userspace' the umount_begin
	 * might fail to complete on the first run through as other tasks
	 * must return, and the like. That's for the mount program to worry
	 * about for the moment.
	 */

	lock_kernel();
	if ((flags & MNT_FORCE) && sb->s_op->umount_begin)
		sb->s_op->umount_begin(sb);
	unlock_kernel();

	/*
	 * No sense to grab the lock for this test, but test itself looks
	 * somewhat bogus. Suggestions for better replacement?
	 * Ho-hum... In principle, we might treat that as umount + switch
	 * to rootfs. GC would eventually take care of the old vfsmount.
	 * Actually it makes sense, especially if rootfs would contain a
	 * /reboot - static binary that would close all descriptors and
	 * call reboot(2). Then init(8) could umount root and exec /reboot.
	 */
	if (mnt == current->fs->rootmnt && !(flags & MNT_DETACH)) {
		/*
		 * Special case for "unmounting" root ...
		 * we just try to remount it readonly.
		 */
		down_write(&sb->s_umount);
		if (!(sb->s_flags & MS_RDONLY)) {
			lock_kernel();
			DQUOT_OFF(sb);
			retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
			unlock_kernel();
		}
		up_write(&sb->s_umount);
		return retval;
	}

	down_write(&current->namespace->sem);
	spin_lock(&vfsmount_lock);

	if (atomic_read(&sb->s_active) == 1) {
		/* last instance - try to be smart */
		spin_unlock(&vfsmount_lock);
		lock_kernel();
		DQUOT_OFF(sb);
		acct_auto_close(sb);
		unlock_kernel();
		security_sb_umount_close(mnt);
		spin_lock(&vfsmount_lock);
	}
	retval = -EBUSY;
	if (atomic_read(&mnt->mnt_count) == 2 || flags & MNT_DETACH) {
		if (!list_empty(&mnt->mnt_list))
			umount_tree(mnt);
		retval = 0;
	}
	spin_unlock(&vfsmount_lock);
	if (retval)
		security_sb_umount_busy(mnt);
	up_write(&current->namespace->sem);
	return retval;
}

/*
 * Now umount can handle mount points as well as block devices.
 * This is important for filesystems which use unnamed block devices.
 *
 * We now support a flag for forced unmount like the other 'big iron'
 * unixes. Our API is identical to OSF/1 to avoid making a mess of AMD
 */

asmlinkage long sys_umount(char __user *name, int flags)
{
	struct nameidata nd;
	int retval;

	retval = __user_walk(name, LOOKUP_FOLLOW, &nd);
	if (retval)
		goto out;
	retval = -EINVAL;
	if (nd.dentry != nd.mnt->mnt_root)
		goto dput_and_out;
	if (!check_mnt(nd.mnt))
		goto dput_and_out;

	retval = -EPERM;
	if (!capable(CAP_SYS_ADMIN))
		goto dput_and_out;

	retval = do_umount(nd.mnt, flags);
dput_and_out:
	path_release_on_umount(&nd);
out:
	return retval;
}
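/*
 * For reference, the umount2(2) flag values handled above: MNT_FORCE
 * (0x1) invokes ->umount_begin(), MNT_DETACH (0x2) detaches the tree
 * even while busy, and MNT_EXPIRE (0x4) only unmounts an already-marked,
 * otherwise unused mount.
 */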
#ifdef __ARCH_WANT_SYS_OLDUMOUNT

/*
 * The 2.0 compatible umount. No flags.
 */

asmlinkage long sys_oldumount(char __user *name)
{
	return sys_umount(name, 0);
}

#endif

static int mount_is_safe(struct nameidata *nd)
{
	if (capable(CAP_SYS_ADMIN))
		return 0;
	return -EPERM;
#ifdef notyet
	if (S_ISLNK(nd->dentry->d_inode->i_mode))
		return -EPERM;
	if (nd->dentry->d_inode->i_mode & S_ISVTX) {
		if (current->uid != nd->dentry->d_inode->i_uid)
			return -EPERM;
	}
	if (permission(nd->dentry->d_inode, MAY_WRITE, nd))
		return -EPERM;
	return 0;
#endif
}

static int
lives_below_in_same_fs(struct dentry *d, struct dentry *dentry)
{
	while (1) {
		if (d == dentry)
			return 1;
		if (d == NULL || d == d->d_parent)
			return 0;
		d = d->d_parent;
	}
}

static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry)
{
	struct vfsmount *res, *p, *q, *r, *s;
	struct nameidata nd;

	res = q = clone_mnt(mnt, dentry);
	if (!q)
		goto Enomem;
	q->mnt_mountpoint = mnt->mnt_mountpoint;

	p = mnt;
	list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
		if (!lives_below_in_same_fs(r->mnt_mountpoint, dentry))
			continue;

		for (s = r; s; s = next_mnt(s, r)) {
			while (p != s->mnt_parent) {
				p = p->mnt_parent;
				q = q->mnt_parent;
			}
			p = s;
			nd.mnt = q;
			nd.dentry = p->mnt_mountpoint;
			q = clone_mnt(p, p->mnt_root);
			if (!q)
				goto Enomem;
			spin_lock(&vfsmount_lock);
			list_add_tail(&q->mnt_list, &res->mnt_list);
			attach_mnt(q, &nd);
			spin_unlock(&vfsmount_lock);
		}
	}
	return res;
Enomem:
	if (res) {
		spin_lock(&vfsmount_lock);
		umount_tree(res);
		spin_unlock(&vfsmount_lock);
	}
	return NULL;
}

static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
{
	int err;
	if (mnt->mnt_sb->s_flags & MS_NOUSER)
		return -EINVAL;

	if (S_ISDIR(nd->dentry->d_inode->i_mode) !=
	    S_ISDIR(mnt->mnt_root->d_inode->i_mode))
		return -ENOTDIR;

	err = -ENOENT;
	down(&nd->dentry->d_inode->i_sem);
	if (IS_DEADDIR(nd->dentry->d_inode))
		goto out_unlock;

	err = security_sb_check_sb(mnt, nd);
	if (err)
		goto out_unlock;

	err = -ENOENT;
	spin_lock(&vfsmount_lock);
	if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry)) {
		struct list_head head;

		attach_mnt(mnt, nd);
		list_add_tail(&head, &mnt->mnt_list);
		list_splice(&head, current->namespace->list.prev);
		mntget(mnt);
		err = 0;
	}
	spin_unlock(&vfsmount_lock);
out_unlock:
	up(&nd->dentry->d_inode->i_sem);
	if (!err)
		security_sb_post_addmount(mnt, nd);
	return err;
}
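/*
 * copy_tree() and graft_tree() above are the building blocks of bind
 * mounts: do_loopback() below clones a single vfsmount (or, with MS_REC,
 * a whole subtree of mounts) over the same superblock(s) and grafts the
 * result at the new mountpoint.
 */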
/*
 * do loopback mount.
 */
static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
{
	struct nameidata old_nd;
	struct vfsmount *mnt = NULL;
	int err = mount_is_safe(nd);
	if (err)
		return err;
	if (!old_name || !*old_name)
		return -EINVAL;
	err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
	if (err)
		return err;

	down_write(&current->namespace->sem);
	err = -EINVAL;
	if (check_mnt(nd->mnt) && (!recurse || check_mnt(old_nd.mnt))) {
		err = -ENOMEM;
		if (recurse)
			mnt = copy_tree(old_nd.mnt, old_nd.dentry);
		else
			mnt = clone_mnt(old_nd.mnt, old_nd.dentry);
	}

	if (mnt) {
		/* stop bind mounts from expiring */
		spin_lock(&vfsmount_lock);
		list_del_init(&mnt->mnt_expire);
		spin_unlock(&vfsmount_lock);

		err = graft_tree(mnt, nd);
		if (err) {
			spin_lock(&vfsmount_lock);
			umount_tree(mnt);
			spin_unlock(&vfsmount_lock);
		} else
			mntput(mnt);
	}

	up_write(&current->namespace->sem);
	path_release(&old_nd);
	return err;
}

/*
 * change filesystem flags. dir should be a physical root of filesystem.
 * If you've mounted a non-root directory somewhere and want to do remount
 * on it - tough luck.
 */
static int do_remount(struct nameidata *nd, int flags, int mnt_flags,
		      void *data)
{
	int err;
	struct super_block *sb = nd->mnt->mnt_sb;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (!check_mnt(nd->mnt))
		return -EINVAL;

	if (nd->dentry != nd->mnt->mnt_root)
		return -EINVAL;

	down_write(&sb->s_umount);
	err = do_remount_sb(sb, flags, data, 0);
	if (!err)
		nd->mnt->mnt_flags = mnt_flags;
	up_write(&sb->s_umount);
	if (!err)
		security_sb_post_remount(nd->mnt, flags, data);
	return err;
}

static int do_move_mount(struct nameidata *nd, char *old_name)
{
	struct nameidata old_nd, parent_nd;
	struct vfsmount *p;
	int err = 0;
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (!old_name || !*old_name)
		return -EINVAL;
	err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
	if (err)
		return err;

	down_write(&current->namespace->sem);
	while (d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
		;
	err = -EINVAL;
	if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt))
		goto out;

	err = -ENOENT;
	down(&nd->dentry->d_inode->i_sem);
	if (IS_DEADDIR(nd->dentry->d_inode))
		goto out1;

	spin_lock(&vfsmount_lock);
	if (!IS_ROOT(nd->dentry) && d_unhashed(nd->dentry))
		goto out2;

	err = -EINVAL;
	if (old_nd.dentry != old_nd.mnt->mnt_root)
		goto out2;

	if (old_nd.mnt == old_nd.mnt->mnt_parent)
		goto out2;

	if (S_ISDIR(nd->dentry->d_inode->i_mode) !=
	    S_ISDIR(old_nd.dentry->d_inode->i_mode))
		goto out2;

	err = -ELOOP;
	/* refuse to move a mount underneath its own subtree */
	for (p = nd->mnt; p->mnt_parent != p; p = p->mnt_parent)
		if (p == old_nd.mnt)
			goto out2;
	err = 0;

	detach_mnt(old_nd.mnt, &parent_nd);
	attach_mnt(old_nd.mnt, nd);

	/* if the mount is moved, it should no longer expire
	 * automatically */
	list_del_init(&old_nd.mnt->mnt_expire);
out2:
	spin_unlock(&vfsmount_lock);
out1:
	up(&nd->dentry->d_inode->i_sem);
out:
	up_write(&current->namespace->sem);
	if (!err)
		path_release(&parent_nd);
	path_release(&old_nd);
	return err;
}
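/*
 * Reference-count convention for the add path below: do_kern_mount()
 * hands back a mount with one reference, graft_tree() takes its own with
 * mntget(), and do_add_mount() unconditionally drops the initial
 * reference with mntput() on its way out, success or failure.
 */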
/*
 * create a new mount for userspace and request it to be added into the
 * namespace's tree
 */
static int do_new_mount(struct nameidata *nd, char *type, int flags,
			int mnt_flags, char *name, void *data)
{
	struct vfsmount *mnt;

	if (!type || !memchr(type, 0, PAGE_SIZE))
		return -EINVAL;

	/* we need capabilities... */
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	mnt = do_kern_mount(type, flags, name, data);
	if (IS_ERR(mnt))
		return PTR_ERR(mnt);

	return do_add_mount(mnt, nd, mnt_flags, NULL);
}

/*
 * add a mount into a namespace's mount tree
 * - provide the option of adding the new mount to an expiration list
 */
int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
		 int mnt_flags, struct list_head *fslist)
{
	int err;

	down_write(&current->namespace->sem);
	/* Something was mounted here while we slept */
	while (d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
		;
	err = -EINVAL;
	if (!check_mnt(nd->mnt))
		goto unlock;

	/* Refuse the same filesystem on the same mount point */
	err = -EBUSY;
	if (nd->mnt->mnt_sb == newmnt->mnt_sb &&
	    nd->mnt->mnt_root == nd->dentry)
		goto unlock;

	err = -EINVAL;
	if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode))
		goto unlock;

	newmnt->mnt_flags = mnt_flags;
	newmnt->mnt_namespace = current->namespace;
	err = graft_tree(newmnt, nd);

	if (err == 0 && fslist) {
		/* add to the specified expiration list */
		spin_lock(&vfsmount_lock);
		list_add_tail(&newmnt->mnt_expire, fslist);
		spin_unlock(&vfsmount_lock);
	}

unlock:
	up_write(&current->namespace->sem);
	mntput(newmnt);
	return err;
}

EXPORT_SYMBOL_GPL(do_add_mount);

static void expire_mount(struct vfsmount *mnt, struct list_head *mounts)
{
	spin_lock(&vfsmount_lock);

	/*
	 * Check if mount is still attached, if not, let whoever holds it deal
	 * with the sucker
	 */
	if (mnt->mnt_parent == mnt) {
		spin_unlock(&vfsmount_lock);
		return;
	}

	/*
	 * Check that it is still dead: the count should now be 2 - as
	 * contributed by the vfsmount parent and the mntget above
	 */
	if (atomic_read(&mnt->mnt_count) == 2) {
		struct nameidata old_nd;

		/* delete from the namespace */
		list_del_init(&mnt->mnt_list);
		mnt->mnt_namespace = NULL;
		detach_mnt(mnt, &old_nd);
		spin_unlock(&vfsmount_lock);
		path_release(&old_nd);

		/*
		 * Now lay it to rest if this was the last ref on the superblock
		 */
		if (atomic_read(&mnt->mnt_sb->s_active) == 1) {
			/* last instance - try to be smart */
			lock_kernel();
			DQUOT_OFF(mnt->mnt_sb);
			acct_auto_close(mnt->mnt_sb);
			unlock_kernel();
		}
		mntput(mnt);
	} else {
		/*
		 * Someone brought it back to life whilst we didn't have any
		 * locks held so return it to the expiration list
		 */
		list_add_tail(&mnt->mnt_expire, mounts);
		spin_unlock(&vfsmount_lock);
	}
}
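/*
 * The expiry scheme below is two-pass: a mount must be seen idle by two
 * consecutive calls to mark_mounts_for_expiry() before it goes away. The
 * first call sets mnt_expiry_mark; any intervening use of the mount
 * clears the mark again via mntput(); only a mount whose mark survives
 * until the next call is moved to the graveyard and reaped.
 */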
/*
 * process a list of expirable mountpoints with the intent of discarding any
 * mountpoints that aren't in use and haven't been touched since last we came
 * here
 */
void mark_mounts_for_expiry(struct list_head *mounts)
{
	struct namespace *namespace;
	struct vfsmount *mnt, *next;
	LIST_HEAD(graveyard);

	if (list_empty(mounts))
		return;

	spin_lock(&vfsmount_lock);

	/* extract from the expiration list every vfsmount that matches the
	 * following criteria:
	 * - only referenced by its parent vfsmount
	 * - still marked for expiry (marked on the last call here; marks are
	 *   cleared by mntput())
	 */
	list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
		if (!xchg(&mnt->mnt_expiry_mark, 1) ||
		    atomic_read(&mnt->mnt_count) != 1)
			continue;

		mntget(mnt);
		list_move(&mnt->mnt_expire, &graveyard);
	}

	/*
	 * go through the vfsmounts we've just consigned to the graveyard to
	 * - check that they're still dead
	 * - delete the vfsmount from the appropriate namespace under lock
	 * - dispose of the corpse
	 */
	while (!list_empty(&graveyard)) {
		mnt = list_entry(graveyard.next, struct vfsmount, mnt_expire);
		list_del_init(&mnt->mnt_expire);

		/* don't do anything if the namespace is dead - all the
		 * vfsmounts from it are going away anyway */
		namespace = mnt->mnt_namespace;
		if (!namespace || !namespace->root)
			continue;
		get_namespace(namespace);

		spin_unlock(&vfsmount_lock);
		down_write(&namespace->sem);
		expire_mount(mnt, mounts);
		up_write(&namespace->sem);

		mntput(mnt);
		put_namespace(namespace);

		spin_lock(&vfsmount_lock);
	}

	spin_unlock(&vfsmount_lock);
}

EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);

/*
 * Some copy_from_user() implementations do not return the exact number of
 * bytes remaining to copy on a fault. But copy_mount_options() requires that.
 * Note that this function differs from copy_from_user() in that it will oops
 * on bad values of `to', rather than returning a short copy.
 */
static long
exact_copy_from_user(void *to, const void __user *from, unsigned long n)
{
	char *t = to;
	const char __user *f = from;
	char c;

	if (!access_ok(VERIFY_READ, from, n))
		return n;

	while (n) {
		if (__get_user(c, f)) {
			memset(t, 0, n);
			break;
		}
		*t++ = c;
		f++;
		n--;
	}
	return n;
}

int copy_mount_options(const void __user *data, unsigned long *where)
{
	int i;
	unsigned long page;
	unsigned long size;

	*where = 0;
	if (!data)
		return 0;

	if (!(page = __get_free_page(GFP_KERNEL)))
		return -ENOMEM;

	/* We only care that *some* data at the address the user
	 * gave us is valid. Just in case, we'll zero
	 * the remainder of the page.
	 */
	/* copy_from_user cannot cross TASK_SIZE! */
	size = TASK_SIZE - (unsigned long)data;
	if (size > PAGE_SIZE)
		size = PAGE_SIZE;

	i = size - exact_copy_from_user((void *)page, data, size);
	if (!i) {
		free_page(page);
		return -EFAULT;
	}
	if (i != PAGE_SIZE)
		memset((char *)page + i, 0, PAGE_SIZE - i);
	*where = page;
	return 0;
}
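/*
 * For the magic check in do_mount() below: MS_MGC_VAL is 0xC0ED0000 and
 * MS_MGC_MSK is 0xffff0000, so matching flag words simply have the
 * legacy magic stripped from their top half.
 */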
/*
 * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
 *
 * data is a (void *) that can point to any structure up to
 * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
 * information (or be NULL).
 *
 * Pre-0.97 versions of mount() didn't have a flags word.
 * When the flags word was introduced its top half was required
 * to have the magic value 0xC0ED, and this remained so until 2.4.0-test9.
 * Therefore, if this magic number is present, it carries no information
 * and must be discarded.
 */
long do_mount(char *dev_name, char *dir_name, char *type_page,
	      unsigned long flags, void *data_page)
{
	struct nameidata nd;
	int retval = 0;
	int mnt_flags = 0;

	/* Discard magic */
	if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
		flags &= ~MS_MGC_MSK;

	/* Basic sanity checks */

	if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
		return -EINVAL;
	if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))
		return -EINVAL;

	if (data_page)
		((char *)data_page)[PAGE_SIZE - 1] = 0;

	/* Separate the per-mountpoint flags */
	if (flags & MS_NOSUID)
		mnt_flags |= MNT_NOSUID;
	if (flags & MS_NODEV)
		mnt_flags |= MNT_NODEV;
	if (flags & MS_NOEXEC)
		mnt_flags |= MNT_NOEXEC;
	flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE);

	/* ... and get the mountpoint */
	retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
	if (retval)
		return retval;

	retval = security_sb_mount(dev_name, &nd, type_page, flags, data_page);
	if (retval)
		goto dput_out;

	if (flags & MS_REMOUNT)
		retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
				    data_page);
	else if (flags & MS_BIND)
		retval = do_loopback(&nd, dev_name, flags & MS_REC);
	else if (flags & MS_MOVE)
		retval = do_move_mount(&nd, dev_name);
	else
		retval = do_new_mount(&nd, type_page, flags, mnt_flags,
				      dev_name, data_page);
dput_out:
	path_release(&nd);
	return retval;
}
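/*
 * copy_namespace() is called on fork/clone: without CLONE_NEWNS the
 * child just shares the parent's namespace; with CLONE_NEWNS (and
 * CAP_SYS_ADMIN) it gets a private copy of the whole mount tree, with
 * its root/pwd/altroot retargeted to the corresponding new vfsmounts.
 */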
int copy_namespace(int flags, struct task_struct *tsk)
{
	struct namespace *namespace = tsk->namespace;
	struct namespace *new_ns;
	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL;
	struct fs_struct *fs = tsk->fs;
	struct vfsmount *p, *q;

	if (!namespace)
		return 0;

	get_namespace(namespace);

	if (!(flags & CLONE_NEWNS))
		return 0;

	if (!capable(CAP_SYS_ADMIN)) {
		put_namespace(namespace);
		return -EPERM;
	}

	new_ns = kmalloc(sizeof(struct namespace), GFP_KERNEL);
	if (!new_ns)
		goto out;

	atomic_set(&new_ns->count, 1);
	init_rwsem(&new_ns->sem);
	INIT_LIST_HEAD(&new_ns->list);

	down_write(&tsk->namespace->sem);
	/* First pass: copy the tree topology */
	new_ns->root = copy_tree(namespace->root, namespace->root->mnt_root);
	if (!new_ns->root) {
		up_write(&tsk->namespace->sem);
		kfree(new_ns);
		goto out;
	}
	spin_lock(&vfsmount_lock);
	list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
	spin_unlock(&vfsmount_lock);

	/*
	 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts
	 * as belonging to new namespace.  We have already acquired a private
	 * fs_struct, so tsk->fs->lock is not needed.
	 */
	p = namespace->root;
	q = new_ns->root;
	while (p) {
		q->mnt_namespace = new_ns;
		if (fs) {
			if (p == fs->rootmnt) {
				rootmnt = p;
				fs->rootmnt = mntget(q);
			}
			if (p == fs->pwdmnt) {
				pwdmnt = p;
				fs->pwdmnt = mntget(q);
			}
			if (p == fs->altrootmnt) {
				altrootmnt = p;
				fs->altrootmnt = mntget(q);
			}
		}
		p = next_mnt(p, namespace->root);
		q = next_mnt(q, new_ns->root);
	}
	up_write(&tsk->namespace->sem);

	tsk->namespace = new_ns;

	if (rootmnt)
		mntput(rootmnt);
	if (pwdmnt)
		mntput(pwdmnt);
	if (altrootmnt)
		mntput(altrootmnt);

	put_namespace(namespace);
	return 0;

out:
	put_namespace(namespace);
	return -ENOMEM;
}

asmlinkage long sys_mount(char __user *dev_name, char __user *dir_name,
			  char __user *type, unsigned long flags,
			  void __user *data)
{
	int retval;
	unsigned long data_page;
	unsigned long type_page;
	unsigned long dev_page;
	char *dir_page;

	retval = copy_mount_options(type, &type_page);
	if (retval < 0)
		return retval;

	dir_page = getname(dir_name);
	retval = PTR_ERR(dir_page);
	if (IS_ERR(dir_page))
		goto out1;

	retval = copy_mount_options(dev_name, &dev_page);
	if (retval < 0)
		goto out2;

	retval = copy_mount_options(data, &data_page);
	if (retval < 0)
		goto out3;

	lock_kernel();
	retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
			  flags, (void *)data_page);
	unlock_kernel();
	free_page(data_page);

out3:
	free_page(dev_page);
out2:
	putname(dir_page);
out1:
	free_page(type_page);
	return retval;
}
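/*
 * Note that sys_mount() above still wraps do_mount() in the BKL; the
 * mount-table locking proper is namespace->sem plus vfsmount_lock.
 */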
/*
 * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
 * It can block. Requires the big lock held.
 */
void set_fs_root(struct fs_struct *fs, struct vfsmount *mnt,
		 struct dentry *dentry)
{
	struct dentry *old_root;
	struct vfsmount *old_rootmnt;
	write_lock(&fs->lock);
	old_root = fs->root;
	old_rootmnt = fs->rootmnt;
	fs->rootmnt = mntget(mnt);
	fs->root = dget(dentry);
	write_unlock(&fs->lock);
	if (old_root) {
		dput(old_root);
		mntput(old_rootmnt);
	}
}

/*
 * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values.
 * It can block. Requires the big lock held.
 */
void set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
		struct dentry *dentry)
{
	struct dentry *old_pwd;
	struct vfsmount *old_pwdmnt;

	write_lock(&fs->lock);
	old_pwd = fs->pwd;
	old_pwdmnt = fs->pwdmnt;
	fs->pwdmnt = mntget(mnt);
	fs->pwd = dget(dentry);
	write_unlock(&fs->lock);

	if (old_pwd) {
		dput(old_pwd);
		mntput(old_pwdmnt);
	}
}

/*
 * Walk every task and retarget any root/cwd that sat on old_nd over to
 * new_nd; this is what makes pivot_root() take effect system-wide.
 */
static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd)
{
	struct task_struct *g, *p;
	struct fs_struct *fs;

	read_lock(&tasklist_lock);
	do_each_thread(g, p) {
		task_lock(p);
		fs = p->fs;
		if (fs) {
			atomic_inc(&fs->count);
			task_unlock(p);
			if (fs->root == old_nd->dentry && fs->rootmnt == old_nd->mnt)
				set_fs_root(fs, new_nd->mnt, new_nd->dentry);
			if (fs->pwd == old_nd->dentry && fs->pwdmnt == old_nd->mnt)
				set_fs_pwd(fs, new_nd->mnt, new_nd->dentry);
			put_fs_struct(fs);
		} else
			task_unlock(p);
	} while_each_thread(g, p);
	read_unlock(&tasklist_lock);
}

/*
 * pivot_root Semantics:
 * Moves the root file system of the current process to the directory put_old,
 * makes new_root the new root file system of the current process, and sets
 * root/cwd of all processes which had them on the current root to new_root.
 *
 * Restrictions:
 * The new_root and put_old must be directories, and must not be on the
 * same file system as the current process root. The put_old must be
 * underneath new_root, i.e. adding a non-zero number of /.. to the string
 * pointed to by put_old must yield the same directory as new_root. No other
 * file system may be mounted on put_old. After all, new_root is a mountpoint.
 *
 * Notes:
 *  - we don't move root/cwd if they are not at the root (reason: if something
 *    cared enough to change them, it's probably wrong to force them elsewhere)
 *  - it's okay to pick a root that isn't the root of a file system, e.g.
 *    /nfs/my_root where /nfs is the mount point. It must be a mountpoint,
 *    though, so you may need to say mount --bind /nfs/my_root /nfs/my_root
 *    first.
 */
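/*
 * Typical userspace sequence (a sketch, cf. pivot_root(2)):
 *
 *	chdir("/new_root");
 *	pivot_root(".", "put_old");
 *	chroot(".");
 *	umount2("/put_old", MNT_DETACH);
 */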
asmlinkage long sys_pivot_root(const char __user *new_root,
			       const char __user *put_old)
{
	struct vfsmount *tmp;
	struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
	int error;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	lock_kernel();

	error = __user_walk(new_root, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &new_nd);
	if (error)
		goto out0;
	error = -EINVAL;
	if (!check_mnt(new_nd.mnt))
		goto out1;

	error = __user_walk(put_old, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old_nd);
	if (error)
		goto out1;

	error = security_sb_pivotroot(&old_nd, &new_nd);
	if (error) {
		path_release(&old_nd);
		goto out1;
	}

	read_lock(&current->fs->lock);
	user_nd.mnt = mntget(current->fs->rootmnt);
	user_nd.dentry = dget(current->fs->root);
	read_unlock(&current->fs->lock);
	down_write(&current->namespace->sem);
	down(&old_nd.dentry->d_inode->i_sem);
	error = -EINVAL;
	if (!check_mnt(user_nd.mnt))
		goto out2;
	error = -ENOENT;
	if (IS_DEADDIR(new_nd.dentry->d_inode))
		goto out2;
	if (d_unhashed(new_nd.dentry) && !IS_ROOT(new_nd.dentry))
		goto out2;
	if (d_unhashed(old_nd.dentry) && !IS_ROOT(old_nd.dentry))
		goto out2;
	error = -EBUSY;
	if (new_nd.mnt == user_nd.mnt || old_nd.mnt == user_nd.mnt)
		goto out2;	/* loop, on the same file system */
	error = -EINVAL;
	if (user_nd.mnt->mnt_root != user_nd.dentry)
		goto out2;	/* not a mountpoint */
	if (user_nd.mnt->mnt_parent == user_nd.mnt)
		goto out2;	/* not attached */
	if (new_nd.mnt->mnt_root != new_nd.dentry)
		goto out2;	/* not a mountpoint */
	if (new_nd.mnt->mnt_parent == new_nd.mnt)
		goto out2;	/* not attached */
	tmp = old_nd.mnt;	/* make sure we can reach put_old from new_root */
	spin_lock(&vfsmount_lock);
	if (tmp != new_nd.mnt) {
		for (;;) {
			if (tmp->mnt_parent == tmp)
				goto out3;	/* already mounted on put_old */
			if (tmp->mnt_parent == new_nd.mnt)
				break;
			tmp = tmp->mnt_parent;
		}
		if (!is_subdir(tmp->mnt_mountpoint, new_nd.dentry))
			goto out3;
	} else if (!is_subdir(old_nd.dentry, new_nd.dentry))
		goto out3;
	detach_mnt(new_nd.mnt, &parent_nd);
	detach_mnt(user_nd.mnt, &root_parent);
	attach_mnt(user_nd.mnt, &old_nd);	/* mount old root on put_old */
	attach_mnt(new_nd.mnt, &root_parent);	/* mount new_root on / */
	spin_unlock(&vfsmount_lock);
	chroot_fs_refs(&user_nd, &new_nd);
	security_sb_post_pivotroot(&user_nd, &new_nd);
	error = 0;
	path_release(&root_parent);
	path_release(&parent_nd);
out2:
	up(&old_nd.dentry->d_inode->i_sem);
	up_write(&current->namespace->sem);
	path_release(&user_nd);
	path_release(&old_nd);
out1:
	path_release(&new_nd);
out0:
	unlock_kernel();
	return error;
out3:
	spin_unlock(&vfsmount_lock);
	goto out2;
}
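/*
 * Boot-time setup: hand-craft the initial namespace around the rootfs
 * mount and make every early task (and current's fs) point at it, so
 * that the real root can later be mounted on top.
 */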
static void __init init_mount_tree(void)
{
	struct vfsmount *mnt;
	struct namespace *namespace;
	struct task_struct *g, *p;

	mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
	if (IS_ERR(mnt))
		panic("Can't create rootfs");
	namespace = kmalloc(sizeof(*namespace), GFP_KERNEL);
	if (!namespace)
		panic("Can't allocate initial namespace");
	atomic_set(&namespace->count, 1);
	INIT_LIST_HEAD(&namespace->list);
	init_rwsem(&namespace->sem);
	list_add(&mnt->mnt_list, &namespace->list);
	namespace->root = mnt;
	mnt->mnt_namespace = namespace;

	init_task.namespace = namespace;
	read_lock(&tasklist_lock);
	do_each_thread(g, p) {
		get_namespace(namespace);
		p->namespace = namespace;
	} while_each_thread(g, p);
	read_unlock(&tasklist_lock);

	set_fs_pwd(current->fs, namespace->root, namespace->root->mnt_root);
	set_fs_root(current->fs, namespace->root, namespace->root->mnt_root);
}

void __init mnt_init(unsigned long mempages)
{
	struct list_head *d;
	unsigned int nr_hash;
	int i;

	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
				      0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
				      NULL, NULL);

	mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);

	if (!mount_hashtable)
		panic("Failed to allocate mount hash table\n");

	/*
	 * Find the power-of-two list-heads that can fit into the allocation..
	 * We don't guarantee that "sizeof(struct list_head)" is necessarily
	 * a power-of-two.
	 */
	nr_hash = PAGE_SIZE / sizeof(struct list_head);
	hash_bits = 0;
	do {
		hash_bits++;
	} while ((nr_hash >> hash_bits) != 0);
	hash_bits--;

	/*
	 * Re-calculate the actual number of entries and the mask
	 * from the number of bits we can fit.
	 * (E.g. with 4K pages and 8-byte list_heads this yields
	 * nr_hash = 512 and hash_bits = 9.)
	 */
	nr_hash = 1UL << hash_bits;
	hash_mask = nr_hash - 1;

	printk("Mount-cache hash table entries: %d\n", nr_hash);

	/* And initialize the newly allocated array */
	d = mount_hashtable;
	i = nr_hash;
	do {
		INIT_LIST_HEAD(d);
		d++;
		i--;
	} while (i);
	sysfs_init();
	init_rootfs();
	init_mount_tree();
}

/* entered with vfsmount_lock held (cf. put_namespace()) */
void __put_namespace(struct namespace *namespace)
{
	struct vfsmount *root = namespace->root;
	namespace->root = NULL;
	spin_unlock(&vfsmount_lock);
	down_write(&namespace->sem);
	spin_lock(&vfsmount_lock);
	umount_tree(root);
	spin_unlock(&vfsmount_lock);
	up_write(&namespace->sem);
	kfree(namespace);
}