1 /* 2 * linux/fs/namei.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 /* 8 * Some corrections by tytso. 9 */ 10 11 /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname 12 * lookup logic. 13 */ 14 /* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture. 15 */ 16 17 #include <linux/init.h> 18 #include <linux/module.h> 19 #include <linux/slab.h> 20 #include <linux/fs.h> 21 #include <linux/namei.h> 22 #include <linux/quotaops.h> 23 #include <linux/pagemap.h> 24 #include <linux/fsnotify.h> 25 #include <linux/smp_lock.h> 26 #include <linux/personality.h> 27 #include <linux/security.h> 28 #include <linux/syscalls.h> 29 #include <linux/mount.h> 30 #include <linux/audit.h> 31 #include <linux/file.h> 32 #include <asm/namei.h> 33 #include <asm/uaccess.h> 34 35 #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) 36 37 /* [Feb-1997 T. Schoebel-Theuer] 38 * Fundamental changes in the pathname lookup mechanisms (namei) 39 * were necessary because of omirr. The reason is that omirr needs 40 * to know the _real_ pathname, not the user-supplied one, in case 41 * of symlinks (and also when transname replacements occur). 42 * 43 * The new code replaces the old recursive symlink resolution with 44 * an iterative one (in case of non-nested symlink chains). It does 45 * this with calls to <fs>_follow_link(). 46 * As a side effect, dir_namei(), _namei() and follow_link() are now 47 * replaced with a single function lookup_dentry() that can handle all 48 * the special cases of the former code. 49 * 50 * With the new dcache, the pathname is stored at each inode, at least as 51 * long as the refcount of the inode is positive. As a side effect, the 52 * size of the dcache depends on the inode cache and thus is dynamic. 53 * 54 * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink 55 * resolution to correspond with current state of the code. 56 * 57 * Note that the symlink resolution is not *completely* iterative. 
* There is still a significant amount of tail- and mid- recursion in
 * the algorithm.  Also, note that <fs>_readlink() is not used in
 * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
 * may return different results than <fs>_follow_link().  Many virtual
 * filesystems (including /proc) exhibit this behavior.
 */

/* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
 * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
 * and the name already exists in form of a symlink, try to create the new
 * name indicated by the symlink. The old code always complained that the
 * name already exists, due to not following the symlink even if its target
 * is nonexistent.  The new semantics affects also mknod() and link() when
 * the name is a symlink pointing to a non-existent name.
 *
 * I don't know which semantics is the right one, since I have no access
 * to standards. But I found by trial that HP-UX 9.0 has the full "new"
 * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
 * "old" one. Personally, I think the new semantics is much more logical.
 * Note that "ln old new" where "new" is a symlink pointing to a non-existing
 * file does succeed in both HP-UX and SunOS, but not in Solaris
 * and in the old Linux semantics.
 */

/* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
 * semantics.  See the comments in "open_namei" and "do_link" below.
 *
 * [10-Sep-98 Alan Modra] Another symlink change.
 */

/* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
 *	inside the path - always follow.
 *	in the last component in creation/removal/renaming - never follow.
 *	if LOOKUP_FOLLOW passed - follow.
 *	if the pathname has trailing slashes - follow.
 *	otherwise - don't follow.
 * (applied in that order).
95 * 96 * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT 97 * restored for 2.4. This is the last surviving part of old 4.2BSD bug. 98 * During the 2.4 we need to fix the userland stuff depending on it - 99 * hopefully we will be able to get rid of that wart in 2.5. So far only 100 * XEmacs seems to be relying on it... 101 */ 102 /* 103 * [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland) 104 * implemented. Let's see if raised priority of ->s_vfs_rename_sem gives 105 * any extra contention... 106 */ 107 108 /* In order to reduce some races, while at the same time doing additional 109 * checking and hopefully speeding things up, we copy filenames to the 110 * kernel data space before using them.. 111 * 112 * POSIX.1 2.4: an empty pathname is invalid (ENOENT). 113 * PATH_MAX includes the nul terminator --RR. 114 */ 115 static inline int do_getname(const char __user *filename, char *page) 116 { 117 int retval; 118 unsigned long len = PATH_MAX; 119 120 if (!segment_eq(get_fs(), KERNEL_DS)) { 121 if ((unsigned long) filename >= TASK_SIZE) 122 return -EFAULT; 123 if (TASK_SIZE - (unsigned long) filename < PATH_MAX) 124 len = TASK_SIZE - (unsigned long) filename; 125 } 126 127 retval = strncpy_from_user(page, filename, len); 128 if (retval > 0) { 129 if (retval < len) 130 return 0; 131 return -ENAMETOOLONG; 132 } else if (!retval) 133 retval = -ENOENT; 134 return retval; 135 } 136 137 char * getname(const char __user * filename) 138 { 139 char *tmp, *result; 140 141 result = ERR_PTR(-ENOMEM); 142 tmp = __getname(); 143 if (tmp) { 144 int retval = do_getname(filename, tmp); 145 146 result = tmp; 147 if (retval < 0) { 148 __putname(tmp); 149 result = ERR_PTR(retval); 150 } 151 } 152 audit_getname(result); 153 return result; 154 } 155 156 #ifdef CONFIG_AUDITSYSCALL 157 void putname(const char *name) 158 { 159 if (unlikely(current->audit_context)) 160 audit_putname(name); 161 else 162 __putname(name); 163 } 164 
EXPORT_SYMBOL(putname); 165 #endif 166 167 168 /** 169 * generic_permission - check for access rights on a Posix-like filesystem 170 * @inode: inode to check access rights for 171 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) 172 * @check_acl: optional callback to check for Posix ACLs 173 * 174 * Used to check for read/write/execute permissions on a file. 175 * We use "fsuid" for this, letting us set arbitrary permissions 176 * for filesystem access without changing the "normal" uids which 177 * are used for other things.. 178 */ 179 int generic_permission(struct inode *inode, int mask, 180 int (*check_acl)(struct inode *inode, int mask)) 181 { 182 umode_t mode = inode->i_mode; 183 184 if (current->fsuid == inode->i_uid) 185 mode >>= 6; 186 else { 187 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { 188 int error = check_acl(inode, mask); 189 if (error == -EACCES) 190 goto check_capabilities; 191 else if (error != -EAGAIN) 192 return error; 193 } 194 195 if (in_group_p(inode->i_gid)) 196 mode >>= 3; 197 } 198 199 /* 200 * If the DACs are ok we don't need any capability check. 201 */ 202 if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)) 203 return 0; 204 205 check_capabilities: 206 /* 207 * Read/write DACs are always overridable. 208 * Executable DACs are overridable if at least one exec bit is set. 209 */ 210 if (!(mask & MAY_EXEC) || 211 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode)) 212 if (capable(CAP_DAC_OVERRIDE)) 213 return 0; 214 215 /* 216 * Searching includes executable on directories, else just read. 217 */ 218 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) 219 if (capable(CAP_DAC_READ_SEARCH)) 220 return 0; 221 222 return -EACCES; 223 } 224 225 int permission(struct inode *inode, int mask, struct nameidata *nd) 226 { 227 int retval, submask; 228 229 if (mask & MAY_WRITE) { 230 umode_t mode = inode->i_mode; 231 232 /* 233 * Nobody gets write access to a read-only fs. 
234 */ 235 if (IS_RDONLY(inode) && 236 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) 237 return -EROFS; 238 239 /* 240 * Nobody gets write access to an immutable file. 241 */ 242 if (IS_IMMUTABLE(inode)) 243 return -EACCES; 244 } 245 246 247 /* Ordinary permission routines do not understand MAY_APPEND. */ 248 submask = mask & ~MAY_APPEND; 249 if (inode->i_op && inode->i_op->permission) 250 retval = inode->i_op->permission(inode, submask, nd); 251 else 252 retval = generic_permission(inode, submask, NULL); 253 if (retval) 254 return retval; 255 256 return security_inode_permission(inode, mask, nd); 257 } 258 259 /* 260 * get_write_access() gets write permission for a file. 261 * put_write_access() releases this write permission. 262 * This is used for regular files. 263 * We cannot support write (and maybe mmap read-write shared) accesses and 264 * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode 265 * can have the following values: 266 * 0: no writers, no VM_DENYWRITE mappings 267 * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist 268 * > 0: (i_writecount) users are writing to the file. 269 * 270 * Normally we operate on that counter with atomic_{inc,dec} and it's safe 271 * except for the cases where we don't hold i_writecount yet. Then we need to 272 * use {get,deny}_write_access() - these functions check the sign and refuse 273 * to do the change if sign is wrong. Exclusion between them is provided by 274 * the inode->i_lock spinlock. 
275 */ 276 277 int get_write_access(struct inode * inode) 278 { 279 spin_lock(&inode->i_lock); 280 if (atomic_read(&inode->i_writecount) < 0) { 281 spin_unlock(&inode->i_lock); 282 return -ETXTBSY; 283 } 284 atomic_inc(&inode->i_writecount); 285 spin_unlock(&inode->i_lock); 286 287 return 0; 288 } 289 290 int deny_write_access(struct file * file) 291 { 292 struct inode *inode = file->f_dentry->d_inode; 293 294 spin_lock(&inode->i_lock); 295 if (atomic_read(&inode->i_writecount) > 0) { 296 spin_unlock(&inode->i_lock); 297 return -ETXTBSY; 298 } 299 atomic_dec(&inode->i_writecount); 300 spin_unlock(&inode->i_lock); 301 302 return 0; 303 } 304 305 void path_release(struct nameidata *nd) 306 { 307 dput(nd->dentry); 308 mntput(nd->mnt); 309 } 310 311 /* 312 * umount() mustn't call path_release()/mntput() as that would clear 313 * mnt_expiry_mark 314 */ 315 void path_release_on_umount(struct nameidata *nd) 316 { 317 dput(nd->dentry); 318 mntput_no_expire(nd->mnt); 319 } 320 321 /** 322 * release_open_intent - free up open intent resources 323 * @nd: pointer to nameidata 324 */ 325 void release_open_intent(struct nameidata *nd) 326 { 327 if (nd->intent.open.file->f_dentry == NULL) 328 put_filp(nd->intent.open.file); 329 else 330 fput(nd->intent.open.file); 331 } 332 333 /* 334 * Internal lookup() using the new generic dcache. 
335 * SMP-safe 336 */ 337 static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd) 338 { 339 struct dentry * dentry = __d_lookup(parent, name); 340 341 /* lockess __d_lookup may fail due to concurrent d_move() 342 * in some unrelated directory, so try with d_lookup 343 */ 344 if (!dentry) 345 dentry = d_lookup(parent, name); 346 347 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { 348 if (!dentry->d_op->d_revalidate(dentry, nd) && !d_invalidate(dentry)) { 349 dput(dentry); 350 dentry = NULL; 351 } 352 } 353 return dentry; 354 } 355 356 /* 357 * Short-cut version of permission(), for calling by 358 * path_walk(), when dcache lock is held. Combines parts 359 * of permission() and generic_permission(), and tests ONLY for 360 * MAY_EXEC permission. 361 * 362 * If appropriate, check DAC only. If not appropriate, or 363 * short-cut DAC fails, then call permission() to do more 364 * complete permission check. 365 */ 366 static inline int exec_permission_lite(struct inode *inode, 367 struct nameidata *nd) 368 { 369 umode_t mode = inode->i_mode; 370 371 if (inode->i_op && inode->i_op->permission) 372 return -EAGAIN; 373 374 if (current->fsuid == inode->i_uid) 375 mode >>= 6; 376 else if (in_group_p(inode->i_gid)) 377 mode >>= 3; 378 379 if (mode & MAY_EXEC) 380 goto ok; 381 382 if ((inode->i_mode & S_IXUGO) && capable(CAP_DAC_OVERRIDE)) 383 goto ok; 384 385 if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_OVERRIDE)) 386 goto ok; 387 388 if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_READ_SEARCH)) 389 goto ok; 390 391 return -EACCES; 392 ok: 393 return security_inode_permission(inode, MAY_EXEC, nd); 394 } 395 396 /* 397 * This is called when everything else fails, and we actually have 398 * to go to the low-level filesystem to find out what we should do.. 399 * 400 * We get the directory semaphore, and after getting that we also 401 * make sure that nobody added the entry to the dcache in the meantime.. 
402 * SMP-safe 403 */ 404 static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd) 405 { 406 struct dentry * result; 407 struct inode *dir = parent->d_inode; 408 409 down(&dir->i_sem); 410 /* 411 * First re-do the cached lookup just in case it was created 412 * while we waited for the directory semaphore.. 413 * 414 * FIXME! This could use version numbering or similar to 415 * avoid unnecessary cache lookups. 416 * 417 * The "dcache_lock" is purely to protect the RCU list walker 418 * from concurrent renames at this point (we mustn't get false 419 * negatives from the RCU list walk here, unlike the optimistic 420 * fast walk). 421 * 422 * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup 423 */ 424 result = d_lookup(parent, name); 425 if (!result) { 426 struct dentry * dentry = d_alloc(parent, name); 427 result = ERR_PTR(-ENOMEM); 428 if (dentry) { 429 result = dir->i_op->lookup(dir, dentry, nd); 430 if (result) 431 dput(dentry); 432 else 433 result = dentry; 434 } 435 up(&dir->i_sem); 436 return result; 437 } 438 439 /* 440 * Uhhuh! Nasty case: the cache was re-populated while 441 * we waited on the semaphore. Need to revalidate. 
442 */ 443 up(&dir->i_sem); 444 if (result->d_op && result->d_op->d_revalidate) { 445 if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) { 446 dput(result); 447 result = ERR_PTR(-ENOENT); 448 } 449 } 450 return result; 451 } 452 453 static int __emul_lookup_dentry(const char *, struct nameidata *); 454 455 /* SMP-safe */ 456 static inline int 457 walk_init_root(const char *name, struct nameidata *nd) 458 { 459 read_lock(¤t->fs->lock); 460 if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) { 461 nd->mnt = mntget(current->fs->altrootmnt); 462 nd->dentry = dget(current->fs->altroot); 463 read_unlock(¤t->fs->lock); 464 if (__emul_lookup_dentry(name,nd)) 465 return 0; 466 read_lock(¤t->fs->lock); 467 } 468 nd->mnt = mntget(current->fs->rootmnt); 469 nd->dentry = dget(current->fs->root); 470 read_unlock(¤t->fs->lock); 471 return 1; 472 } 473 474 static inline int __vfs_follow_link(struct nameidata *nd, const char *link) 475 { 476 int res = 0; 477 char *name; 478 if (IS_ERR(link)) 479 goto fail; 480 481 if (*link == '/') { 482 path_release(nd); 483 if (!walk_init_root(link, nd)) 484 /* weird __emul_prefix() stuff did it */ 485 goto out; 486 } 487 res = link_path_walk(link, nd); 488 out: 489 if (nd->depth || res || nd->last_type!=LAST_NORM) 490 return res; 491 /* 492 * If it is an iterative symlinks resolution in open_namei() we 493 * have to copy the last component. And all that crap because of 494 * bloody create() on broken symlinks. Furrfu... 
495 */ 496 name = __getname(); 497 if (unlikely(!name)) { 498 path_release(nd); 499 return -ENOMEM; 500 } 501 strcpy(name, nd->last.name); 502 nd->last.name = name; 503 return 0; 504 fail: 505 path_release(nd); 506 return PTR_ERR(link); 507 } 508 509 struct path { 510 struct vfsmount *mnt; 511 struct dentry *dentry; 512 }; 513 514 static inline int __do_follow_link(struct path *path, struct nameidata *nd) 515 { 516 int error; 517 void *cookie; 518 struct dentry *dentry = path->dentry; 519 520 touch_atime(path->mnt, dentry); 521 nd_set_link(nd, NULL); 522 523 if (path->mnt == nd->mnt) 524 mntget(path->mnt); 525 cookie = dentry->d_inode->i_op->follow_link(dentry, nd); 526 error = PTR_ERR(cookie); 527 if (!IS_ERR(cookie)) { 528 char *s = nd_get_link(nd); 529 error = 0; 530 if (s) 531 error = __vfs_follow_link(nd, s); 532 if (dentry->d_inode->i_op->put_link) 533 dentry->d_inode->i_op->put_link(dentry, nd, cookie); 534 } 535 dput(dentry); 536 mntput(path->mnt); 537 538 return error; 539 } 540 541 static inline void dput_path(struct path *path, struct nameidata *nd) 542 { 543 dput(path->dentry); 544 if (path->mnt != nd->mnt) 545 mntput(path->mnt); 546 } 547 548 static inline void path_to_nameidata(struct path *path, struct nameidata *nd) 549 { 550 dput(nd->dentry); 551 if (nd->mnt != path->mnt) 552 mntput(nd->mnt); 553 nd->mnt = path->mnt; 554 nd->dentry = path->dentry; 555 } 556 557 /* 558 * This limits recursive symlink follows to 8, while 559 * limiting consecutive symlinks to 40. 560 * 561 * Without that kind of total limit, nasty chains of consecutive 562 * symlinks can cause almost arbitrarily long lookups. 
563 */ 564 static inline int do_follow_link(struct path *path, struct nameidata *nd) 565 { 566 int err = -ELOOP; 567 if (current->link_count >= MAX_NESTED_LINKS) 568 goto loop; 569 if (current->total_link_count >= 40) 570 goto loop; 571 BUG_ON(nd->depth >= MAX_NESTED_LINKS); 572 cond_resched(); 573 err = security_inode_follow_link(path->dentry, nd); 574 if (err) 575 goto loop; 576 current->link_count++; 577 current->total_link_count++; 578 nd->depth++; 579 err = __do_follow_link(path, nd); 580 current->link_count--; 581 nd->depth--; 582 return err; 583 loop: 584 dput_path(path, nd); 585 path_release(nd); 586 return err; 587 } 588 589 int follow_up(struct vfsmount **mnt, struct dentry **dentry) 590 { 591 struct vfsmount *parent; 592 struct dentry *mountpoint; 593 spin_lock(&vfsmount_lock); 594 parent=(*mnt)->mnt_parent; 595 if (parent == *mnt) { 596 spin_unlock(&vfsmount_lock); 597 return 0; 598 } 599 mntget(parent); 600 mountpoint=dget((*mnt)->mnt_mountpoint); 601 spin_unlock(&vfsmount_lock); 602 dput(*dentry); 603 *dentry = mountpoint; 604 mntput(*mnt); 605 *mnt = parent; 606 return 1; 607 } 608 609 /* no need for dcache_lock, as serialization is taken care in 610 * namespace.c 611 */ 612 static int __follow_mount(struct path *path) 613 { 614 int res = 0; 615 while (d_mountpoint(path->dentry)) { 616 struct vfsmount *mounted = lookup_mnt(path->mnt, path->dentry); 617 if (!mounted) 618 break; 619 dput(path->dentry); 620 if (res) 621 mntput(path->mnt); 622 path->mnt = mounted; 623 path->dentry = dget(mounted->mnt_root); 624 res = 1; 625 } 626 return res; 627 } 628 629 static void follow_mount(struct vfsmount **mnt, struct dentry **dentry) 630 { 631 while (d_mountpoint(*dentry)) { 632 struct vfsmount *mounted = lookup_mnt(*mnt, *dentry); 633 if (!mounted) 634 break; 635 dput(*dentry); 636 mntput(*mnt); 637 *mnt = mounted; 638 *dentry = dget(mounted->mnt_root); 639 } 640 } 641 642 /* no need for dcache_lock, as serialization is taken care in 643 * namespace.c 644 */ 
645 int follow_down(struct vfsmount **mnt, struct dentry **dentry) 646 { 647 struct vfsmount *mounted; 648 649 mounted = lookup_mnt(*mnt, *dentry); 650 if (mounted) { 651 dput(*dentry); 652 mntput(*mnt); 653 *mnt = mounted; 654 *dentry = dget(mounted->mnt_root); 655 return 1; 656 } 657 return 0; 658 } 659 660 static inline void follow_dotdot(struct nameidata *nd) 661 { 662 while(1) { 663 struct vfsmount *parent; 664 struct dentry *old = nd->dentry; 665 666 read_lock(¤t->fs->lock); 667 if (nd->dentry == current->fs->root && 668 nd->mnt == current->fs->rootmnt) { 669 read_unlock(¤t->fs->lock); 670 break; 671 } 672 read_unlock(¤t->fs->lock); 673 spin_lock(&dcache_lock); 674 if (nd->dentry != nd->mnt->mnt_root) { 675 nd->dentry = dget(nd->dentry->d_parent); 676 spin_unlock(&dcache_lock); 677 dput(old); 678 break; 679 } 680 spin_unlock(&dcache_lock); 681 spin_lock(&vfsmount_lock); 682 parent = nd->mnt->mnt_parent; 683 if (parent == nd->mnt) { 684 spin_unlock(&vfsmount_lock); 685 break; 686 } 687 mntget(parent); 688 nd->dentry = dget(nd->mnt->mnt_mountpoint); 689 spin_unlock(&vfsmount_lock); 690 dput(old); 691 mntput(nd->mnt); 692 nd->mnt = parent; 693 } 694 follow_mount(&nd->mnt, &nd->dentry); 695 } 696 697 /* 698 * It's more convoluted than I'd like it to be, but... it's still fairly 699 * small and for now I'd prefer to have fast path as straight as possible. 700 * It _is_ time-critical. 
701 */ 702 static int do_lookup(struct nameidata *nd, struct qstr *name, 703 struct path *path) 704 { 705 struct vfsmount *mnt = nd->mnt; 706 struct dentry *dentry = __d_lookup(nd->dentry, name); 707 708 if (!dentry) 709 goto need_lookup; 710 if (dentry->d_op && dentry->d_op->d_revalidate) 711 goto need_revalidate; 712 done: 713 path->mnt = mnt; 714 path->dentry = dentry; 715 __follow_mount(path); 716 return 0; 717 718 need_lookup: 719 dentry = real_lookup(nd->dentry, name, nd); 720 if (IS_ERR(dentry)) 721 goto fail; 722 goto done; 723 724 need_revalidate: 725 if (dentry->d_op->d_revalidate(dentry, nd)) 726 goto done; 727 if (d_invalidate(dentry)) 728 goto done; 729 dput(dentry); 730 goto need_lookup; 731 732 fail: 733 return PTR_ERR(dentry); 734 } 735 736 /* 737 * Name resolution. 738 * This is the basic name resolution function, turning a pathname into 739 * the final dentry. We expect 'base' to be positive and a directory. 740 * 741 * Returns 0 and nd will have valid dentry and mnt on success. 742 * Returns error and drops reference to input namei data on failure. 743 */ 744 static fastcall int __link_path_walk(const char * name, struct nameidata *nd) 745 { 746 struct path next; 747 struct inode *inode; 748 int err; 749 unsigned int lookup_flags = nd->flags; 750 751 while (*name=='/') 752 name++; 753 if (!*name) 754 goto return_reval; 755 756 inode = nd->dentry->d_inode; 757 if (nd->depth) 758 lookup_flags = LOOKUP_FOLLOW; 759 760 /* At this point we know we have a real path component. 
*/ 761 for(;;) { 762 unsigned long hash; 763 struct qstr this; 764 unsigned int c; 765 766 nd->flags |= LOOKUP_CONTINUE; 767 err = exec_permission_lite(inode, nd); 768 if (err == -EAGAIN) { 769 err = permission(inode, MAY_EXEC, nd); 770 } 771 if (err) 772 break; 773 774 this.name = name; 775 c = *(const unsigned char *)name; 776 777 hash = init_name_hash(); 778 do { 779 name++; 780 hash = partial_name_hash(c, hash); 781 c = *(const unsigned char *)name; 782 } while (c && (c != '/')); 783 this.len = name - (const char *) this.name; 784 this.hash = end_name_hash(hash); 785 786 /* remove trailing slashes? */ 787 if (!c) 788 goto last_component; 789 while (*++name == '/'); 790 if (!*name) 791 goto last_with_slashes; 792 793 /* 794 * "." and ".." are special - ".." especially so because it has 795 * to be able to know about the current root directory and 796 * parent relationships. 797 */ 798 if (this.name[0] == '.') switch (this.len) { 799 default: 800 break; 801 case 2: 802 if (this.name[1] != '.') 803 break; 804 follow_dotdot(nd); 805 inode = nd->dentry->d_inode; 806 /* fallthrough */ 807 case 1: 808 continue; 809 } 810 /* 811 * See if the low-level filesystem might want 812 * to use its own hash.. 813 */ 814 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { 815 err = nd->dentry->d_op->d_hash(nd->dentry, &this); 816 if (err < 0) 817 break; 818 } 819 /* This does the actual lookups.. 
*/ 820 err = do_lookup(nd, &this, &next); 821 if (err) 822 break; 823 824 err = -ENOENT; 825 inode = next.dentry->d_inode; 826 if (!inode) 827 goto out_dput; 828 err = -ENOTDIR; 829 if (!inode->i_op) 830 goto out_dput; 831 832 if (inode->i_op->follow_link) { 833 err = do_follow_link(&next, nd); 834 if (err) 835 goto return_err; 836 err = -ENOENT; 837 inode = nd->dentry->d_inode; 838 if (!inode) 839 break; 840 err = -ENOTDIR; 841 if (!inode->i_op) 842 break; 843 } else 844 path_to_nameidata(&next, nd); 845 err = -ENOTDIR; 846 if (!inode->i_op->lookup) 847 break; 848 continue; 849 /* here ends the main loop */ 850 851 last_with_slashes: 852 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; 853 last_component: 854 nd->flags &= ~LOOKUP_CONTINUE; 855 if (lookup_flags & LOOKUP_PARENT) 856 goto lookup_parent; 857 if (this.name[0] == '.') switch (this.len) { 858 default: 859 break; 860 case 2: 861 if (this.name[1] != '.') 862 break; 863 follow_dotdot(nd); 864 inode = nd->dentry->d_inode; 865 /* fallthrough */ 866 case 1: 867 goto return_reval; 868 } 869 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { 870 err = nd->dentry->d_op->d_hash(nd->dentry, &this); 871 if (err < 0) 872 break; 873 } 874 err = do_lookup(nd, &this, &next); 875 if (err) 876 break; 877 inode = next.dentry->d_inode; 878 if ((lookup_flags & LOOKUP_FOLLOW) 879 && inode && inode->i_op && inode->i_op->follow_link) { 880 err = do_follow_link(&next, nd); 881 if (err) 882 goto return_err; 883 inode = nd->dentry->d_inode; 884 } else 885 path_to_nameidata(&next, nd); 886 err = -ENOENT; 887 if (!inode) 888 break; 889 if (lookup_flags & LOOKUP_DIRECTORY) { 890 err = -ENOTDIR; 891 if (!inode->i_op || !inode->i_op->lookup) 892 break; 893 } 894 goto return_base; 895 lookup_parent: 896 nd->last = this; 897 nd->last_type = LAST_NORM; 898 if (this.name[0] != '.') 899 goto return_base; 900 if (this.len == 1) 901 nd->last_type = LAST_DOT; 902 else if (this.len == 2 && this.name[1] == '.') 903 nd->last_type = LAST_DOTDOT; 
904 else 905 goto return_base; 906 return_reval: 907 /* 908 * We bypassed the ordinary revalidation routines. 909 * We may need to check the cached dentry for staleness. 910 */ 911 if (nd->dentry && nd->dentry->d_sb && 912 (nd->dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) { 913 err = -ESTALE; 914 /* Note: we do not d_invalidate() */ 915 if (!nd->dentry->d_op->d_revalidate(nd->dentry, nd)) 916 break; 917 } 918 return_base: 919 return 0; 920 out_dput: 921 dput_path(&next, nd); 922 break; 923 } 924 path_release(nd); 925 return_err: 926 return err; 927 } 928 929 /* 930 * Wrapper to retry pathname resolution whenever the underlying 931 * file system returns an ESTALE. 932 * 933 * Retry the whole path once, forcing real lookup requests 934 * instead of relying on the dcache. 935 */ 936 int fastcall link_path_walk(const char *name, struct nameidata *nd) 937 { 938 struct nameidata save = *nd; 939 int result; 940 941 /* make sure the stuff we saved doesn't go away */ 942 dget(save.dentry); 943 mntget(save.mnt); 944 945 result = __link_path_walk(name, nd); 946 if (result == -ESTALE) { 947 *nd = save; 948 dget(nd->dentry); 949 mntget(nd->mnt); 950 nd->flags |= LOOKUP_REVAL; 951 result = __link_path_walk(name, nd); 952 } 953 954 dput(save.dentry); 955 mntput(save.mnt); 956 957 return result; 958 } 959 960 int fastcall path_walk(const char * name, struct nameidata *nd) 961 { 962 current->total_link_count = 0; 963 return link_path_walk(name, nd); 964 } 965 966 /* 967 * SMP-safe: Returns 1 and nd will have valid dentry and mnt, if 968 * everything is done. Returns 0 and drops input nd, if lookup failed; 969 */ 970 static int __emul_lookup_dentry(const char *name, struct nameidata *nd) 971 { 972 if (path_walk(name, nd)) 973 return 0; /* something went wrong... 
*/ 974 975 if (!nd->dentry->d_inode || S_ISDIR(nd->dentry->d_inode->i_mode)) { 976 struct dentry *old_dentry = nd->dentry; 977 struct vfsmount *old_mnt = nd->mnt; 978 struct qstr last = nd->last; 979 int last_type = nd->last_type; 980 /* 981 * NAME was not found in alternate root or it's a directory. Try to find 982 * it in the normal root: 983 */ 984 nd->last_type = LAST_ROOT; 985 read_lock(¤t->fs->lock); 986 nd->mnt = mntget(current->fs->rootmnt); 987 nd->dentry = dget(current->fs->root); 988 read_unlock(¤t->fs->lock); 989 if (path_walk(name, nd) == 0) { 990 if (nd->dentry->d_inode) { 991 dput(old_dentry); 992 mntput(old_mnt); 993 return 1; 994 } 995 path_release(nd); 996 } 997 nd->dentry = old_dentry; 998 nd->mnt = old_mnt; 999 nd->last = last; 1000 nd->last_type = last_type; 1001 } 1002 return 1; 1003 } 1004 1005 void set_fs_altroot(void) 1006 { 1007 char *emul = __emul_prefix(); 1008 struct nameidata nd; 1009 struct vfsmount *mnt = NULL, *oldmnt; 1010 struct dentry *dentry = NULL, *olddentry; 1011 int err; 1012 1013 if (!emul) 1014 goto set_it; 1015 err = path_lookup(emul, LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_NOALT, &nd); 1016 if (!err) { 1017 mnt = nd.mnt; 1018 dentry = nd.dentry; 1019 } 1020 set_it: 1021 write_lock(¤t->fs->lock); 1022 oldmnt = current->fs->altrootmnt; 1023 olddentry = current->fs->altroot; 1024 current->fs->altrootmnt = mnt; 1025 current->fs->altroot = dentry; 1026 write_unlock(¤t->fs->lock); 1027 if (olddentry) { 1028 dput(olddentry); 1029 mntput(oldmnt); 1030 } 1031 } 1032 1033 /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ 1034 int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata *nd) 1035 { 1036 int retval = 0; 1037 1038 nd->last_type = LAST_ROOT; /* if there are only slashes... 
*/ 1039 nd->flags = flags; 1040 nd->depth = 0; 1041 1042 read_lock(¤t->fs->lock); 1043 if (*name=='/') { 1044 if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) { 1045 nd->mnt = mntget(current->fs->altrootmnt); 1046 nd->dentry = dget(current->fs->altroot); 1047 read_unlock(¤t->fs->lock); 1048 if (__emul_lookup_dentry(name,nd)) 1049 goto out; /* found in altroot */ 1050 read_lock(¤t->fs->lock); 1051 } 1052 nd->mnt = mntget(current->fs->rootmnt); 1053 nd->dentry = dget(current->fs->root); 1054 } else { 1055 nd->mnt = mntget(current->fs->pwdmnt); 1056 nd->dentry = dget(current->fs->pwd); 1057 } 1058 read_unlock(¤t->fs->lock); 1059 current->total_link_count = 0; 1060 retval = link_path_walk(name, nd); 1061 out: 1062 if (unlikely(current->audit_context 1063 && nd && nd->dentry && nd->dentry->d_inode)) 1064 audit_inode(name, nd->dentry->d_inode, flags); 1065 return retval; 1066 } 1067 1068 static int __path_lookup_intent_open(const char *name, unsigned int lookup_flags, 1069 struct nameidata *nd, int open_flags, int create_mode) 1070 { 1071 struct file *filp = get_empty_filp(); 1072 int err; 1073 1074 if (filp == NULL) 1075 return -ENFILE; 1076 nd->intent.open.file = filp; 1077 nd->intent.open.flags = open_flags; 1078 nd->intent.open.create_mode = create_mode; 1079 err = path_lookup(name, lookup_flags|LOOKUP_OPEN, nd); 1080 if (IS_ERR(nd->intent.open.file)) { 1081 if (err == 0) { 1082 err = PTR_ERR(nd->intent.open.file); 1083 path_release(nd); 1084 } 1085 } else if (err != 0) 1086 release_open_intent(nd); 1087 return err; 1088 } 1089 1090 /** 1091 * path_lookup_open - lookup a file path with open intent 1092 * @name: pointer to file name 1093 * @lookup_flags: lookup intent flags 1094 * @nd: pointer to nameidata 1095 * @open_flags: open intent flags 1096 */ 1097 int path_lookup_open(const char *name, unsigned int lookup_flags, 1098 struct nameidata *nd, int open_flags) 1099 { 1100 return __path_lookup_intent_open(name, lookup_flags, nd, 1101 open_flags, 0); 1102 } 
1103 1104 /** 1105 * path_lookup_create - lookup a file path with open + create intent 1106 * @name: pointer to file name 1107 * @lookup_flags: lookup intent flags 1108 * @nd: pointer to nameidata 1109 * @open_flags: open intent flags 1110 * @create_mode: create intent flags 1111 */ 1112 int path_lookup_create(const char *name, unsigned int lookup_flags, 1113 struct nameidata *nd, int open_flags, int create_mode) 1114 { 1115 return __path_lookup_intent_open(name, lookup_flags|LOOKUP_CREATE, nd, 1116 open_flags, create_mode); 1117 } 1118 1119 int __user_path_lookup_open(const char __user *name, unsigned int lookup_flags, 1120 struct nameidata *nd, int open_flags) 1121 { 1122 char *tmp = getname(name); 1123 int err = PTR_ERR(tmp); 1124 1125 if (!IS_ERR(tmp)) { 1126 err = __path_lookup_intent_open(tmp, lookup_flags, nd, open_flags, 0); 1127 putname(tmp); 1128 } 1129 return err; 1130 } 1131 1132 /* 1133 * Restricted form of lookup. Doesn't follow links, single-component only, 1134 * needs parent already locked. Doesn't follow mounts. 1135 * SMP-safe. 1136 */ 1137 static struct dentry * __lookup_hash(struct qstr *name, struct dentry * base, struct nameidata *nd) 1138 { 1139 struct dentry * dentry; 1140 struct inode *inode; 1141 int err; 1142 1143 inode = base->d_inode; 1144 err = permission(inode, MAY_EXEC, nd); 1145 dentry = ERR_PTR(err); 1146 if (err) 1147 goto out; 1148 1149 /* 1150 * See if the low-level filesystem might want 1151 * to use its own hash.. 
1152 */ 1153 if (base->d_op && base->d_op->d_hash) { 1154 err = base->d_op->d_hash(base, name); 1155 dentry = ERR_PTR(err); 1156 if (err < 0) 1157 goto out; 1158 } 1159 1160 dentry = cached_lookup(base, name, nd); 1161 if (!dentry) { 1162 struct dentry *new = d_alloc(base, name); 1163 dentry = ERR_PTR(-ENOMEM); 1164 if (!new) 1165 goto out; 1166 dentry = inode->i_op->lookup(inode, new, nd); 1167 if (!dentry) 1168 dentry = new; 1169 else 1170 dput(new); 1171 } 1172 out: 1173 return dentry; 1174 } 1175 1176 struct dentry * lookup_hash(struct qstr *name, struct dentry * base) 1177 { 1178 return __lookup_hash(name, base, NULL); 1179 } 1180 1181 /* SMP-safe */ 1182 struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) 1183 { 1184 unsigned long hash; 1185 struct qstr this; 1186 unsigned int c; 1187 1188 this.name = name; 1189 this.len = len; 1190 if (!len) 1191 goto access; 1192 1193 hash = init_name_hash(); 1194 while (len--) { 1195 c = *(const unsigned char *)name++; 1196 if (c == '/' || c == '\0') 1197 goto access; 1198 hash = partial_name_hash(c, hash); 1199 } 1200 this.hash = end_name_hash(hash); 1201 1202 return lookup_hash(&this, base); 1203 access: 1204 return ERR_PTR(-EACCES); 1205 } 1206 1207 /* 1208 * namei() 1209 * 1210 * is used by most simple commands to get the inode of a specified name. 1211 * Open, link etc use their own routines, but this is enough for things 1212 * like 'chmod' etc. 1213 * 1214 * namei exists in two versions: namei/lnamei. The only difference is 1215 * that namei follows links, while lnamei does not. 1216 * SMP-safe 1217 */ 1218 int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) 1219 { 1220 char *tmp = getname(name); 1221 int err = PTR_ERR(tmp); 1222 1223 if (!IS_ERR(tmp)) { 1224 err = path_lookup(tmp, flags, nd); 1225 putname(tmp); 1226 } 1227 return err; 1228 } 1229 1230 /* 1231 * It's inline, so penalty for filesystems that don't use sticky bit is 1232 * minimal. 
1233 */ 1234 static inline int check_sticky(struct inode *dir, struct inode *inode) 1235 { 1236 if (!(dir->i_mode & S_ISVTX)) 1237 return 0; 1238 if (inode->i_uid == current->fsuid) 1239 return 0; 1240 if (dir->i_uid == current->fsuid) 1241 return 0; 1242 return !capable(CAP_FOWNER); 1243 } 1244 1245 /* 1246 * Check whether we can remove a link victim from directory dir, check 1247 * whether the type of victim is right. 1248 * 1. We can't do it if dir is read-only (done in permission()) 1249 * 2. We should have write and exec permissions on dir 1250 * 3. We can't remove anything from append-only dir 1251 * 4. We can't do anything with immutable dir (done in permission()) 1252 * 5. If the sticky bit on dir is set we should either 1253 * a. be owner of dir, or 1254 * b. be owner of victim, or 1255 * c. have CAP_FOWNER capability 1256 * 6. If the victim is append-only or immutable we can't do antyhing with 1257 * links pointing to it. 1258 * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR. 1259 * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR. 1260 * 9. We can't remove a root or mountpoint. 1261 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by 1262 * nfs_async_unlink(). 
1263 */ 1264 static inline int may_delete(struct inode *dir,struct dentry *victim,int isdir) 1265 { 1266 int error; 1267 1268 if (!victim->d_inode) 1269 return -ENOENT; 1270 1271 BUG_ON(victim->d_parent->d_inode != dir); 1272 1273 error = permission(dir,MAY_WRITE | MAY_EXEC, NULL); 1274 if (error) 1275 return error; 1276 if (IS_APPEND(dir)) 1277 return -EPERM; 1278 if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| 1279 IS_IMMUTABLE(victim->d_inode)) 1280 return -EPERM; 1281 if (isdir) { 1282 if (!S_ISDIR(victim->d_inode->i_mode)) 1283 return -ENOTDIR; 1284 if (IS_ROOT(victim)) 1285 return -EBUSY; 1286 } else if (S_ISDIR(victim->d_inode->i_mode)) 1287 return -EISDIR; 1288 if (IS_DEADDIR(dir)) 1289 return -ENOENT; 1290 if (victim->d_flags & DCACHE_NFSFS_RENAMED) 1291 return -EBUSY; 1292 return 0; 1293 } 1294 1295 /* Check whether we can create an object with dentry child in directory 1296 * dir. 1297 * 1. We can't do it if child already exists (open has special treatment for 1298 * this case, but since we are inlined it's OK) 1299 * 2. We can't do it if dir is read-only (done in permission()) 1300 * 3. We should have write and exec permissions on dir 1301 * 4. We can't do it if dir is immutable (done in permission()) 1302 */ 1303 static inline int may_create(struct inode *dir, struct dentry *child, 1304 struct nameidata *nd) 1305 { 1306 if (child->d_inode) 1307 return -EEXIST; 1308 if (IS_DEADDIR(dir)) 1309 return -ENOENT; 1310 return permission(dir,MAY_WRITE | MAY_EXEC, nd); 1311 } 1312 1313 /* 1314 * O_DIRECTORY translates into forcing a directory lookup. 1315 */ 1316 static inline int lookup_flags(unsigned int f) 1317 { 1318 unsigned long retval = LOOKUP_FOLLOW; 1319 1320 if (f & O_NOFOLLOW) 1321 retval &= ~LOOKUP_FOLLOW; 1322 1323 if (f & O_DIRECTORY) 1324 retval |= LOOKUP_DIRECTORY; 1325 1326 return retval; 1327 } 1328 1329 /* 1330 * p1 and p2 should be directories on the same fs. 
1331 */ 1332 struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) 1333 { 1334 struct dentry *p; 1335 1336 if (p1 == p2) { 1337 down(&p1->d_inode->i_sem); 1338 return NULL; 1339 } 1340 1341 down(&p1->d_inode->i_sb->s_vfs_rename_sem); 1342 1343 for (p = p1; p->d_parent != p; p = p->d_parent) { 1344 if (p->d_parent == p2) { 1345 down(&p2->d_inode->i_sem); 1346 down(&p1->d_inode->i_sem); 1347 return p; 1348 } 1349 } 1350 1351 for (p = p2; p->d_parent != p; p = p->d_parent) { 1352 if (p->d_parent == p1) { 1353 down(&p1->d_inode->i_sem); 1354 down(&p2->d_inode->i_sem); 1355 return p; 1356 } 1357 } 1358 1359 down(&p1->d_inode->i_sem); 1360 down(&p2->d_inode->i_sem); 1361 return NULL; 1362 } 1363 1364 void unlock_rename(struct dentry *p1, struct dentry *p2) 1365 { 1366 up(&p1->d_inode->i_sem); 1367 if (p1 != p2) { 1368 up(&p2->d_inode->i_sem); 1369 up(&p1->d_inode->i_sb->s_vfs_rename_sem); 1370 } 1371 } 1372 1373 int vfs_create(struct inode *dir, struct dentry *dentry, int mode, 1374 struct nameidata *nd) 1375 { 1376 int error = may_create(dir, dentry, nd); 1377 1378 if (error) 1379 return error; 1380 1381 if (!dir->i_op || !dir->i_op->create) 1382 return -EACCES; /* shouldn't it be ENOSYS? 
*/ 1383 mode &= S_IALLUGO; 1384 mode |= S_IFREG; 1385 error = security_inode_create(dir, dentry, mode); 1386 if (error) 1387 return error; 1388 DQUOT_INIT(dir); 1389 error = dir->i_op->create(dir, dentry, mode, nd); 1390 if (!error) 1391 fsnotify_create(dir, dentry->d_name.name); 1392 return error; 1393 } 1394 1395 int may_open(struct nameidata *nd, int acc_mode, int flag) 1396 { 1397 struct dentry *dentry = nd->dentry; 1398 struct inode *inode = dentry->d_inode; 1399 int error; 1400 1401 if (!inode) 1402 return -ENOENT; 1403 1404 if (S_ISLNK(inode->i_mode)) 1405 return -ELOOP; 1406 1407 if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE)) 1408 return -EISDIR; 1409 1410 error = permission(inode, acc_mode, nd); 1411 if (error) 1412 return error; 1413 1414 /* 1415 * FIFO's, sockets and device files are special: they don't 1416 * actually live on the filesystem itself, and as such you 1417 * can write to them even if the filesystem is read-only. 1418 */ 1419 if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { 1420 flag &= ~O_TRUNC; 1421 } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) { 1422 if (nd->mnt->mnt_flags & MNT_NODEV) 1423 return -EACCES; 1424 1425 flag &= ~O_TRUNC; 1426 } else if (IS_RDONLY(inode) && (flag & FMODE_WRITE)) 1427 return -EROFS; 1428 /* 1429 * An append-only file must be opened in append mode for writing. 1430 */ 1431 if (IS_APPEND(inode)) { 1432 if ((flag & FMODE_WRITE) && !(flag & O_APPEND)) 1433 return -EPERM; 1434 if (flag & O_TRUNC) 1435 return -EPERM; 1436 } 1437 1438 /* O_NOATIME can only be set by the owner or superuser */ 1439 if (flag & O_NOATIME) 1440 if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER)) 1441 return -EPERM; 1442 1443 /* 1444 * Ensure there are no outstanding leases on the file. 
1445 */ 1446 error = break_lease(inode, flag); 1447 if (error) 1448 return error; 1449 1450 if (flag & O_TRUNC) { 1451 error = get_write_access(inode); 1452 if (error) 1453 return error; 1454 1455 /* 1456 * Refuse to truncate files with mandatory locks held on them. 1457 */ 1458 error = locks_verify_locked(inode); 1459 if (!error) { 1460 DQUOT_INIT(inode); 1461 1462 error = do_truncate(dentry, 0); 1463 } 1464 put_write_access(inode); 1465 if (error) 1466 return error; 1467 } else 1468 if (flag & FMODE_WRITE) 1469 DQUOT_INIT(inode); 1470 1471 return 0; 1472 } 1473 1474 /* 1475 * open_namei() 1476 * 1477 * namei for open - this is in fact almost the whole open-routine. 1478 * 1479 * Note that the low bits of "flag" aren't the same as in the open 1480 * system call - they are 00 - no permissions needed 1481 * 01 - read permission needed 1482 * 10 - write permission needed 1483 * 11 - read/write permissions needed 1484 * which is a lot more logical, and also allows the "no perm" needed 1485 * for symlinks (where the permissions are checked later). 1486 * SMP-safe 1487 */ 1488 int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) 1489 { 1490 int acc_mode, error; 1491 struct path path; 1492 struct dentry *dir; 1493 int count = 0; 1494 1495 acc_mode = ACC_MODE(flag); 1496 1497 /* O_TRUNC implies we need access checks for write permissions */ 1498 if (flag & O_TRUNC) 1499 acc_mode |= MAY_WRITE; 1500 1501 /* Allow the LSM permission hook to distinguish append 1502 access from general write access. */ 1503 if (flag & O_APPEND) 1504 acc_mode |= MAY_APPEND; 1505 1506 /* 1507 * The simplest case - just a plain lookup. 1508 */ 1509 if (!(flag & O_CREAT)) { 1510 error = path_lookup_open(pathname, lookup_flags(flag), nd, flag); 1511 if (error) 1512 return error; 1513 goto ok; 1514 } 1515 1516 /* 1517 * Create - we need to know the parent. 
1518 */ 1519 error = path_lookup_create(pathname, LOOKUP_PARENT, nd, flag, mode); 1520 if (error) 1521 return error; 1522 1523 /* 1524 * We have the parent and last component. First of all, check 1525 * that we are not asked to creat(2) an obvious directory - that 1526 * will not do. 1527 */ 1528 error = -EISDIR; 1529 if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len]) 1530 goto exit; 1531 1532 dir = nd->dentry; 1533 nd->flags &= ~LOOKUP_PARENT; 1534 down(&dir->d_inode->i_sem); 1535 path.dentry = __lookup_hash(&nd->last, nd->dentry, nd); 1536 path.mnt = nd->mnt; 1537 1538 do_last: 1539 error = PTR_ERR(path.dentry); 1540 if (IS_ERR(path.dentry)) { 1541 up(&dir->d_inode->i_sem); 1542 goto exit; 1543 } 1544 1545 /* Negative dentry, just create the file */ 1546 if (!path.dentry->d_inode) { 1547 if (!IS_POSIXACL(dir->d_inode)) 1548 mode &= ~current->fs->umask; 1549 error = vfs_create(dir->d_inode, path.dentry, mode, nd); 1550 up(&dir->d_inode->i_sem); 1551 dput(nd->dentry); 1552 nd->dentry = path.dentry; 1553 if (error) 1554 goto exit; 1555 /* Don't check for write permission, don't truncate */ 1556 acc_mode = 0; 1557 flag &= ~O_TRUNC; 1558 goto ok; 1559 } 1560 1561 /* 1562 * It already exists. 
1563 */ 1564 up(&dir->d_inode->i_sem); 1565 1566 error = -EEXIST; 1567 if (flag & O_EXCL) 1568 goto exit_dput; 1569 1570 if (__follow_mount(&path)) { 1571 error = -ELOOP; 1572 if (flag & O_NOFOLLOW) 1573 goto exit_dput; 1574 } 1575 error = -ENOENT; 1576 if (!path.dentry->d_inode) 1577 goto exit_dput; 1578 if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link) 1579 goto do_link; 1580 1581 path_to_nameidata(&path, nd); 1582 error = -EISDIR; 1583 if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode)) 1584 goto exit; 1585 ok: 1586 error = may_open(nd, acc_mode, flag); 1587 if (error) 1588 goto exit; 1589 return 0; 1590 1591 exit_dput: 1592 dput_path(&path, nd); 1593 exit: 1594 if (!IS_ERR(nd->intent.open.file)) 1595 release_open_intent(nd); 1596 path_release(nd); 1597 return error; 1598 1599 do_link: 1600 error = -ELOOP; 1601 if (flag & O_NOFOLLOW) 1602 goto exit_dput; 1603 /* 1604 * This is subtle. Instead of calling do_follow_link() we do the 1605 * thing by hands. The reason is that this way we have zero link_count 1606 * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT. 1607 * After that we have the parent and last component, i.e. 1608 * we are in the same situation as after the first path_walk(). 1609 * Well, almost - if the last component is normal we get its copy 1610 * stored in nd->last.name and we will have to putname() it when we 1611 * are done. Procfs-like symlinks just set LAST_BIND. 
1612 */ 1613 nd->flags |= LOOKUP_PARENT; 1614 error = security_inode_follow_link(path.dentry, nd); 1615 if (error) 1616 goto exit_dput; 1617 error = __do_follow_link(&path, nd); 1618 if (error) 1619 return error; 1620 nd->flags &= ~LOOKUP_PARENT; 1621 if (nd->last_type == LAST_BIND) 1622 goto ok; 1623 error = -EISDIR; 1624 if (nd->last_type != LAST_NORM) 1625 goto exit; 1626 if (nd->last.name[nd->last.len]) { 1627 __putname(nd->last.name); 1628 goto exit; 1629 } 1630 error = -ELOOP; 1631 if (count++==32) { 1632 __putname(nd->last.name); 1633 goto exit; 1634 } 1635 dir = nd->dentry; 1636 down(&dir->d_inode->i_sem); 1637 path.dentry = __lookup_hash(&nd->last, nd->dentry, nd); 1638 path.mnt = nd->mnt; 1639 __putname(nd->last.name); 1640 goto do_last; 1641 } 1642 1643 /** 1644 * lookup_create - lookup a dentry, creating it if it doesn't exist 1645 * @nd: nameidata info 1646 * @is_dir: directory flag 1647 * 1648 * Simple function to lookup and return a dentry and create it 1649 * if it doesn't exist. Is SMP-safe. 1650 * 1651 * Returns with nd->dentry->d_inode->i_sem locked. 1652 */ 1653 struct dentry *lookup_create(struct nameidata *nd, int is_dir) 1654 { 1655 struct dentry *dentry = ERR_PTR(-EEXIST); 1656 1657 down(&nd->dentry->d_inode->i_sem); 1658 /* 1659 * Yucky last component or no last component at all? 1660 * (foo/., foo/.., /////) 1661 */ 1662 if (nd->last_type != LAST_NORM) 1663 goto fail; 1664 nd->flags &= ~LOOKUP_PARENT; 1665 1666 /* 1667 * Do the final lookup. 1668 */ 1669 dentry = lookup_hash(&nd->last, nd->dentry); 1670 if (IS_ERR(dentry)) 1671 goto fail; 1672 1673 /* 1674 * Special case - lookup gave negative, but... we had foo/bar/ 1675 * From the vfs_mknod() POV we just have a negative dentry - 1676 * all is fine. Let's be bastards - you had / on the end, you've 1677 * been asking for (non-existent) directory. -ENOENT for you. 
1678 */ 1679 if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) 1680 goto enoent; 1681 return dentry; 1682 enoent: 1683 dput(dentry); 1684 dentry = ERR_PTR(-ENOENT); 1685 fail: 1686 return dentry; 1687 } 1688 EXPORT_SYMBOL_GPL(lookup_create); 1689 1690 int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) 1691 { 1692 int error = may_create(dir, dentry, NULL); 1693 1694 if (error) 1695 return error; 1696 1697 if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) 1698 return -EPERM; 1699 1700 if (!dir->i_op || !dir->i_op->mknod) 1701 return -EPERM; 1702 1703 error = security_inode_mknod(dir, dentry, mode, dev); 1704 if (error) 1705 return error; 1706 1707 DQUOT_INIT(dir); 1708 error = dir->i_op->mknod(dir, dentry, mode, dev); 1709 if (!error) 1710 fsnotify_create(dir, dentry->d_name.name); 1711 return error; 1712 } 1713 1714 asmlinkage long sys_mknod(const char __user * filename, int mode, unsigned dev) 1715 { 1716 int error = 0; 1717 char * tmp; 1718 struct dentry * dentry; 1719 struct nameidata nd; 1720 1721 if (S_ISDIR(mode)) 1722 return -EPERM; 1723 tmp = getname(filename); 1724 if (IS_ERR(tmp)) 1725 return PTR_ERR(tmp); 1726 1727 error = path_lookup(tmp, LOOKUP_PARENT, &nd); 1728 if (error) 1729 goto out; 1730 dentry = lookup_create(&nd, 0); 1731 error = PTR_ERR(dentry); 1732 1733 if (!IS_POSIXACL(nd.dentry->d_inode)) 1734 mode &= ~current->fs->umask; 1735 if (!IS_ERR(dentry)) { 1736 switch (mode & S_IFMT) { 1737 case 0: case S_IFREG: 1738 error = vfs_create(nd.dentry->d_inode,dentry,mode,&nd); 1739 break; 1740 case S_IFCHR: case S_IFBLK: 1741 error = vfs_mknod(nd.dentry->d_inode,dentry,mode, 1742 new_decode_dev(dev)); 1743 break; 1744 case S_IFIFO: case S_IFSOCK: 1745 error = vfs_mknod(nd.dentry->d_inode,dentry,mode,0); 1746 break; 1747 case S_IFDIR: 1748 error = -EPERM; 1749 break; 1750 default: 1751 error = -EINVAL; 1752 } 1753 dput(dentry); 1754 } 1755 up(&nd.dentry->d_inode->i_sem); 1756 path_release(&nd); 1757 
out: 1758 putname(tmp); 1759 1760 return error; 1761 } 1762 1763 int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 1764 { 1765 int error = may_create(dir, dentry, NULL); 1766 1767 if (error) 1768 return error; 1769 1770 if (!dir->i_op || !dir->i_op->mkdir) 1771 return -EPERM; 1772 1773 mode &= (S_IRWXUGO|S_ISVTX); 1774 error = security_inode_mkdir(dir, dentry, mode); 1775 if (error) 1776 return error; 1777 1778 DQUOT_INIT(dir); 1779 error = dir->i_op->mkdir(dir, dentry, mode); 1780 if (!error) 1781 fsnotify_mkdir(dir, dentry->d_name.name); 1782 return error; 1783 } 1784 1785 asmlinkage long sys_mkdir(const char __user * pathname, int mode) 1786 { 1787 int error = 0; 1788 char * tmp; 1789 1790 tmp = getname(pathname); 1791 error = PTR_ERR(tmp); 1792 if (!IS_ERR(tmp)) { 1793 struct dentry *dentry; 1794 struct nameidata nd; 1795 1796 error = path_lookup(tmp, LOOKUP_PARENT, &nd); 1797 if (error) 1798 goto out; 1799 dentry = lookup_create(&nd, 1); 1800 error = PTR_ERR(dentry); 1801 if (!IS_ERR(dentry)) { 1802 if (!IS_POSIXACL(nd.dentry->d_inode)) 1803 mode &= ~current->fs->umask; 1804 error = vfs_mkdir(nd.dentry->d_inode, dentry, mode); 1805 dput(dentry); 1806 } 1807 up(&nd.dentry->d_inode->i_sem); 1808 path_release(&nd); 1809 out: 1810 putname(tmp); 1811 } 1812 1813 return error; 1814 } 1815 1816 /* 1817 * We try to drop the dentry early: we should have 1818 * a usage count of 2 if we're the only user of this 1819 * dentry, and if that is true (possibly after pruning 1820 * the dcache), then we drop the dentry now. 1821 * 1822 * A low-level filesystem can, if it choses, legally 1823 * do a 1824 * 1825 * if (!d_unhashed(dentry)) 1826 * return -EBUSY; 1827 * 1828 * if it cannot handle the case of removing a directory 1829 * that is still in use by something else.. 
1830 */ 1831 void dentry_unhash(struct dentry *dentry) 1832 { 1833 dget(dentry); 1834 if (atomic_read(&dentry->d_count)) 1835 shrink_dcache_parent(dentry); 1836 spin_lock(&dcache_lock); 1837 spin_lock(&dentry->d_lock); 1838 if (atomic_read(&dentry->d_count) == 2) 1839 __d_drop(dentry); 1840 spin_unlock(&dentry->d_lock); 1841 spin_unlock(&dcache_lock); 1842 } 1843 1844 int vfs_rmdir(struct inode *dir, struct dentry *dentry) 1845 { 1846 int error = may_delete(dir, dentry, 1); 1847 1848 if (error) 1849 return error; 1850 1851 if (!dir->i_op || !dir->i_op->rmdir) 1852 return -EPERM; 1853 1854 DQUOT_INIT(dir); 1855 1856 down(&dentry->d_inode->i_sem); 1857 dentry_unhash(dentry); 1858 if (d_mountpoint(dentry)) 1859 error = -EBUSY; 1860 else { 1861 error = security_inode_rmdir(dir, dentry); 1862 if (!error) { 1863 error = dir->i_op->rmdir(dir, dentry); 1864 if (!error) 1865 dentry->d_inode->i_flags |= S_DEAD; 1866 } 1867 } 1868 up(&dentry->d_inode->i_sem); 1869 if (!error) { 1870 d_delete(dentry); 1871 } 1872 dput(dentry); 1873 1874 return error; 1875 } 1876 1877 asmlinkage long sys_rmdir(const char __user * pathname) 1878 { 1879 int error = 0; 1880 char * name; 1881 struct dentry *dentry; 1882 struct nameidata nd; 1883 1884 name = getname(pathname); 1885 if(IS_ERR(name)) 1886 return PTR_ERR(name); 1887 1888 error = path_lookup(name, LOOKUP_PARENT, &nd); 1889 if (error) 1890 goto exit; 1891 1892 switch(nd.last_type) { 1893 case LAST_DOTDOT: 1894 error = -ENOTEMPTY; 1895 goto exit1; 1896 case LAST_DOT: 1897 error = -EINVAL; 1898 goto exit1; 1899 case LAST_ROOT: 1900 error = -EBUSY; 1901 goto exit1; 1902 } 1903 down(&nd.dentry->d_inode->i_sem); 1904 dentry = lookup_hash(&nd.last, nd.dentry); 1905 error = PTR_ERR(dentry); 1906 if (!IS_ERR(dentry)) { 1907 error = vfs_rmdir(nd.dentry->d_inode, dentry); 1908 dput(dentry); 1909 } 1910 up(&nd.dentry->d_inode->i_sem); 1911 exit1: 1912 path_release(&nd); 1913 exit: 1914 putname(name); 1915 return error; 1916 } 1917 1918 int 
vfs_unlink(struct inode *dir, struct dentry *dentry) 1919 { 1920 int error = may_delete(dir, dentry, 0); 1921 1922 if (error) 1923 return error; 1924 1925 if (!dir->i_op || !dir->i_op->unlink) 1926 return -EPERM; 1927 1928 DQUOT_INIT(dir); 1929 1930 down(&dentry->d_inode->i_sem); 1931 if (d_mountpoint(dentry)) 1932 error = -EBUSY; 1933 else { 1934 error = security_inode_unlink(dir, dentry); 1935 if (!error) 1936 error = dir->i_op->unlink(dir, dentry); 1937 } 1938 up(&dentry->d_inode->i_sem); 1939 1940 /* We don't d_delete() NFS sillyrenamed files--they still exist. */ 1941 if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) { 1942 d_delete(dentry); 1943 } 1944 1945 return error; 1946 } 1947 1948 /* 1949 * Make sure that the actual truncation of the file will occur outside its 1950 * directory's i_sem. Truncate can take a long time if there is a lot of 1951 * writeout happening, and we don't want to prevent access to the directory 1952 * while waiting on the I/O. 1953 */ 1954 asmlinkage long sys_unlink(const char __user * pathname) 1955 { 1956 int error = 0; 1957 char * name; 1958 struct dentry *dentry; 1959 struct nameidata nd; 1960 struct inode *inode = NULL; 1961 1962 name = getname(pathname); 1963 if(IS_ERR(name)) 1964 return PTR_ERR(name); 1965 1966 error = path_lookup(name, LOOKUP_PARENT, &nd); 1967 if (error) 1968 goto exit; 1969 error = -EISDIR; 1970 if (nd.last_type != LAST_NORM) 1971 goto exit1; 1972 down(&nd.dentry->d_inode->i_sem); 1973 dentry = lookup_hash(&nd.last, nd.dentry); 1974 error = PTR_ERR(dentry); 1975 if (!IS_ERR(dentry)) { 1976 /* Why not before? 
Because we want correct error value */ 1977 if (nd.last.name[nd.last.len]) 1978 goto slashes; 1979 inode = dentry->d_inode; 1980 if (inode) 1981 atomic_inc(&inode->i_count); 1982 error = vfs_unlink(nd.dentry->d_inode, dentry); 1983 exit2: 1984 dput(dentry); 1985 } 1986 up(&nd.dentry->d_inode->i_sem); 1987 if (inode) 1988 iput(inode); /* truncate the inode here */ 1989 exit1: 1990 path_release(&nd); 1991 exit: 1992 putname(name); 1993 return error; 1994 1995 slashes: 1996 error = !dentry->d_inode ? -ENOENT : 1997 S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR; 1998 goto exit2; 1999 } 2000 2001 int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode) 2002 { 2003 int error = may_create(dir, dentry, NULL); 2004 2005 if (error) 2006 return error; 2007 2008 if (!dir->i_op || !dir->i_op->symlink) 2009 return -EPERM; 2010 2011 error = security_inode_symlink(dir, dentry, oldname); 2012 if (error) 2013 return error; 2014 2015 DQUOT_INIT(dir); 2016 error = dir->i_op->symlink(dir, dentry, oldname); 2017 if (!error) 2018 fsnotify_create(dir, dentry->d_name.name); 2019 return error; 2020 } 2021 2022 asmlinkage long sys_symlink(const char __user * oldname, const char __user * newname) 2023 { 2024 int error = 0; 2025 char * from; 2026 char * to; 2027 2028 from = getname(oldname); 2029 if(IS_ERR(from)) 2030 return PTR_ERR(from); 2031 to = getname(newname); 2032 error = PTR_ERR(to); 2033 if (!IS_ERR(to)) { 2034 struct dentry *dentry; 2035 struct nameidata nd; 2036 2037 error = path_lookup(to, LOOKUP_PARENT, &nd); 2038 if (error) 2039 goto out; 2040 dentry = lookup_create(&nd, 0); 2041 error = PTR_ERR(dentry); 2042 if (!IS_ERR(dentry)) { 2043 error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO); 2044 dput(dentry); 2045 } 2046 up(&nd.dentry->d_inode->i_sem); 2047 path_release(&nd); 2048 out: 2049 putname(to); 2050 } 2051 putname(from); 2052 return error; 2053 } 2054 2055 int vfs_link(struct dentry *old_dentry, struct inode *dir, 
struct dentry *new_dentry) 2056 { 2057 struct inode *inode = old_dentry->d_inode; 2058 int error; 2059 2060 if (!inode) 2061 return -ENOENT; 2062 2063 error = may_create(dir, new_dentry, NULL); 2064 if (error) 2065 return error; 2066 2067 if (dir->i_sb != inode->i_sb) 2068 return -EXDEV; 2069 2070 /* 2071 * A link to an append-only or immutable file cannot be created. 2072 */ 2073 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 2074 return -EPERM; 2075 if (!dir->i_op || !dir->i_op->link) 2076 return -EPERM; 2077 if (S_ISDIR(old_dentry->d_inode->i_mode)) 2078 return -EPERM; 2079 2080 error = security_inode_link(old_dentry, dir, new_dentry); 2081 if (error) 2082 return error; 2083 2084 down(&old_dentry->d_inode->i_sem); 2085 DQUOT_INIT(dir); 2086 error = dir->i_op->link(old_dentry, dir, new_dentry); 2087 up(&old_dentry->d_inode->i_sem); 2088 if (!error) 2089 fsnotify_create(dir, new_dentry->d_name.name); 2090 return error; 2091 } 2092 2093 /* 2094 * Hardlinks are often used in delicate situations. We avoid 2095 * security-related surprises by not following symlinks on the 2096 * newname. --KAB 2097 * 2098 * We don't follow them on the oldname either to be compatible 2099 * with linux 2.0, and to avoid hard-linking to directories 2100 * and other special files. 
--ADM
 */
asmlinkage long sys_link(const char __user * oldname, const char __user * newname)
{
	struct nameidata old_nd, parent_nd;
	struct dentry *new_dentry;
	char *to;
	int error;

	to = getname(newname);
	if (IS_ERR(to))
		return PTR_ERR(to);

	/* flags == 0: the source name is walked without LOOKUP_FOLLOW */
	error = __user_walk(oldname, 0, &old_nd);
	if (error)
		goto free_name;

	error = path_lookup(to, LOOKUP_PARENT, &parent_nd);
	if (error)
		goto release_old;

	/* Source and target parent must sit on the same mount. */
	if (old_nd.mnt != parent_nd.mnt) {
		error = -EXDEV;
		goto release_parent;
	}

	/* lookup_create() returns with the parent's i_sem held, even on error */
	new_dentry = lookup_create(&parent_nd, 0);
	if (IS_ERR(new_dentry)) {
		error = PTR_ERR(new_dentry);
	} else {
		error = vfs_link(old_nd.dentry, parent_nd.dentry->d_inode,
				 new_dentry);
		dput(new_dentry);
	}
	up(&parent_nd.dentry->d_inode->i_sem);
release_parent:
	path_release(&parent_nd);
release_old:
	path_release(&old_nd);
free_name:
	putname(to);

	return error;
}

/*
 * The worst of all namespace operations - renaming directory. "Perverted"
 * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
 * Problems:
 *	a) we can get into loop creation. Check is done in is_subdir().
 *	b) race potential - two innocent renames can create a loop together.
 *	   That's where 4.4 screws up. Current fix: serialization on
 *	   sb->s_vfs_rename_sem. We might be more accurate, but that's another
 *	   story.
 *	c) we have to lock _three_ objects - parents and victim (if it exists).
 *	   And that - after we got ->i_sem on parents (until then we don't know
 *	   whether the target exists).  Solution: try to be smart with locking
 *	   order for inodes.  We rely on the fact that tree topology may change
 *	   only under ->s_vfs_rename_sem _and_ that parent of the object we
 *	   move will be locked.  Thus we can rank directories by the tree
 *	   (ancestors first) and rank all non-directories after them.
2155 * That works since everybody except rename does "lock parent, lookup, 2156 * lock child" and rename is under ->s_vfs_rename_sem. 2157 * HOWEVER, it relies on the assumption that any object with ->lookup() 2158 * has no more than 1 dentry. If "hybrid" objects will ever appear, 2159 * we'd better make sure that there's no link(2) for them. 2160 * d) some filesystems don't support opened-but-unlinked directories, 2161 * either because of layout or because they are not ready to deal with 2162 * all cases correctly. The latter will be fixed (taking this sort of 2163 * stuff into VFS), but the former is not going away. Solution: the same 2164 * trick as in rmdir(). 2165 * e) conversion from fhandle to dentry may come in the wrong moment - when 2166 * we are removing the target. Solution: we will have to grab ->i_sem 2167 * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on 2168 * ->i_sem on parents, which works but leads to some truely excessive 2169 * locking]. 2170 */ 2171 static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, 2172 struct inode *new_dir, struct dentry *new_dentry) 2173 { 2174 int error = 0; 2175 struct inode *target; 2176 2177 /* 2178 * If we are going to change the parent - check write permissions, 2179 * we'll need to flip '..'. 
2180 */ 2181 if (new_dir != old_dir) { 2182 error = permission(old_dentry->d_inode, MAY_WRITE, NULL); 2183 if (error) 2184 return error; 2185 } 2186 2187 error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); 2188 if (error) 2189 return error; 2190 2191 target = new_dentry->d_inode; 2192 if (target) { 2193 down(&target->i_sem); 2194 dentry_unhash(new_dentry); 2195 } 2196 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) 2197 error = -EBUSY; 2198 else 2199 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2200 if (target) { 2201 if (!error) 2202 target->i_flags |= S_DEAD; 2203 up(&target->i_sem); 2204 if (d_unhashed(new_dentry)) 2205 d_rehash(new_dentry); 2206 dput(new_dentry); 2207 } 2208 if (!error) 2209 d_move(old_dentry,new_dentry); 2210 return error; 2211 } 2212 2213 static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, 2214 struct inode *new_dir, struct dentry *new_dentry) 2215 { 2216 struct inode *target; 2217 int error; 2218 2219 error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); 2220 if (error) 2221 return error; 2222 2223 dget(new_dentry); 2224 target = new_dentry->d_inode; 2225 if (target) 2226 down(&target->i_sem); 2227 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) 2228 error = -EBUSY; 2229 else 2230 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2231 if (!error) { 2232 /* The following d_move() should become unconditional */ 2233 if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME)) 2234 d_move(old_dentry, new_dentry); 2235 } 2236 if (target) 2237 up(&target->i_sem); 2238 dput(new_dentry); 2239 return error; 2240 } 2241 2242 int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, 2243 struct inode *new_dir, struct dentry *new_dentry) 2244 { 2245 int error; 2246 int is_dir = S_ISDIR(old_dentry->d_inode->i_mode); 2247 const char *old_name; 2248 2249 if (old_dentry->d_inode == new_dentry->d_inode) 2250 return 0; 2251 
2252 error = may_delete(old_dir, old_dentry, is_dir); 2253 if (error) 2254 return error; 2255 2256 if (!new_dentry->d_inode) 2257 error = may_create(new_dir, new_dentry, NULL); 2258 else 2259 error = may_delete(new_dir, new_dentry, is_dir); 2260 if (error) 2261 return error; 2262 2263 if (!old_dir->i_op || !old_dir->i_op->rename) 2264 return -EPERM; 2265 2266 DQUOT_INIT(old_dir); 2267 DQUOT_INIT(new_dir); 2268 2269 old_name = fsnotify_oldname_init(old_dentry->d_name.name); 2270 2271 if (is_dir) 2272 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); 2273 else 2274 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); 2275 if (!error) { 2276 const char *new_name = old_dentry->d_name.name; 2277 fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir, 2278 new_dentry->d_inode, old_dentry->d_inode); 2279 } 2280 fsnotify_oldname_free(old_name); 2281 2282 return error; 2283 } 2284 2285 static inline int do_rename(const char * oldname, const char * newname) 2286 { 2287 int error = 0; 2288 struct dentry * old_dir, * new_dir; 2289 struct dentry * old_dentry, *new_dentry; 2290 struct dentry * trap; 2291 struct nameidata oldnd, newnd; 2292 2293 error = path_lookup(oldname, LOOKUP_PARENT, &oldnd); 2294 if (error) 2295 goto exit; 2296 2297 error = path_lookup(newname, LOOKUP_PARENT, &newnd); 2298 if (error) 2299 goto exit1; 2300 2301 error = -EXDEV; 2302 if (oldnd.mnt != newnd.mnt) 2303 goto exit2; 2304 2305 old_dir = oldnd.dentry; 2306 error = -EBUSY; 2307 if (oldnd.last_type != LAST_NORM) 2308 goto exit2; 2309 2310 new_dir = newnd.dentry; 2311 if (newnd.last_type != LAST_NORM) 2312 goto exit2; 2313 2314 trap = lock_rename(new_dir, old_dir); 2315 2316 old_dentry = lookup_hash(&oldnd.last, old_dir); 2317 error = PTR_ERR(old_dentry); 2318 if (IS_ERR(old_dentry)) 2319 goto exit3; 2320 /* source must exist */ 2321 error = -ENOENT; 2322 if (!old_dentry->d_inode) 2323 goto exit4; 2324 /* unless the source is a directory trailing slashes give -ENOTDIR */ 
2325 if (!S_ISDIR(old_dentry->d_inode->i_mode)) { 2326 error = -ENOTDIR; 2327 if (oldnd.last.name[oldnd.last.len]) 2328 goto exit4; 2329 if (newnd.last.name[newnd.last.len]) 2330 goto exit4; 2331 } 2332 /* source should not be ancestor of target */ 2333 error = -EINVAL; 2334 if (old_dentry == trap) 2335 goto exit4; 2336 new_dentry = lookup_hash(&newnd.last, new_dir); 2337 error = PTR_ERR(new_dentry); 2338 if (IS_ERR(new_dentry)) 2339 goto exit4; 2340 /* target should not be an ancestor of source */ 2341 error = -ENOTEMPTY; 2342 if (new_dentry == trap) 2343 goto exit5; 2344 2345 error = vfs_rename(old_dir->d_inode, old_dentry, 2346 new_dir->d_inode, new_dentry); 2347 exit5: 2348 dput(new_dentry); 2349 exit4: 2350 dput(old_dentry); 2351 exit3: 2352 unlock_rename(new_dir, old_dir); 2353 exit2: 2354 path_release(&newnd); 2355 exit1: 2356 path_release(&oldnd); 2357 exit: 2358 return error; 2359 } 2360 2361 asmlinkage long sys_rename(const char __user * oldname, const char __user * newname) 2362 { 2363 int error; 2364 char * from; 2365 char * to; 2366 2367 from = getname(oldname); 2368 if(IS_ERR(from)) 2369 return PTR_ERR(from); 2370 to = getname(newname); 2371 error = PTR_ERR(to); 2372 if (!IS_ERR(to)) { 2373 error = do_rename(from,to); 2374 putname(to); 2375 } 2376 putname(from); 2377 return error; 2378 } 2379 2380 int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link) 2381 { 2382 int len; 2383 2384 len = PTR_ERR(link); 2385 if (IS_ERR(link)) 2386 goto out; 2387 2388 len = strlen(link); 2389 if (len > (unsigned) buflen) 2390 len = buflen; 2391 if (copy_to_user(buffer, link, len)) 2392 len = -EFAULT; 2393 out: 2394 return len; 2395 } 2396 2397 /* 2398 * A helper for ->readlink(). This should be used *ONLY* for symlinks that 2399 * have ->follow_link() touching nd only in nd_set_link(). Using (or not 2400 * using) it for any given inode is up to filesystem. 
2401 */ 2402 int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) 2403 { 2404 struct nameidata nd; 2405 void *cookie; 2406 2407 nd.depth = 0; 2408 cookie = dentry->d_inode->i_op->follow_link(dentry, &nd); 2409 if (!IS_ERR(cookie)) { 2410 int res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); 2411 if (dentry->d_inode->i_op->put_link) 2412 dentry->d_inode->i_op->put_link(dentry, &nd, cookie); 2413 cookie = ERR_PTR(res); 2414 } 2415 return PTR_ERR(cookie); 2416 } 2417 2418 int vfs_follow_link(struct nameidata *nd, const char *link) 2419 { 2420 return __vfs_follow_link(nd, link); 2421 } 2422 2423 /* get the link contents into pagecache */ 2424 static char *page_getlink(struct dentry * dentry, struct page **ppage) 2425 { 2426 struct page * page; 2427 struct address_space *mapping = dentry->d_inode->i_mapping; 2428 page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage, 2429 NULL); 2430 if (IS_ERR(page)) 2431 goto sync_fail; 2432 wait_on_page_locked(page); 2433 if (!PageUptodate(page)) 2434 goto async_fail; 2435 *ppage = page; 2436 return kmap(page); 2437 2438 async_fail: 2439 page_cache_release(page); 2440 return ERR_PTR(-EIO); 2441 2442 sync_fail: 2443 return (char*)page; 2444 } 2445 2446 int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) 2447 { 2448 struct page *page = NULL; 2449 char *s = page_getlink(dentry, &page); 2450 int res = vfs_readlink(dentry,buffer,buflen,s); 2451 if (page) { 2452 kunmap(page); 2453 page_cache_release(page); 2454 } 2455 return res; 2456 } 2457 2458 void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd) 2459 { 2460 struct page *page = NULL; 2461 nd_set_link(nd, page_getlink(dentry, &page)); 2462 return page; 2463 } 2464 2465 void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) 2466 { 2467 struct page *page = cookie; 2468 2469 if (page) { 2470 kunmap(page); 2471 page_cache_release(page); 2472 } 2473 } 2474 2475 int 
page_symlink(struct inode *inode, const char *symname, int len) 2476 { 2477 struct address_space *mapping = inode->i_mapping; 2478 struct page *page = grab_cache_page(mapping, 0); 2479 int err = -ENOMEM; 2480 char *kaddr; 2481 2482 if (!page) 2483 goto fail; 2484 err = mapping->a_ops->prepare_write(NULL, page, 0, len-1); 2485 if (err) 2486 goto fail_map; 2487 kaddr = kmap_atomic(page, KM_USER0); 2488 memcpy(kaddr, symname, len-1); 2489 kunmap_atomic(kaddr, KM_USER0); 2490 mapping->a_ops->commit_write(NULL, page, 0, len-1); 2491 /* 2492 * Notice that we are _not_ going to block here - end of page is 2493 * unmapped, so this will only try to map the rest of page, see 2494 * that it is unmapped (typically even will not look into inode - 2495 * ->i_size will be enough for everything) and zero it out. 2496 * OTOH it's obviously correct and should make the page up-to-date. 2497 */ 2498 if (!PageUptodate(page)) { 2499 err = mapping->a_ops->readpage(NULL, page); 2500 wait_on_page_locked(page); 2501 } else { 2502 unlock_page(page); 2503 } 2504 page_cache_release(page); 2505 if (err < 0) 2506 goto fail; 2507 mark_inode_dirty(inode); 2508 return 0; 2509 fail_map: 2510 unlock_page(page); 2511 page_cache_release(page); 2512 fail: 2513 return err; 2514 } 2515 2516 struct inode_operations page_symlink_inode_operations = { 2517 .readlink = generic_readlink, 2518 .follow_link = page_follow_link_light, 2519 .put_link = page_put_link, 2520 }; 2521 2522 EXPORT_SYMBOL(__user_walk); 2523 EXPORT_SYMBOL(follow_down); 2524 EXPORT_SYMBOL(follow_up); 2525 EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ 2526 EXPORT_SYMBOL(getname); 2527 EXPORT_SYMBOL(lock_rename); 2528 EXPORT_SYMBOL(lookup_hash); 2529 EXPORT_SYMBOL(lookup_one_len); 2530 EXPORT_SYMBOL(page_follow_link_light); 2531 EXPORT_SYMBOL(page_put_link); 2532 EXPORT_SYMBOL(page_readlink); 2533 EXPORT_SYMBOL(page_symlink); 2534 EXPORT_SYMBOL(page_symlink_inode_operations); 2535 EXPORT_SYMBOL(path_lookup); 2536 
EXPORT_SYMBOL(path_release);
EXPORT_SYMBOL(path_walk);
EXPORT_SYMBOL(permission);
EXPORT_SYMBOL(generic_permission);
EXPORT_SYMBOL(unlock_rename);
EXPORT_SYMBOL(dentry_unhash);

/* vfs_*() entry points */
EXPORT_SYMBOL(vfs_create);
EXPORT_SYMBOL(vfs_follow_link);
EXPORT_SYMBOL(vfs_link);
EXPORT_SYMBOL(vfs_mkdir);
EXPORT_SYMBOL(vfs_mknod);
EXPORT_SYMBOL(vfs_readlink);
EXPORT_SYMBOL(vfs_rename);
EXPORT_SYMBOL(vfs_rmdir);
EXPORT_SYMBOL(vfs_symlink);
EXPORT_SYMBOL(vfs_unlink);

EXPORT_SYMBOL(generic_readlink);