1 /* 2 * linux/fs/namei.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 /* 8 * Some corrections by tytso. 9 */ 10 11 /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname 12 * lookup logic. 13 */ 14 /* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture. 15 */ 16 17 #include <linux/init.h> 18 #include <linux/module.h> 19 #include <linux/slab.h> 20 #include <linux/fs.h> 21 #include <linux/namei.h> 22 #include <linux/quotaops.h> 23 #include <linux/pagemap.h> 24 #include <linux/fsnotify.h> 25 #include <linux/personality.h> 26 #include <linux/security.h> 27 #include <linux/syscalls.h> 28 #include <linux/mount.h> 29 #include <linux/audit.h> 30 #include <linux/capability.h> 31 #include <linux/file.h> 32 #include <linux/fcntl.h> 33 #include <linux/namei.h> 34 #include <asm/namei.h> 35 #include <asm/uaccess.h> 36 37 #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) 38 39 /* [Feb-1997 T. Schoebel-Theuer] 40 * Fundamental changes in the pathname lookup mechanisms (namei) 41 * were necessary because of omirr. The reason is that omirr needs 42 * to know the _real_ pathname, not the user-supplied one, in case 43 * of symlinks (and also when transname replacements occur). 44 * 45 * The new code replaces the old recursive symlink resolution with 46 * an iterative one (in case of non-nested symlink chains). It does 47 * this with calls to <fs>_follow_link(). 48 * As a side effect, dir_namei(), _namei() and follow_link() are now 49 * replaced with a single function lookup_dentry() that can handle all 50 * the special cases of the former code. 51 * 52 * With the new dcache, the pathname is stored at each inode, at least as 53 * long as the refcount of the inode is positive. As a side effect, the 54 * size of the dcache depends on the inode cache and thus is dynamic. 55 * 56 * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink 57 * resolution to correspond with current state of the code. 
58 * 59 * Note that the symlink resolution is not *completely* iterative. 60 * There is still a significant amount of tail- and mid- recursion in 61 * the algorithm. Also, note that <fs>_readlink() is not used in 62 * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink() 63 * may return different results than <fs>_follow_link(). Many virtual 64 * filesystems (including /proc) exhibit this behavior. 65 */ 66 67 /* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation: 68 * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL 69 * and the name already exists in the form of a symlink, try to create the new 70 * name indicated by the symlink. The old code always complained that the 71 * name already exists, due to not following the symlink even if its target 72 * is nonexistent. The new semantics also affect mknod() and link() when 73 * the name is a symlink pointing to a non-existent name. 74 * 75 * I don't know which semantics is the right one, since I have no access 76 * to standards. But I found by trial that HP-UX 9.0 has the full "new" 77 * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the 78 * "old" one. Personally, I think the new semantics is much more logical. 79 * Note that "ln old new" where "new" is a symlink pointing to a non-existing 80 * file does succeed in both HP-UX and SunOS, but not in Solaris 81 * and in the old Linux semantics. 82 */ 83 84 /* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink 85 * semantics. See the comments in "open_namei" and "do_link" below. 86 * 87 * [10-Sep-98 Alan Modra] Another symlink change. 88 */ 89 90 /* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks: 91 * inside the path - always follow. 92 * in the last component in creation/removal/renaming - never follow. 93 * if LOOKUP_FOLLOW passed - follow. 94 * if the pathname has trailing slashes - follow. 95 * otherwise - don't follow. 96 * (applied in that order). 
97 * 98 * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT 99 * restored for 2.4. This is the last surviving part of old 4.2BSD bug. 100 * During the 2.4 we need to fix the userland stuff depending on it - 101 * hopefully we will be able to get rid of that wart in 2.5. So far only 102 * XEmacs seems to be relying on it... 103 */ 104 /* 105 * [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland) 106 * implemented. Let's see if raised priority of ->s_vfs_rename_mutex gives 107 * any extra contention... 108 */ 109 110 static int fastcall link_path_walk(const char *name, struct nameidata *nd); 111 112 /* In order to reduce some races, while at the same time doing additional 113 * checking and hopefully speeding things up, we copy filenames to the 114 * kernel data space before using them.. 115 * 116 * POSIX.1 2.4: an empty pathname is invalid (ENOENT). 117 * PATH_MAX includes the nul terminator --RR. 118 */ 119 static int do_getname(const char __user *filename, char *page) 120 { 121 int retval; 122 unsigned long len = PATH_MAX; 123 124 if (!segment_eq(get_fs(), KERNEL_DS)) { 125 if ((unsigned long) filename >= TASK_SIZE) 126 return -EFAULT; 127 if (TASK_SIZE - (unsigned long) filename < PATH_MAX) 128 len = TASK_SIZE - (unsigned long) filename; 129 } 130 131 retval = strncpy_from_user(page, filename, len); 132 if (retval > 0) { 133 if (retval < len) 134 return 0; 135 return -ENAMETOOLONG; 136 } else if (!retval) 137 retval = -ENOENT; 138 return retval; 139 } 140 141 char * getname(const char __user * filename) 142 { 143 char *tmp, *result; 144 145 result = ERR_PTR(-ENOMEM); 146 tmp = __getname(); 147 if (tmp) { 148 int retval = do_getname(filename, tmp); 149 150 result = tmp; 151 if (retval < 0) { 152 __putname(tmp); 153 result = ERR_PTR(retval); 154 } 155 } 156 audit_getname(result); 157 return result; 158 } 159 160 #ifdef CONFIG_AUDITSYSCALL 161 void putname(const char *name) 162 { 163 if (unlikely(!audit_dummy_context())) 164 
audit_putname(name); 165 else 166 __putname(name); 167 } 168 EXPORT_SYMBOL(putname); 169 #endif 170 171 172 /** 173 * generic_permission - check for access rights on a Posix-like filesystem 174 * @inode: inode to check access rights for 175 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) 176 * @check_acl: optional callback to check for Posix ACLs 177 * 178 * Used to check for read/write/execute permissions on a file. 179 * We use "fsuid" for this, letting us set arbitrary permissions 180 * for filesystem access without changing the "normal" uids which 181 * are used for other things.. 182 */ 183 int generic_permission(struct inode *inode, int mask, 184 int (*check_acl)(struct inode *inode, int mask)) 185 { 186 umode_t mode = inode->i_mode; 187 188 if (current->fsuid == inode->i_uid) 189 mode >>= 6; 190 else { 191 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { 192 int error = check_acl(inode, mask); 193 if (error == -EACCES) 194 goto check_capabilities; 195 else if (error != -EAGAIN) 196 return error; 197 } 198 199 if (in_group_p(inode->i_gid)) 200 mode >>= 3; 201 } 202 203 /* 204 * If the DACs are ok we don't need any capability check. 205 */ 206 if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)) 207 return 0; 208 209 check_capabilities: 210 /* 211 * Read/write DACs are always overridable. 212 * Executable DACs are overridable if at least one exec bit is set. 213 */ 214 if (!(mask & MAY_EXEC) || 215 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode)) 216 if (capable(CAP_DAC_OVERRIDE)) 217 return 0; 218 219 /* 220 * Searching includes executable on directories, else just read. 
221 */ 222 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) 223 if (capable(CAP_DAC_READ_SEARCH)) 224 return 0; 225 226 return -EACCES; 227 } 228 229 int permission(struct inode *inode, int mask, struct nameidata *nd) 230 { 231 umode_t mode = inode->i_mode; 232 int retval, submask; 233 234 if (mask & MAY_WRITE) { 235 236 /* 237 * Nobody gets write access to a read-only fs. 238 */ 239 if (IS_RDONLY(inode) && 240 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) 241 return -EROFS; 242 243 /* 244 * Nobody gets write access to an immutable file. 245 */ 246 if (IS_IMMUTABLE(inode)) 247 return -EACCES; 248 } 249 250 251 /* 252 * MAY_EXEC on regular files requires special handling: We override 253 * filesystem execute permissions if the mode bits aren't set or 254 * the fs is mounted with the "noexec" flag. 255 */ 256 if ((mask & MAY_EXEC) && S_ISREG(mode) && (!(mode & S_IXUGO) || 257 (nd && nd->mnt && (nd->mnt->mnt_flags & MNT_NOEXEC)))) 258 return -EACCES; 259 260 /* Ordinary permission routines do not understand MAY_APPEND. */ 261 submask = mask & ~MAY_APPEND; 262 if (inode->i_op && inode->i_op->permission) 263 retval = inode->i_op->permission(inode, submask, nd); 264 else 265 retval = generic_permission(inode, submask, NULL); 266 if (retval) 267 return retval; 268 269 return security_inode_permission(inode, mask, nd); 270 } 271 272 /** 273 * vfs_permission - check for access rights to a given path 274 * @nd: lookup result that describes the path 275 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) 276 * 277 * Used to check for read/write/execute permissions on a path. 278 * We use "fsuid" for this, letting us set arbitrary permissions 279 * for filesystem access without changing the "normal" uids which 280 * are used for other things. 
281 */ 282 int vfs_permission(struct nameidata *nd, int mask) 283 { 284 return permission(nd->dentry->d_inode, mask, nd); 285 } 286 287 /** 288 * file_permission - check for additional access rights to a given file 289 * @file: file to check access rights for 290 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) 291 * 292 * Used to check for read/write/execute permissions on an already opened 293 * file. 294 * 295 * Note: 296 * Do not use this function in new code. All access checks should 297 * be done using vfs_permission(). 298 */ 299 int file_permission(struct file *file, int mask) 300 { 301 return permission(file->f_path.dentry->d_inode, mask, NULL); 302 } 303 304 /* 305 * get_write_access() gets write permission for a file. 306 * put_write_access() releases this write permission. 307 * This is used for regular files. 308 * We cannot support write (and maybe mmap read-write shared) accesses and 309 * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode 310 * can have the following values: 311 * 0: no writers, no VM_DENYWRITE mappings 312 * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist 313 * > 0: (i_writecount) users are writing to the file. 314 * 315 * Normally we operate on that counter with atomic_{inc,dec} and it's safe 316 * except for the cases where we don't hold i_writecount yet. Then we need to 317 * use {get,deny}_write_access() - these functions check the sign and refuse 318 * to do the change if sign is wrong. Exclusion between them is provided by 319 * the inode->i_lock spinlock. 
320 */ 321 322 int get_write_access(struct inode * inode) 323 { 324 spin_lock(&inode->i_lock); 325 if (atomic_read(&inode->i_writecount) < 0) { 326 spin_unlock(&inode->i_lock); 327 return -ETXTBSY; 328 } 329 atomic_inc(&inode->i_writecount); 330 spin_unlock(&inode->i_lock); 331 332 return 0; 333 } 334 335 int deny_write_access(struct file * file) 336 { 337 struct inode *inode = file->f_path.dentry->d_inode; 338 339 spin_lock(&inode->i_lock); 340 if (atomic_read(&inode->i_writecount) > 0) { 341 spin_unlock(&inode->i_lock); 342 return -ETXTBSY; 343 } 344 atomic_dec(&inode->i_writecount); 345 spin_unlock(&inode->i_lock); 346 347 return 0; 348 } 349 350 void path_release(struct nameidata *nd) 351 { 352 dput(nd->dentry); 353 mntput(nd->mnt); 354 } 355 356 /* 357 * umount() mustn't call path_release()/mntput() as that would clear 358 * mnt_expiry_mark 359 */ 360 void path_release_on_umount(struct nameidata *nd) 361 { 362 dput(nd->dentry); 363 mntput_no_expire(nd->mnt); 364 } 365 366 /** 367 * release_open_intent - free up open intent resources 368 * @nd: pointer to nameidata 369 */ 370 void release_open_intent(struct nameidata *nd) 371 { 372 if (nd->intent.open.file->f_path.dentry == NULL) 373 put_filp(nd->intent.open.file); 374 else 375 fput(nd->intent.open.file); 376 } 377 378 static inline struct dentry * 379 do_revalidate(struct dentry *dentry, struct nameidata *nd) 380 { 381 int status = dentry->d_op->d_revalidate(dentry, nd); 382 if (unlikely(status <= 0)) { 383 /* 384 * The dentry failed validation. 385 * If d_revalidate returned 0 attempt to invalidate 386 * the dentry otherwise d_revalidate is asking us 387 * to return a fail status. 388 */ 389 if (!status) { 390 if (!d_invalidate(dentry)) { 391 dput(dentry); 392 dentry = NULL; 393 } 394 } else { 395 dput(dentry); 396 dentry = ERR_PTR(status); 397 } 398 } 399 return dentry; 400 } 401 402 /* 403 * Internal lookup() using the new generic dcache. 
404 * SMP-safe 405 */ 406 static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd) 407 { 408 struct dentry * dentry = __d_lookup(parent, name); 409 410 /* lockess __d_lookup may fail due to concurrent d_move() 411 * in some unrelated directory, so try with d_lookup 412 */ 413 if (!dentry) 414 dentry = d_lookup(parent, name); 415 416 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) 417 dentry = do_revalidate(dentry, nd); 418 419 return dentry; 420 } 421 422 /* 423 * Short-cut version of permission(), for calling by 424 * path_walk(), when dcache lock is held. Combines parts 425 * of permission() and generic_permission(), and tests ONLY for 426 * MAY_EXEC permission. 427 * 428 * If appropriate, check DAC only. If not appropriate, or 429 * short-cut DAC fails, then call permission() to do more 430 * complete permission check. 431 */ 432 static int exec_permission_lite(struct inode *inode, 433 struct nameidata *nd) 434 { 435 umode_t mode = inode->i_mode; 436 437 if (inode->i_op && inode->i_op->permission) 438 return -EAGAIN; 439 440 if (current->fsuid == inode->i_uid) 441 mode >>= 6; 442 else if (in_group_p(inode->i_gid)) 443 mode >>= 3; 444 445 if (mode & MAY_EXEC) 446 goto ok; 447 448 if ((inode->i_mode & S_IXUGO) && capable(CAP_DAC_OVERRIDE)) 449 goto ok; 450 451 if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_OVERRIDE)) 452 goto ok; 453 454 if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_READ_SEARCH)) 455 goto ok; 456 457 return -EACCES; 458 ok: 459 return security_inode_permission(inode, MAY_EXEC, nd); 460 } 461 462 /* 463 * This is called when everything else fails, and we actually have 464 * to go to the low-level filesystem to find out what we should do.. 465 * 466 * We get the directory semaphore, and after getting that we also 467 * make sure that nobody added the entry to the dcache in the meantime.. 
468 * SMP-safe 469 */ 470 static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd) 471 { 472 struct dentry * result; 473 struct inode *dir = parent->d_inode; 474 475 mutex_lock(&dir->i_mutex); 476 /* 477 * First re-do the cached lookup just in case it was created 478 * while we waited for the directory semaphore.. 479 * 480 * FIXME! This could use version numbering or similar to 481 * avoid unnecessary cache lookups. 482 * 483 * The "dcache_lock" is purely to protect the RCU list walker 484 * from concurrent renames at this point (we mustn't get false 485 * negatives from the RCU list walk here, unlike the optimistic 486 * fast walk). 487 * 488 * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup 489 */ 490 result = d_lookup(parent, name); 491 if (!result) { 492 struct dentry * dentry = d_alloc(parent, name); 493 result = ERR_PTR(-ENOMEM); 494 if (dentry) { 495 result = dir->i_op->lookup(dir, dentry, nd); 496 if (result) 497 dput(dentry); 498 else 499 result = dentry; 500 } 501 mutex_unlock(&dir->i_mutex); 502 return result; 503 } 504 505 /* 506 * Uhhuh! Nasty case: the cache was re-populated while 507 * we waited on the semaphore. Need to revalidate. 
508 */ 509 mutex_unlock(&dir->i_mutex); 510 if (result->d_op && result->d_op->d_revalidate) { 511 result = do_revalidate(result, nd); 512 if (!result) 513 result = ERR_PTR(-ENOENT); 514 } 515 return result; 516 } 517 518 static int __emul_lookup_dentry(const char *, struct nameidata *); 519 520 /* SMP-safe */ 521 static __always_inline int 522 walk_init_root(const char *name, struct nameidata *nd) 523 { 524 struct fs_struct *fs = current->fs; 525 526 read_lock(&fs->lock); 527 if (fs->altroot && !(nd->flags & LOOKUP_NOALT)) { 528 nd->mnt = mntget(fs->altrootmnt); 529 nd->dentry = dget(fs->altroot); 530 read_unlock(&fs->lock); 531 if (__emul_lookup_dentry(name,nd)) 532 return 0; 533 read_lock(&fs->lock); 534 } 535 nd->mnt = mntget(fs->rootmnt); 536 nd->dentry = dget(fs->root); 537 read_unlock(&fs->lock); 538 return 1; 539 } 540 541 static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) 542 { 543 int res = 0; 544 char *name; 545 if (IS_ERR(link)) 546 goto fail; 547 548 if (*link == '/') { 549 path_release(nd); 550 if (!walk_init_root(link, nd)) 551 /* weird __emul_prefix() stuff did it */ 552 goto out; 553 } 554 res = link_path_walk(link, nd); 555 out: 556 if (nd->depth || res || nd->last_type!=LAST_NORM) 557 return res; 558 /* 559 * If it is an iterative symlinks resolution in open_namei() we 560 * have to copy the last component. And all that crap because of 561 * bloody create() on broken symlinks. Furrfu... 
562 */ 563 name = __getname(); 564 if (unlikely(!name)) { 565 path_release(nd); 566 return -ENOMEM; 567 } 568 strcpy(name, nd->last.name); 569 nd->last.name = name; 570 return 0; 571 fail: 572 path_release(nd); 573 return PTR_ERR(link); 574 } 575 576 static inline void dput_path(struct path *path, struct nameidata *nd) 577 { 578 dput(path->dentry); 579 if (path->mnt != nd->mnt) 580 mntput(path->mnt); 581 } 582 583 static inline void path_to_nameidata(struct path *path, struct nameidata *nd) 584 { 585 dput(nd->dentry); 586 if (nd->mnt != path->mnt) 587 mntput(nd->mnt); 588 nd->mnt = path->mnt; 589 nd->dentry = path->dentry; 590 } 591 592 static __always_inline int __do_follow_link(struct path *path, struct nameidata *nd) 593 { 594 int error; 595 void *cookie; 596 struct dentry *dentry = path->dentry; 597 598 touch_atime(path->mnt, dentry); 599 nd_set_link(nd, NULL); 600 601 if (path->mnt != nd->mnt) { 602 path_to_nameidata(path, nd); 603 dget(dentry); 604 } 605 mntget(path->mnt); 606 cookie = dentry->d_inode->i_op->follow_link(dentry, nd); 607 error = PTR_ERR(cookie); 608 if (!IS_ERR(cookie)) { 609 char *s = nd_get_link(nd); 610 error = 0; 611 if (s) 612 error = __vfs_follow_link(nd, s); 613 if (dentry->d_inode->i_op->put_link) 614 dentry->d_inode->i_op->put_link(dentry, nd, cookie); 615 } 616 dput(dentry); 617 mntput(path->mnt); 618 619 return error; 620 } 621 622 /* 623 * This limits recursive symlink follows to 8, while 624 * limiting consecutive symlinks to 40. 625 * 626 * Without that kind of total limit, nasty chains of consecutive 627 * symlinks can cause almost arbitrarily long lookups. 
628 */ 629 static inline int do_follow_link(struct path *path, struct nameidata *nd) 630 { 631 int err = -ELOOP; 632 if (current->link_count >= MAX_NESTED_LINKS) 633 goto loop; 634 if (current->total_link_count >= 40) 635 goto loop; 636 BUG_ON(nd->depth >= MAX_NESTED_LINKS); 637 cond_resched(); 638 err = security_inode_follow_link(path->dentry, nd); 639 if (err) 640 goto loop; 641 current->link_count++; 642 current->total_link_count++; 643 nd->depth++; 644 err = __do_follow_link(path, nd); 645 current->link_count--; 646 nd->depth--; 647 return err; 648 loop: 649 dput_path(path, nd); 650 path_release(nd); 651 return err; 652 } 653 654 int follow_up(struct vfsmount **mnt, struct dentry **dentry) 655 { 656 struct vfsmount *parent; 657 struct dentry *mountpoint; 658 spin_lock(&vfsmount_lock); 659 parent=(*mnt)->mnt_parent; 660 if (parent == *mnt) { 661 spin_unlock(&vfsmount_lock); 662 return 0; 663 } 664 mntget(parent); 665 mountpoint=dget((*mnt)->mnt_mountpoint); 666 spin_unlock(&vfsmount_lock); 667 dput(*dentry); 668 *dentry = mountpoint; 669 mntput(*mnt); 670 *mnt = parent; 671 return 1; 672 } 673 674 /* no need for dcache_lock, as serialization is taken care in 675 * namespace.c 676 */ 677 static int __follow_mount(struct path *path) 678 { 679 int res = 0; 680 while (d_mountpoint(path->dentry)) { 681 struct vfsmount *mounted = lookup_mnt(path->mnt, path->dentry); 682 if (!mounted) 683 break; 684 dput(path->dentry); 685 if (res) 686 mntput(path->mnt); 687 path->mnt = mounted; 688 path->dentry = dget(mounted->mnt_root); 689 res = 1; 690 } 691 return res; 692 } 693 694 static void follow_mount(struct vfsmount **mnt, struct dentry **dentry) 695 { 696 while (d_mountpoint(*dentry)) { 697 struct vfsmount *mounted = lookup_mnt(*mnt, *dentry); 698 if (!mounted) 699 break; 700 dput(*dentry); 701 mntput(*mnt); 702 *mnt = mounted; 703 *dentry = dget(mounted->mnt_root); 704 } 705 } 706 707 /* no need for dcache_lock, as serialization is taken care in 708 * namespace.c 709 */ 
710 int follow_down(struct vfsmount **mnt, struct dentry **dentry) 711 { 712 struct vfsmount *mounted; 713 714 mounted = lookup_mnt(*mnt, *dentry); 715 if (mounted) { 716 dput(*dentry); 717 mntput(*mnt); 718 *mnt = mounted; 719 *dentry = dget(mounted->mnt_root); 720 return 1; 721 } 722 return 0; 723 } 724 725 static __always_inline void follow_dotdot(struct nameidata *nd) 726 { 727 struct fs_struct *fs = current->fs; 728 729 while(1) { 730 struct vfsmount *parent; 731 struct dentry *old = nd->dentry; 732 733 read_lock(&fs->lock); 734 if (nd->dentry == fs->root && 735 nd->mnt == fs->rootmnt) { 736 read_unlock(&fs->lock); 737 break; 738 } 739 read_unlock(&fs->lock); 740 spin_lock(&dcache_lock); 741 if (nd->dentry != nd->mnt->mnt_root) { 742 nd->dentry = dget(nd->dentry->d_parent); 743 spin_unlock(&dcache_lock); 744 dput(old); 745 break; 746 } 747 spin_unlock(&dcache_lock); 748 spin_lock(&vfsmount_lock); 749 parent = nd->mnt->mnt_parent; 750 if (parent == nd->mnt) { 751 spin_unlock(&vfsmount_lock); 752 break; 753 } 754 mntget(parent); 755 nd->dentry = dget(nd->mnt->mnt_mountpoint); 756 spin_unlock(&vfsmount_lock); 757 dput(old); 758 mntput(nd->mnt); 759 nd->mnt = parent; 760 } 761 follow_mount(&nd->mnt, &nd->dentry); 762 } 763 764 /* 765 * It's more convoluted than I'd like it to be, but... it's still fairly 766 * small and for now I'd prefer to have fast path as straight as possible. 767 * It _is_ time-critical. 
768 */ 769 static int do_lookup(struct nameidata *nd, struct qstr *name, 770 struct path *path) 771 { 772 struct vfsmount *mnt = nd->mnt; 773 struct dentry *dentry = __d_lookup(nd->dentry, name); 774 775 if (!dentry) 776 goto need_lookup; 777 if (dentry->d_op && dentry->d_op->d_revalidate) 778 goto need_revalidate; 779 done: 780 path->mnt = mnt; 781 path->dentry = dentry; 782 __follow_mount(path); 783 return 0; 784 785 need_lookup: 786 dentry = real_lookup(nd->dentry, name, nd); 787 if (IS_ERR(dentry)) 788 goto fail; 789 goto done; 790 791 need_revalidate: 792 dentry = do_revalidate(dentry, nd); 793 if (!dentry) 794 goto need_lookup; 795 if (IS_ERR(dentry)) 796 goto fail; 797 goto done; 798 799 fail: 800 return PTR_ERR(dentry); 801 } 802 803 /* 804 * Name resolution. 805 * This is the basic name resolution function, turning a pathname into 806 * the final dentry. We expect 'base' to be positive and a directory. 807 * 808 * Returns 0 and nd will have valid dentry and mnt on success. 809 * Returns error and drops reference to input namei data on failure. 810 */ 811 static fastcall int __link_path_walk(const char * name, struct nameidata *nd) 812 { 813 struct path next; 814 struct inode *inode; 815 int err; 816 unsigned int lookup_flags = nd->flags; 817 818 while (*name=='/') 819 name++; 820 if (!*name) 821 goto return_reval; 822 823 inode = nd->dentry->d_inode; 824 if (nd->depth) 825 lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE); 826 827 /* At this point we know we have a real path component. 
*/ 828 for(;;) { 829 unsigned long hash; 830 struct qstr this; 831 unsigned int c; 832 833 nd->flags |= LOOKUP_CONTINUE; 834 err = exec_permission_lite(inode, nd); 835 if (err == -EAGAIN) 836 err = vfs_permission(nd, MAY_EXEC); 837 if (err) 838 break; 839 840 this.name = name; 841 c = *(const unsigned char *)name; 842 843 hash = init_name_hash(); 844 do { 845 name++; 846 hash = partial_name_hash(c, hash); 847 c = *(const unsigned char *)name; 848 } while (c && (c != '/')); 849 this.len = name - (const char *) this.name; 850 this.hash = end_name_hash(hash); 851 852 /* remove trailing slashes? */ 853 if (!c) 854 goto last_component; 855 while (*++name == '/'); 856 if (!*name) 857 goto last_with_slashes; 858 859 /* 860 * "." and ".." are special - ".." especially so because it has 861 * to be able to know about the current root directory and 862 * parent relationships. 863 */ 864 if (this.name[0] == '.') switch (this.len) { 865 default: 866 break; 867 case 2: 868 if (this.name[1] != '.') 869 break; 870 follow_dotdot(nd); 871 inode = nd->dentry->d_inode; 872 /* fallthrough */ 873 case 1: 874 continue; 875 } 876 /* 877 * See if the low-level filesystem might want 878 * to use its own hash.. 879 */ 880 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { 881 err = nd->dentry->d_op->d_hash(nd->dentry, &this); 882 if (err < 0) 883 break; 884 } 885 /* This does the actual lookups.. 
*/ 886 err = do_lookup(nd, &this, &next); 887 if (err) 888 break; 889 890 err = -ENOENT; 891 inode = next.dentry->d_inode; 892 if (!inode) 893 goto out_dput; 894 err = -ENOTDIR; 895 if (!inode->i_op) 896 goto out_dput; 897 898 if (inode->i_op->follow_link) { 899 err = do_follow_link(&next, nd); 900 if (err) 901 goto return_err; 902 err = -ENOENT; 903 inode = nd->dentry->d_inode; 904 if (!inode) 905 break; 906 err = -ENOTDIR; 907 if (!inode->i_op) 908 break; 909 } else 910 path_to_nameidata(&next, nd); 911 err = -ENOTDIR; 912 if (!inode->i_op->lookup) 913 break; 914 continue; 915 /* here ends the main loop */ 916 917 last_with_slashes: 918 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; 919 last_component: 920 /* Clear LOOKUP_CONTINUE iff it was previously unset */ 921 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE; 922 if (lookup_flags & LOOKUP_PARENT) 923 goto lookup_parent; 924 if (this.name[0] == '.') switch (this.len) { 925 default: 926 break; 927 case 2: 928 if (this.name[1] != '.') 929 break; 930 follow_dotdot(nd); 931 inode = nd->dentry->d_inode; 932 /* fallthrough */ 933 case 1: 934 goto return_reval; 935 } 936 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { 937 err = nd->dentry->d_op->d_hash(nd->dentry, &this); 938 if (err < 0) 939 break; 940 } 941 err = do_lookup(nd, &this, &next); 942 if (err) 943 break; 944 inode = next.dentry->d_inode; 945 if ((lookup_flags & LOOKUP_FOLLOW) 946 && inode && inode->i_op && inode->i_op->follow_link) { 947 err = do_follow_link(&next, nd); 948 if (err) 949 goto return_err; 950 inode = nd->dentry->d_inode; 951 } else 952 path_to_nameidata(&next, nd); 953 err = -ENOENT; 954 if (!inode) 955 break; 956 if (lookup_flags & LOOKUP_DIRECTORY) { 957 err = -ENOTDIR; 958 if (!inode->i_op || !inode->i_op->lookup) 959 break; 960 } 961 goto return_base; 962 lookup_parent: 963 nd->last = this; 964 nd->last_type = LAST_NORM; 965 if (this.name[0] != '.') 966 goto return_base; 967 if (this.len == 1) 968 nd->last_type = LAST_DOT; 969 else 
if (this.len == 2 && this.name[1] == '.') 970 nd->last_type = LAST_DOTDOT; 971 else 972 goto return_base; 973 return_reval: 974 /* 975 * We bypassed the ordinary revalidation routines. 976 * We may need to check the cached dentry for staleness. 977 */ 978 if (nd->dentry && nd->dentry->d_sb && 979 (nd->dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) { 980 err = -ESTALE; 981 /* Note: we do not d_invalidate() */ 982 if (!nd->dentry->d_op->d_revalidate(nd->dentry, nd)) 983 break; 984 } 985 return_base: 986 return 0; 987 out_dput: 988 dput_path(&next, nd); 989 break; 990 } 991 path_release(nd); 992 return_err: 993 return err; 994 } 995 996 /* 997 * Wrapper to retry pathname resolution whenever the underlying 998 * file system returns an ESTALE. 999 * 1000 * Retry the whole path once, forcing real lookup requests 1001 * instead of relying on the dcache. 1002 */ 1003 static int fastcall link_path_walk(const char *name, struct nameidata *nd) 1004 { 1005 struct nameidata save = *nd; 1006 int result; 1007 1008 /* make sure the stuff we saved doesn't go away */ 1009 dget(save.dentry); 1010 mntget(save.mnt); 1011 1012 result = __link_path_walk(name, nd); 1013 if (result == -ESTALE) { 1014 *nd = save; 1015 dget(nd->dentry); 1016 mntget(nd->mnt); 1017 nd->flags |= LOOKUP_REVAL; 1018 result = __link_path_walk(name, nd); 1019 } 1020 1021 dput(save.dentry); 1022 mntput(save.mnt); 1023 1024 return result; 1025 } 1026 1027 static int fastcall path_walk(const char * name, struct nameidata *nd) 1028 { 1029 current->total_link_count = 0; 1030 return link_path_walk(name, nd); 1031 } 1032 1033 /* 1034 * SMP-safe: Returns 1 and nd will have valid dentry and mnt, if 1035 * everything is done. Returns 0 and drops input nd, if lookup failed; 1036 */ 1037 static int __emul_lookup_dentry(const char *name, struct nameidata *nd) 1038 { 1039 if (path_walk(name, nd)) 1040 return 0; /* something went wrong... 
*/ 1041 1042 if (!nd->dentry->d_inode || S_ISDIR(nd->dentry->d_inode->i_mode)) { 1043 struct dentry *old_dentry = nd->dentry; 1044 struct vfsmount *old_mnt = nd->mnt; 1045 struct qstr last = nd->last; 1046 int last_type = nd->last_type; 1047 struct fs_struct *fs = current->fs; 1048 1049 /* 1050 * NAME was not found in alternate root or it's a directory. 1051 * Try to find it in the normal root: 1052 */ 1053 nd->last_type = LAST_ROOT; 1054 read_lock(&fs->lock); 1055 nd->mnt = mntget(fs->rootmnt); 1056 nd->dentry = dget(fs->root); 1057 read_unlock(&fs->lock); 1058 if (path_walk(name, nd) == 0) { 1059 if (nd->dentry->d_inode) { 1060 dput(old_dentry); 1061 mntput(old_mnt); 1062 return 1; 1063 } 1064 path_release(nd); 1065 } 1066 nd->dentry = old_dentry; 1067 nd->mnt = old_mnt; 1068 nd->last = last; 1069 nd->last_type = last_type; 1070 } 1071 return 1; 1072 } 1073 1074 void set_fs_altroot(void) 1075 { 1076 char *emul = __emul_prefix(); 1077 struct nameidata nd; 1078 struct vfsmount *mnt = NULL, *oldmnt; 1079 struct dentry *dentry = NULL, *olddentry; 1080 int err; 1081 struct fs_struct *fs = current->fs; 1082 1083 if (!emul) 1084 goto set_it; 1085 err = path_lookup(emul, LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_NOALT, &nd); 1086 if (!err) { 1087 mnt = nd.mnt; 1088 dentry = nd.dentry; 1089 } 1090 set_it: 1091 write_lock(&fs->lock); 1092 oldmnt = fs->altrootmnt; 1093 olddentry = fs->altroot; 1094 fs->altrootmnt = mnt; 1095 fs->altroot = dentry; 1096 write_unlock(&fs->lock); 1097 if (olddentry) { 1098 dput(olddentry); 1099 mntput(oldmnt); 1100 } 1101 } 1102 1103 /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ 1104 static int fastcall do_path_lookup(int dfd, const char *name, 1105 unsigned int flags, struct nameidata *nd) 1106 { 1107 int retval = 0; 1108 int fput_needed; 1109 struct file *file; 1110 struct fs_struct *fs = current->fs; 1111 1112 nd->last_type = LAST_ROOT; /* if there are only slashes... 
*/ 1113 nd->flags = flags; 1114 nd->depth = 0; 1115 1116 if (*name=='/') { 1117 read_lock(&fs->lock); 1118 if (fs->altroot && !(nd->flags & LOOKUP_NOALT)) { 1119 nd->mnt = mntget(fs->altrootmnt); 1120 nd->dentry = dget(fs->altroot); 1121 read_unlock(&fs->lock); 1122 if (__emul_lookup_dentry(name,nd)) 1123 goto out; /* found in altroot */ 1124 read_lock(&fs->lock); 1125 } 1126 nd->mnt = mntget(fs->rootmnt); 1127 nd->dentry = dget(fs->root); 1128 read_unlock(&fs->lock); 1129 } else if (dfd == AT_FDCWD) { 1130 read_lock(&fs->lock); 1131 nd->mnt = mntget(fs->pwdmnt); 1132 nd->dentry = dget(fs->pwd); 1133 read_unlock(&fs->lock); 1134 } else { 1135 struct dentry *dentry; 1136 1137 file = fget_light(dfd, &fput_needed); 1138 retval = -EBADF; 1139 if (!file) 1140 goto out_fail; 1141 1142 dentry = file->f_path.dentry; 1143 1144 retval = -ENOTDIR; 1145 if (!S_ISDIR(dentry->d_inode->i_mode)) 1146 goto fput_fail; 1147 1148 retval = file_permission(file, MAY_EXEC); 1149 if (retval) 1150 goto fput_fail; 1151 1152 nd->mnt = mntget(file->f_path.mnt); 1153 nd->dentry = dget(dentry); 1154 1155 fput_light(file, fput_needed); 1156 } 1157 1158 retval = path_walk(name, nd); 1159 out: 1160 if (unlikely(!retval && !audit_dummy_context() && nd->dentry && 1161 nd->dentry->d_inode)) 1162 audit_inode(name, nd->dentry->d_inode); 1163 out_fail: 1164 return retval; 1165 1166 fput_fail: 1167 fput_light(file, fput_needed); 1168 goto out_fail; 1169 } 1170 1171 int fastcall path_lookup(const char *name, unsigned int flags, 1172 struct nameidata *nd) 1173 { 1174 return do_path_lookup(AT_FDCWD, name, flags, nd); 1175 } 1176 1177 /** 1178 * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair 1179 * @dentry: pointer to dentry of the base directory 1180 * @mnt: pointer to vfs mount of the base directory 1181 * @name: pointer to file name 1182 * @flags: lookup flags 1183 * @nd: pointer to nameidata 1184 */ 1185 int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, 1186 const 
char *name, unsigned int flags, 1187 struct nameidata *nd) 1188 { 1189 int retval; 1190 1191 /* same as do_path_lookup */ 1192 nd->last_type = LAST_ROOT; 1193 nd->flags = flags; 1194 nd->depth = 0; 1195 1196 nd->mnt = mntget(mnt); 1197 nd->dentry = dget(dentry); 1198 1199 retval = path_walk(name, nd); 1200 if (unlikely(!retval && !audit_dummy_context() && nd->dentry && 1201 nd->dentry->d_inode)) 1202 audit_inode(name, nd->dentry->d_inode); 1203 1204 return retval; 1205 1206 } 1207 1208 static int __path_lookup_intent_open(int dfd, const char *name, 1209 unsigned int lookup_flags, struct nameidata *nd, 1210 int open_flags, int create_mode) 1211 { 1212 struct file *filp = get_empty_filp(); 1213 int err; 1214 1215 if (filp == NULL) 1216 return -ENFILE; 1217 nd->intent.open.file = filp; 1218 nd->intent.open.flags = open_flags; 1219 nd->intent.open.create_mode = create_mode; 1220 err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd); 1221 if (IS_ERR(nd->intent.open.file)) { 1222 if (err == 0) { 1223 err = PTR_ERR(nd->intent.open.file); 1224 path_release(nd); 1225 } 1226 } else if (err != 0) 1227 release_open_intent(nd); 1228 return err; 1229 } 1230 1231 /** 1232 * path_lookup_open - lookup a file path with open intent 1233 * @dfd: the directory to use as base, or AT_FDCWD 1234 * @name: pointer to file name 1235 * @lookup_flags: lookup intent flags 1236 * @nd: pointer to nameidata 1237 * @open_flags: open intent flags 1238 */ 1239 int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags, 1240 struct nameidata *nd, int open_flags) 1241 { 1242 return __path_lookup_intent_open(dfd, name, lookup_flags, nd, 1243 open_flags, 0); 1244 } 1245 1246 /** 1247 * path_lookup_create - lookup a file path with open + create intent 1248 * @dfd: the directory to use as base, or AT_FDCWD 1249 * @name: pointer to file name 1250 * @lookup_flags: lookup intent flags 1251 * @nd: pointer to nameidata 1252 * @open_flags: open intent flags 1253 * @create_mode: create 
intent flags 1254 */ 1255 static int path_lookup_create(int dfd, const char *name, 1256 unsigned int lookup_flags, struct nameidata *nd, 1257 int open_flags, int create_mode) 1258 { 1259 return __path_lookup_intent_open(dfd, name, lookup_flags|LOOKUP_CREATE, 1260 nd, open_flags, create_mode); 1261 } 1262 1263 int __user_path_lookup_open(const char __user *name, unsigned int lookup_flags, 1264 struct nameidata *nd, int open_flags) 1265 { 1266 char *tmp = getname(name); 1267 int err = PTR_ERR(tmp); 1268 1269 if (!IS_ERR(tmp)) { 1270 err = __path_lookup_intent_open(AT_FDCWD, tmp, lookup_flags, nd, open_flags, 0); 1271 putname(tmp); 1272 } 1273 return err; 1274 } 1275 1276 static inline struct dentry *__lookup_hash_kern(struct qstr *name, struct dentry *base, struct nameidata *nd) 1277 { 1278 struct dentry *dentry; 1279 struct inode *inode; 1280 int err; 1281 1282 inode = base->d_inode; 1283 1284 /* 1285 * See if the low-level filesystem might want 1286 * to use its own hash.. 1287 */ 1288 if (base->d_op && base->d_op->d_hash) { 1289 err = base->d_op->d_hash(base, name); 1290 dentry = ERR_PTR(err); 1291 if (err < 0) 1292 goto out; 1293 } 1294 1295 dentry = cached_lookup(base, name, nd); 1296 if (!dentry) { 1297 struct dentry *new = d_alloc(base, name); 1298 dentry = ERR_PTR(-ENOMEM); 1299 if (!new) 1300 goto out; 1301 dentry = inode->i_op->lookup(inode, new, nd); 1302 if (!dentry) 1303 dentry = new; 1304 else 1305 dput(new); 1306 } 1307 out: 1308 return dentry; 1309 } 1310 1311 /* 1312 * Restricted form of lookup. Doesn't follow links, single-component only, 1313 * needs parent already locked. Doesn't follow mounts. 1314 * SMP-safe. 
1315 */ 1316 static inline struct dentry * __lookup_hash(struct qstr *name, struct dentry *base, struct nameidata *nd) 1317 { 1318 struct dentry *dentry; 1319 struct inode *inode; 1320 int err; 1321 1322 inode = base->d_inode; 1323 1324 err = permission(inode, MAY_EXEC, nd); 1325 dentry = ERR_PTR(err); 1326 if (err) 1327 goto out; 1328 1329 dentry = __lookup_hash_kern(name, base, nd); 1330 out: 1331 return dentry; 1332 } 1333 1334 static struct dentry *lookup_hash(struct nameidata *nd) 1335 { 1336 return __lookup_hash(&nd->last, nd->dentry, nd); 1337 } 1338 1339 /* SMP-safe */ 1340 static inline int __lookup_one_len(const char *name, struct qstr *this, struct dentry *base, int len) 1341 { 1342 unsigned long hash; 1343 unsigned int c; 1344 1345 this->name = name; 1346 this->len = len; 1347 if (!len) 1348 return -EACCES; 1349 1350 hash = init_name_hash(); 1351 while (len--) { 1352 c = *(const unsigned char *)name++; 1353 if (c == '/' || c == '\0') 1354 return -EACCES; 1355 hash = partial_name_hash(c, hash); 1356 } 1357 this->hash = end_name_hash(hash); 1358 return 0; 1359 } 1360 1361 struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) 1362 { 1363 int err; 1364 struct qstr this; 1365 1366 err = __lookup_one_len(name, &this, base, len); 1367 if (err) 1368 return ERR_PTR(err); 1369 return __lookup_hash(&this, base, NULL); 1370 } 1371 1372 struct dentry *lookup_one_len_kern(const char *name, struct dentry *base, int len) 1373 { 1374 int err; 1375 struct qstr this; 1376 1377 err = __lookup_one_len(name, &this, base, len); 1378 if (err) 1379 return ERR_PTR(err); 1380 return __lookup_hash_kern(&this, base, NULL); 1381 } 1382 1383 int fastcall __user_walk_fd(int dfd, const char __user *name, unsigned flags, 1384 struct nameidata *nd) 1385 { 1386 char *tmp = getname(name); 1387 int err = PTR_ERR(tmp); 1388 1389 if (!IS_ERR(tmp)) { 1390 err = do_path_lookup(dfd, tmp, flags, nd); 1391 putname(tmp); 1392 } 1393 return err; 1394 } 1395 1396 int fastcall 
__user_walk(const char __user *name, unsigned flags, struct nameidata *nd) 1397 { 1398 return __user_walk_fd(AT_FDCWD, name, flags, nd); 1399 } 1400 1401 /* 1402 * It's inline, so penalty for filesystems that don't use sticky bit is 1403 * minimal. 1404 */ 1405 static inline int check_sticky(struct inode *dir, struct inode *inode) 1406 { 1407 if (!(dir->i_mode & S_ISVTX)) 1408 return 0; 1409 if (inode->i_uid == current->fsuid) 1410 return 0; 1411 if (dir->i_uid == current->fsuid) 1412 return 0; 1413 return !capable(CAP_FOWNER); 1414 } 1415 1416 /* 1417 * Check whether we can remove a link victim from directory dir, check 1418 * whether the type of victim is right. 1419 * 1. We can't do it if dir is read-only (done in permission()) 1420 * 2. We should have write and exec permissions on dir 1421 * 3. We can't remove anything from append-only dir 1422 * 4. We can't do anything with immutable dir (done in permission()) 1423 * 5. If the sticky bit on dir is set we should either 1424 * a. be owner of dir, or 1425 * b. be owner of victim, or 1426 * c. have CAP_FOWNER capability 1427 * 6. If the victim is append-only or immutable we can't do antyhing with 1428 * links pointing to it. 1429 * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR. 1430 * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR. 1431 * 9. We can't remove a root or mountpoint. 1432 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by 1433 * nfs_async_unlink(). 
1434 */ 1435 static int may_delete(struct inode *dir,struct dentry *victim,int isdir) 1436 { 1437 int error; 1438 1439 if (!victim->d_inode) 1440 return -ENOENT; 1441 1442 BUG_ON(victim->d_parent->d_inode != dir); 1443 audit_inode_child(victim->d_name.name, victim->d_inode, dir); 1444 1445 error = permission(dir,MAY_WRITE | MAY_EXEC, NULL); 1446 if (error) 1447 return error; 1448 if (IS_APPEND(dir)) 1449 return -EPERM; 1450 if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| 1451 IS_IMMUTABLE(victim->d_inode)) 1452 return -EPERM; 1453 if (isdir) { 1454 if (!S_ISDIR(victim->d_inode->i_mode)) 1455 return -ENOTDIR; 1456 if (IS_ROOT(victim)) 1457 return -EBUSY; 1458 } else if (S_ISDIR(victim->d_inode->i_mode)) 1459 return -EISDIR; 1460 if (IS_DEADDIR(dir)) 1461 return -ENOENT; 1462 if (victim->d_flags & DCACHE_NFSFS_RENAMED) 1463 return -EBUSY; 1464 return 0; 1465 } 1466 1467 /* Check whether we can create an object with dentry child in directory 1468 * dir. 1469 * 1. We can't do it if child already exists (open has special treatment for 1470 * this case, but since we are inlined it's OK) 1471 * 2. We can't do it if dir is read-only (done in permission()) 1472 * 3. We should have write and exec permissions on dir 1473 * 4. We can't do it if dir is immutable (done in permission()) 1474 */ 1475 static inline int may_create(struct inode *dir, struct dentry *child, 1476 struct nameidata *nd) 1477 { 1478 if (child->d_inode) 1479 return -EEXIST; 1480 if (IS_DEADDIR(dir)) 1481 return -ENOENT; 1482 return permission(dir,MAY_WRITE | MAY_EXEC, nd); 1483 } 1484 1485 /* 1486 * O_DIRECTORY translates into forcing a directory lookup. 1487 */ 1488 static inline int lookup_flags(unsigned int f) 1489 { 1490 unsigned long retval = LOOKUP_FOLLOW; 1491 1492 if (f & O_NOFOLLOW) 1493 retval &= ~LOOKUP_FOLLOW; 1494 1495 if (f & O_DIRECTORY) 1496 retval |= LOOKUP_DIRECTORY; 1497 1498 return retval; 1499 } 1500 1501 /* 1502 * p1 and p2 should be directories on the same fs. 
*/
/*
 * lock_rename - take the directory locks used around a rename.
 *
 * When p1 == p2 only that directory's i_mutex is taken (I_MUTEX_PARENT)
 * and NULL is returned.
 *
 * Otherwise the s_vfs_rename_mutex of p1's superblock is taken first.
 * The d_parent chain is then walked upwards from p1, and after that from
 * p2.  If the other directory is met on the way, it is locked first
 * (I_MUTEX_PARENT), the starting directory second (I_MUTEX_CHILD), and
 * the dentry on the walked chain whose d_parent is that other directory
 * is returned.  If neither walk meets the other directory, p1 is locked
 * as parent, p2 as child, and NULL is returned.
 */
struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
{
	struct dentry *p;

	if (p1 == p2) {
		mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
		return NULL;
	}

	mutex_lock(&p1->d_inode->i_sb->s_vfs_rename_mutex);

	/* Is p2 an ancestor of p1?  The walk stops at a self-parented dentry. */
	for (p = p1; p->d_parent != p; p = p->d_parent) {
		if (p->d_parent == p2) {
			mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT);
			mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD);
			return p;
		}
	}

	/* Is p1 an ancestor of p2? */
	for (p = p2; p->d_parent != p; p = p->d_parent) {
		if (p->d_parent == p1) {
			mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
			mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
			return p;
		}
	}

	mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
	mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
	return NULL;
}

/*
 * unlock_rename - release what lock_rename() took.
 *
 * Drops p1's i_mutex; when p1 != p2 it also drops p2's i_mutex and the
 * superblock's s_vfs_rename_mutex.
 */
void unlock_rename(struct dentry *p1, struct dentry *p2)
{
	mutex_unlock(&p1->d_inode->i_mutex);
	if (p1 != p2) {
		mutex_unlock(&p2->d_inode->i_mutex);
		mutex_unlock(&p1->d_inode->i_sb->s_vfs_rename_mutex);
	}
}

/*
 * vfs_create - create a regular file for @dentry in directory @dir.
 *
 * Fails with the may_create() error, with -EACCES when the directory has
 * no ->create() operation, or with the security_inode_create() error.
 * @mode is masked with S_IALLUGO and S_IFREG is forced on before it is
 * passed down.  fsnotify_create() is called when ->create() succeeds.
 * Returns 0 or a negative errno.
 */
int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
		struct nameidata *nd)
{
	int error = may_create(dir, dentry, nd);

	if (error)
		return error;

	if (!dir->i_op || !dir->i_op->create)
		return -EACCES;	/* shouldn't it be ENOSYS? */
	mode &= S_IALLUGO;
	mode |= S_IFREG;
	error = security_inode_create(dir, dentry, mode);
	if (error)
		return error;
	DQUOT_INIT(dir);
	error = dir->i_op->create(dir, dentry, mode, nd);
	if (!error)
		fsnotify_create(dir, dentry);
	return error;
}

/*
 * may_open - final checks (and optional truncation) before opening
 * nd->dentry.
 *
 * @nd:       lookup result; nd->dentry is the object being opened
 * @acc_mode: access mask handed to vfs_permission()
 * @flag:     open flags; tested here against FMODE_WRITE, O_TRUNC,
 *            O_APPEND and O_NOATIME
 *
 * Returns 0 on success or a negative errno: -ENOENT for a negative
 * dentry, -ELOOP for a symlink, -EISDIR for a directory opened for
 * write, -EACCES for a device node on a MNT_NODEV mount, -EROFS,
 * -EPERM, or whatever vfs_permission(), break_lease(),
 * get_write_access(), locks_verify_locked() or do_truncate() report.
 */
int may_open(struct nameidata *nd, int acc_mode, int flag)
{
	struct dentry *dentry = nd->dentry;
	struct inode *inode = dentry->d_inode;
	int error;

	if (!inode)
		return -ENOENT;

	if (S_ISLNK(inode->i_mode))
		return -ELOOP;
	
	if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
		return -EISDIR;

	error = vfs_permission(nd, acc_mode);
	if (error)
		return error;

	/*
	 * FIFO's, sockets and device files are special: they don't
	 * actually live on the filesystem itself, and as such you
	 * can write to them even if the filesystem is read-only.
	 * O_TRUNC is dropped for them, so no truncation is attempted below.
	 */
	if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
	    	flag &= ~O_TRUNC;
	} else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
		if (nd->mnt->mnt_flags & MNT_NODEV)
			return -EACCES;

		flag &= ~O_TRUNC;
	} else if (IS_RDONLY(inode) && (flag & FMODE_WRITE))
		return -EROFS;
	/*
	 * An append-only file must be opened in append mode for writing.
	 */
	if (IS_APPEND(inode)) {
		if  ((flag & FMODE_WRITE) && !(flag & O_APPEND))
			return -EPERM;
		if (flag & O_TRUNC)
			return -EPERM;
	}

	/* O_NOATIME can only be set by the owner or superuser */
	if (flag & O_NOATIME)
		if (!is_owner_or_cap(inode))
			return -EPERM;

	/*
	 * Ensure there are no outstanding leases on the file.
	 */
	error = break_lease(inode, flag);
	if (error)
		return error;

	if (flag & O_TRUNC) {
		error = get_write_access(inode);
		if (error)
			return error;

		/*
		 * Refuse to truncate files with mandatory locks held on them.
		 */
		error = locks_verify_locked(inode);
		if (!error) {
			DQUOT_INIT(inode);

			error = do_truncate(dentry, 0, ATTR_MTIME|ATTR_CTIME, NULL);
		}
		/* Write access is dropped again whether or not truncation ran. */
		put_write_access(inode);
		if (error)
			return error;
	} else
		if (flag & FMODE_WRITE)
			DQUOT_INIT(inode);

	return 0;
}

/*
 * open_namei_create - create the file for a negative dentry found by
 * open_namei().
 *
 * The umask is applied to @mode unless the directory is IS_POSIXACL.
 * The directory's i_mutex (taken by the caller) is released here, and
 * nd->dentry is switched from the directory to path->dentry whether or
 * not vfs_create() succeeded.  On success the result of may_open() is
 * returned, called with no access mask and with O_TRUNC removed.
 */
static int open_namei_create(struct nameidata *nd, struct path *path,
				int flag, int mode)
{
	int error;
	struct dentry *dir = nd->dentry;

	if (!IS_POSIXACL(dir->d_inode))
		mode &= ~current->fs->umask;
	error = vfs_create(dir->d_inode, path->dentry, mode, nd);
	mutex_unlock(&dir->d_inode->i_mutex);
	dput(nd->dentry);
	nd->dentry = path->dentry;
	if (error)
		return error;
	/* Don't check for write permission, don't truncate */
	return may_open(nd, 0, flag & ~O_TRUNC);
}

/*
 *	open_namei()
 *
 * namei for open - this is in fact almost the whole open-routine.
 *
 * Note that the low bits of "flag" aren't the same as in the open
 * system call - they are 00 - no permissions needed
 *			  01 - read permission needed
 *			  10 - write permission needed
 *			  11 - read/write permissions needed
 * which is a lot more logical, and also allows the "no perm" needed
 * for symlinks (where the permissions are checked later).
 * SMP-safe
 *
 * Returns 0 with @nd describing the opened object, or a negative errno.
 * Without O_CREAT this is a plain open-intent lookup followed by
 * may_open().  With O_CREAT the parent is looked up first, the last
 * component is looked up under the parent's i_mutex, and the file is
 * either created (negative dentry) or checked as an existing object.
 * A symlink in the last component is followed by hand in the do_link
 * section, which loops back to do_last.
 */
int open_namei(int dfd, const char *pathname, int flag,
		int mode, struct nameidata *nd)
{
	int acc_mode, error;
	struct path path;
	struct dentry *dir;
	int count = 0;	/* trailing symlinks followed so far, see do_link */

	acc_mode = ACC_MODE(flag);

	/* O_TRUNC implies we need access checks for write permissions */
	if (flag & O_TRUNC)
		acc_mode |= MAY_WRITE;

	/* Allow the LSM permission hook to distinguish append 
	   access from general write access. */
	if (flag & O_APPEND)
		acc_mode |= MAY_APPEND;

	/*
	 * The simplest case - just a plain lookup.
	 */
	if (!(flag & O_CREAT)) {
		error = path_lookup_open(dfd, pathname, lookup_flags(flag),
					 nd, flag);
		if (error)
			return error;
		goto ok;
	}

	/*
	 * Create - we need to know the parent.
	 */
	error = path_lookup_create(dfd,pathname,LOOKUP_PARENT,nd,flag,mode);
	if (error)
		return error;

	/*
	 * We have the parent and last component. First of all, check
	 * that we are not asked to creat(2) an obvious directory - that
	 * will not do.
	 */
	error = -EISDIR;
	if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
		goto exit;

	dir = nd->dentry;
	nd->flags &= ~LOOKUP_PARENT;
	mutex_lock(&dir->d_inode->i_mutex);
	path.dentry = lookup_hash(nd);
	path.mnt = nd->mnt;

	/*
	 * Entered with dir->d_inode->i_mutex held, both on the first pass
	 * and when do_link jumps back here.
	 */
do_last:
	error = PTR_ERR(path.dentry);
	if (IS_ERR(path.dentry)) {
		mutex_unlock(&dir->d_inode->i_mutex);
		goto exit;
	}

	if (IS_ERR(nd->intent.open.file)) {
		mutex_unlock(&dir->d_inode->i_mutex);
		error = PTR_ERR(nd->intent.open.file);
		goto exit_dput;
	}

	/* Negative dentry, just create the file */
	if (!path.dentry->d_inode) {
		/* open_namei_create() drops the directory's i_mutex itself. */
		error = open_namei_create(nd, &path, flag, mode);
		if (error)
			goto exit;
		return 0;
	}

	/*
	 * It already exists.
	 */
	mutex_unlock(&dir->d_inode->i_mutex);
	audit_inode(pathname, path.dentry->d_inode);

	error = -EEXIST;
	if (flag & O_EXCL)
		goto exit_dput;

	if (__follow_mount(&path)) {
		error = -ELOOP;
		if (flag & O_NOFOLLOW)
			goto exit_dput;
	}

	error = -ENOENT;
	if (!path.dentry->d_inode)
		goto exit_dput;
	if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link)
		goto do_link;

	path_to_nameidata(&path, nd);
	error = -EISDIR;
	if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
		goto exit;
ok:
	error = may_open(nd, acc_mode, flag);
	if (error)
		goto exit;
	return 0;

	/* Error exits: exit_dput additionally drops the looked-up path. */
exit_dput:
	dput_path(&path, nd);
exit:
	if (!IS_ERR(nd->intent.open.file))
		release_open_intent(nd);
	path_release(nd);
	return error;

do_link:
	error = -ELOOP;
	if (flag & O_NOFOLLOW)
		goto exit_dput;
	/*
	 * This is subtle. Instead of calling do_follow_link() we do the
	 * thing by hands. The reason is that this way we have zero link_count
	 * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
	 * After that we have the parent and last component, i.e.
	 * we are in the same situation as after the first path_walk().
	 * Well, almost - if the last component is normal we get its copy
	 * stored in nd->last.name and we will have to putname() it when we
	 * are done. Procfs-like symlinks just set LAST_BIND.
	 */
	nd->flags |= LOOKUP_PARENT;
	error = security_inode_follow_link(path.dentry, nd);
	if (error)
		goto exit_dput;
	error = __do_follow_link(&path, nd);
	if (error) {
		/* Does someone understand code flow here? Or it is only
		 * me so stupid? Anathema to whoever designed this non-sense
		 * with "intent.open".
		 */
		/*
		 * NOTE(review): unlike the other error exits this one does not
		 * call path_release(); presumably __do_follow_link() has already
		 * released the path on failure - confirm against its definition.
		 */
		release_open_intent(nd);
		return error;
	}
	nd->flags &= ~LOOKUP_PARENT;
	if (nd->last_type == LAST_BIND)
		goto ok;
	error = -EISDIR;
	if (nd->last_type != LAST_NORM)
		goto exit;
	if (nd->last.name[nd->last.len]) {
		__putname(nd->last.name);
		goto exit;
	}
	/* Give up with -ELOOP once 32 trailing symlinks have been followed. */
	error = -ELOOP;
	if (count++==32) {
		__putname(nd->last.name);
		goto exit;
	}
	dir = nd->dentry;
	mutex_lock(&dir->d_inode->i_mutex);
	path.dentry = lookup_hash(nd);
	path.mnt = nd->mnt;
	__putname(nd->last.name);
	goto do_last;
}

/**
 * lookup_create - look up the last path component in preparation for creation
 * @nd: nameidata info
 * @is_dir: directory flag
 *
 * Locks the parent directory (nd->dentry) and looks up the last
 * component with LOOKUP_CREATE set and an O_EXCL open intent.  Nothing
 * is created here: the dentry returned by lookup_hash() is handed back
 * for the caller to create into.  Is SMP-safe.
 *
 * Returns ERR_PTR(-EEXIST) when the last component is not a normal name,
 * ERR_PTR(-ENOENT) for a negative dentry whose name is followed by more
 * characters (trailing slash) while @is_dir is zero, or the lookup error.
 *
 * Returns with nd->dentry->d_inode->i_mutex locked, on failure as well
 * as on success.
 */
struct dentry *lookup_create(struct nameidata *nd, int is_dir)
{
	struct dentry *dentry = ERR_PTR(-EEXIST);

	mutex_lock_nested(&nd->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
	/*
	 * Yucky last component or no last component at all?
	 * (foo/., foo/.., /////)
	 */
	if (nd->last_type != LAST_NORM)
		goto fail;
	nd->flags &= ~LOOKUP_PARENT;
	nd->flags |= LOOKUP_CREATE;
	nd->intent.open.flags = O_EXCL;

	/*
	 * Do the final lookup.
	 */
	dentry = lookup_hash(nd);
	if (IS_ERR(dentry))
		goto fail;

	/*
	 * Special case - lookup gave negative, but... we had foo/bar/
	 * From the vfs_mknod() POV we just have a negative dentry -
	 * all is fine. Let's be bastards - you had / on the end, you've
	 * been asking for (non-existent) directory. -ENOENT for you.
	 */
	if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
		goto enoent;
	return dentry;
enoent:
	dput(dentry);
	dentry = ERR_PTR(-ENOENT);
fail:
	return dentry;
}
EXPORT_SYMBOL_GPL(lookup_create);

/*
 * vfs_mknod - create a filesystem node for @dentry in directory @dir.
 *
 * Fails with the may_create() error, with -EPERM when a character or
 * block device is requested without CAP_MKNOD or when the directory has
 * no ->mknod() operation, or with the security_inode_mknod() error.
 * fsnotify_create() is called when ->mknod() succeeds.
 */
int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
{
	int error = may_create(dir, dentry, NULL);

	if (error)
		return error;

	if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
		return -EPERM;

	if (!dir->i_op || !dir->i_op->mknod)
		return -EPERM;

	error = security_inode_mknod(dir, dentry, mode, dev);
	if (error)
		return error;

	DQUOT_INIT(dir);
	error = dir->i_op->mknod(dir, dentry, mode, dev);
	if (!error)
		fsnotify_create(dir, dentry);
	return error;
}

/*
 * mknodat(2): create a node named @filename relative to @dfd.
 *
 * Directories are refused with -EPERM.  Mode type 0 and S_IFREG go
 * through vfs_create(); character and block devices go through
 * vfs_mknod() with the decoded @dev; FIFOs and sockets go through
 * vfs_mknod() with device 0; any other type yields -EINVAL.  The umask
 * is applied unless the parent directory is IS_POSIXACL.
 */
asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
				unsigned dev)
{
	int error = 0;
	char * tmp;
	struct dentry * dentry;
	struct nameidata nd;

	if (S_ISDIR(mode))
		return -EPERM;
	tmp = getname(filename);
	if (IS_ERR(tmp))
		return PTR_ERR(tmp);

	error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd);
	if (error)
		goto out;
	/* lookup_create() leaves the parent's i_mutex held even on failure. */
	dentry = lookup_create(&nd, 0);
	error = PTR_ERR(dentry);

	if (!IS_POSIXACL(nd.dentry->d_inode))
		mode &= ~current->fs->umask;
	if (!IS_ERR(dentry)) {
		switch (mode & S_IFMT) {
		case 0: case S_IFREG:
			error = vfs_create(nd.dentry->d_inode,dentry,mode,&nd);
			break;
		case S_IFCHR: case S_IFBLK:
			error = vfs_mknod(nd.dentry->d_inode,dentry,mode,
					new_decode_dev(dev));
			break;
		case S_IFIFO: case S_IFSOCK:
			error = vfs_mknod(nd.dentry->d_inode,dentry,mode,0);
			break;
		case S_IFDIR:
			error = -EPERM;
			break;
		default:
			error = -EINVAL;
		}
		dput(dentry);
	}
	mutex_unlock(&nd.dentry->d_inode->i_mutex);
	path_release(&nd);
out:
	putname(tmp);

	return error;
}

/* mknod(2): sys_mknodat() relative to the current working directory. */
asmlinkage long sys_mknod(const char __user *filename, int mode, unsigned dev)
{
	return sys_mknodat(AT_FDCWD, filename, mode, dev);
}

/*
 * vfs_mkdir - create a directory for @dentry in directory @dir.
 *
 * Fails with the may_create() error, with -EPERM when the directory has
 * no ->mkdir() operation, or with the security_inode_mkdir() error.
 * @mode is reduced to the S_IRWXUGO and S_ISVTX bits.  fsnotify_mkdir()
 * is called when ->mkdir() succeeds.
 */
int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
	int error = may_create(dir, dentry, NULL);

	if (error)
		return error;

	if (!dir->i_op || !dir->i_op->mkdir)
		return -EPERM;

	mode &= (S_IRWXUGO|S_ISVTX);
	error = security_inode_mkdir(dir, dentry, mode);
	if (error)
		return error;

	DQUOT_INIT(dir);
	error = dir->i_op->mkdir(dir, dentry, mode);
	if (!error)
		fsnotify_mkdir(dir, dentry);
	return error;
}

/*
 * mkdirat(2): create directory @pathname relative to @dfd.  The umask
 * is applied to @mode unless the parent directory is IS_POSIXACL.
 */
asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode)
{
	int error = 0;
	char * tmp;
	struct dentry *dentry;
	struct nameidata nd;

	tmp = getname(pathname);
	error = PTR_ERR(tmp);
	if (IS_ERR(tmp))
		goto out_err;

	error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd);
	if (error)
		goto out;
	/* is_dir == 1: a trailing slash on a new directory name is accepted. */
	dentry = lookup_create(&nd, 1);
	error = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		goto out_unlock;

	if (!IS_POSIXACL(nd.dentry->d_inode))
		mode &= ~current->fs->umask;
	error = vfs_mkdir(nd.dentry->d_inode, dentry, mode);
	dput(dentry);
out_unlock:
	mutex_unlock(&nd.dentry->d_inode->i_mutex);
	path_release(&nd);
out:
	putname(tmp);
out_err:
	return error;
}

/* mkdir(2): sys_mkdirat() relative to the current working directory. */
asmlinkage long sys_mkdir(const char __user *pathname, int mode)
{
	return sys_mkdirat(AT_FDCWD, pathname, mode);
}

/*
 * We try to drop the dentry early: we should have
 * a usage count of 2 if we're the only user of this
 * dentry, and if that is true (possibly after pruning
 * the dcache), then we drop the dentry now.
2034 * 2035 * A low-level filesystem can, if it choses, legally 2036 * do a 2037 * 2038 * if (!d_unhashed(dentry)) 2039 * return -EBUSY; 2040 * 2041 * if it cannot handle the case of removing a directory 2042 * that is still in use by something else.. 2043 */ 2044 void dentry_unhash(struct dentry *dentry) 2045 { 2046 dget(dentry); 2047 shrink_dcache_parent(dentry); 2048 spin_lock(&dcache_lock); 2049 spin_lock(&dentry->d_lock); 2050 if (atomic_read(&dentry->d_count) == 2) 2051 __d_drop(dentry); 2052 spin_unlock(&dentry->d_lock); 2053 spin_unlock(&dcache_lock); 2054 } 2055 2056 int vfs_rmdir(struct inode *dir, struct dentry *dentry) 2057 { 2058 int error = may_delete(dir, dentry, 1); 2059 2060 if (error) 2061 return error; 2062 2063 if (!dir->i_op || !dir->i_op->rmdir) 2064 return -EPERM; 2065 2066 DQUOT_INIT(dir); 2067 2068 mutex_lock(&dentry->d_inode->i_mutex); 2069 dentry_unhash(dentry); 2070 if (d_mountpoint(dentry)) 2071 error = -EBUSY; 2072 else { 2073 error = security_inode_rmdir(dir, dentry); 2074 if (!error) { 2075 error = dir->i_op->rmdir(dir, dentry); 2076 if (!error) 2077 dentry->d_inode->i_flags |= S_DEAD; 2078 } 2079 } 2080 mutex_unlock(&dentry->d_inode->i_mutex); 2081 if (!error) { 2082 d_delete(dentry); 2083 } 2084 dput(dentry); 2085 2086 return error; 2087 } 2088 2089 static long do_rmdir(int dfd, const char __user *pathname) 2090 { 2091 int error = 0; 2092 char * name; 2093 struct dentry *dentry; 2094 struct nameidata nd; 2095 2096 name = getname(pathname); 2097 if(IS_ERR(name)) 2098 return PTR_ERR(name); 2099 2100 error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd); 2101 if (error) 2102 goto exit; 2103 2104 switch(nd.last_type) { 2105 case LAST_DOTDOT: 2106 error = -ENOTEMPTY; 2107 goto exit1; 2108 case LAST_DOT: 2109 error = -EINVAL; 2110 goto exit1; 2111 case LAST_ROOT: 2112 error = -EBUSY; 2113 goto exit1; 2114 } 2115 mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 2116 dentry = lookup_hash(&nd); 2117 error = PTR_ERR(dentry); 
2118 if (IS_ERR(dentry)) 2119 goto exit2; 2120 error = vfs_rmdir(nd.dentry->d_inode, dentry); 2121 dput(dentry); 2122 exit2: 2123 mutex_unlock(&nd.dentry->d_inode->i_mutex); 2124 exit1: 2125 path_release(&nd); 2126 exit: 2127 putname(name); 2128 return error; 2129 } 2130 2131 asmlinkage long sys_rmdir(const char __user *pathname) 2132 { 2133 return do_rmdir(AT_FDCWD, pathname); 2134 } 2135 2136 int vfs_unlink(struct inode *dir, struct dentry *dentry) 2137 { 2138 int error = may_delete(dir, dentry, 0); 2139 2140 if (error) 2141 return error; 2142 2143 if (!dir->i_op || !dir->i_op->unlink) 2144 return -EPERM; 2145 2146 DQUOT_INIT(dir); 2147 2148 mutex_lock(&dentry->d_inode->i_mutex); 2149 if (d_mountpoint(dentry)) 2150 error = -EBUSY; 2151 else { 2152 error = security_inode_unlink(dir, dentry); 2153 if (!error) 2154 error = dir->i_op->unlink(dir, dentry); 2155 } 2156 mutex_unlock(&dentry->d_inode->i_mutex); 2157 2158 /* We don't d_delete() NFS sillyrenamed files--they still exist. */ 2159 if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) { 2160 d_delete(dentry); 2161 } 2162 2163 return error; 2164 } 2165 2166 /* 2167 * Make sure that the actual truncation of the file will occur outside its 2168 * directory's i_mutex. Truncate can take a long time if there is a lot of 2169 * writeout happening, and we don't want to prevent access to the directory 2170 * while waiting on the I/O. 
2171 */ 2172 static long do_unlinkat(int dfd, const char __user *pathname) 2173 { 2174 int error = 0; 2175 char * name; 2176 struct dentry *dentry; 2177 struct nameidata nd; 2178 struct inode *inode = NULL; 2179 2180 name = getname(pathname); 2181 if(IS_ERR(name)) 2182 return PTR_ERR(name); 2183 2184 error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd); 2185 if (error) 2186 goto exit; 2187 error = -EISDIR; 2188 if (nd.last_type != LAST_NORM) 2189 goto exit1; 2190 mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 2191 dentry = lookup_hash(&nd); 2192 error = PTR_ERR(dentry); 2193 if (!IS_ERR(dentry)) { 2194 /* Why not before? Because we want correct error value */ 2195 if (nd.last.name[nd.last.len]) 2196 goto slashes; 2197 inode = dentry->d_inode; 2198 if (inode) 2199 atomic_inc(&inode->i_count); 2200 error = vfs_unlink(nd.dentry->d_inode, dentry); 2201 exit2: 2202 dput(dentry); 2203 } 2204 mutex_unlock(&nd.dentry->d_inode->i_mutex); 2205 if (inode) 2206 iput(inode); /* truncate the inode here */ 2207 exit1: 2208 path_release(&nd); 2209 exit: 2210 putname(name); 2211 return error; 2212 2213 slashes: 2214 error = !dentry->d_inode ? -ENOENT : 2215 S_ISDIR(dentry->d_inode->i_mode) ? 
-EISDIR : -ENOTDIR; 2216 goto exit2; 2217 } 2218 2219 asmlinkage long sys_unlinkat(int dfd, const char __user *pathname, int flag) 2220 { 2221 if ((flag & ~AT_REMOVEDIR) != 0) 2222 return -EINVAL; 2223 2224 if (flag & AT_REMOVEDIR) 2225 return do_rmdir(dfd, pathname); 2226 2227 return do_unlinkat(dfd, pathname); 2228 } 2229 2230 asmlinkage long sys_unlink(const char __user *pathname) 2231 { 2232 return do_unlinkat(AT_FDCWD, pathname); 2233 } 2234 2235 int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode) 2236 { 2237 int error = may_create(dir, dentry, NULL); 2238 2239 if (error) 2240 return error; 2241 2242 if (!dir->i_op || !dir->i_op->symlink) 2243 return -EPERM; 2244 2245 error = security_inode_symlink(dir, dentry, oldname); 2246 if (error) 2247 return error; 2248 2249 DQUOT_INIT(dir); 2250 error = dir->i_op->symlink(dir, dentry, oldname); 2251 if (!error) 2252 fsnotify_create(dir, dentry); 2253 return error; 2254 } 2255 2256 asmlinkage long sys_symlinkat(const char __user *oldname, 2257 int newdfd, const char __user *newname) 2258 { 2259 int error = 0; 2260 char * from; 2261 char * to; 2262 struct dentry *dentry; 2263 struct nameidata nd; 2264 2265 from = getname(oldname); 2266 if(IS_ERR(from)) 2267 return PTR_ERR(from); 2268 to = getname(newname); 2269 error = PTR_ERR(to); 2270 if (IS_ERR(to)) 2271 goto out_putname; 2272 2273 error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd); 2274 if (error) 2275 goto out; 2276 dentry = lookup_create(&nd, 0); 2277 error = PTR_ERR(dentry); 2278 if (IS_ERR(dentry)) 2279 goto out_unlock; 2280 2281 error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO); 2282 dput(dentry); 2283 out_unlock: 2284 mutex_unlock(&nd.dentry->d_inode->i_mutex); 2285 path_release(&nd); 2286 out: 2287 putname(to); 2288 out_putname: 2289 putname(from); 2290 return error; 2291 } 2292 2293 asmlinkage long sys_symlink(const char __user *oldname, const char __user *newname) 2294 { 2295 return 
sys_symlinkat(oldname, AT_FDCWD, newname); 2296 } 2297 2298 int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) 2299 { 2300 struct inode *inode = old_dentry->d_inode; 2301 int error; 2302 2303 if (!inode) 2304 return -ENOENT; 2305 2306 error = may_create(dir, new_dentry, NULL); 2307 if (error) 2308 return error; 2309 2310 if (dir->i_sb != inode->i_sb) 2311 return -EXDEV; 2312 2313 /* 2314 * A link to an append-only or immutable file cannot be created. 2315 */ 2316 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 2317 return -EPERM; 2318 if (!dir->i_op || !dir->i_op->link) 2319 return -EPERM; 2320 if (S_ISDIR(old_dentry->d_inode->i_mode)) 2321 return -EPERM; 2322 2323 error = security_inode_link(old_dentry, dir, new_dentry); 2324 if (error) 2325 return error; 2326 2327 mutex_lock(&old_dentry->d_inode->i_mutex); 2328 DQUOT_INIT(dir); 2329 error = dir->i_op->link(old_dentry, dir, new_dentry); 2330 mutex_unlock(&old_dentry->d_inode->i_mutex); 2331 if (!error) 2332 fsnotify_create(dir, new_dentry); 2333 return error; 2334 } 2335 2336 /* 2337 * Hardlinks are often used in delicate situations. We avoid 2338 * security-related surprises by not following symlinks on the 2339 * newname. --KAB 2340 * 2341 * We don't follow them on the oldname either to be compatible 2342 * with linux 2.0, and to avoid hard-linking to directories 2343 * and other special files. --ADM 2344 */ 2345 asmlinkage long sys_linkat(int olddfd, const char __user *oldname, 2346 int newdfd, const char __user *newname, 2347 int flags) 2348 { 2349 struct dentry *new_dentry; 2350 struct nameidata nd, old_nd; 2351 int error; 2352 char * to; 2353 2354 if ((flags & ~AT_SYMLINK_FOLLOW) != 0) 2355 return -EINVAL; 2356 2357 to = getname(newname); 2358 if (IS_ERR(to)) 2359 return PTR_ERR(to); 2360 2361 error = __user_walk_fd(olddfd, oldname, 2362 flags & AT_SYMLINK_FOLLOW ? 
LOOKUP_FOLLOW : 0, 2363 &old_nd); 2364 if (error) 2365 goto exit; 2366 error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd); 2367 if (error) 2368 goto out; 2369 error = -EXDEV; 2370 if (old_nd.mnt != nd.mnt) 2371 goto out_release; 2372 new_dentry = lookup_create(&nd, 0); 2373 error = PTR_ERR(new_dentry); 2374 if (IS_ERR(new_dentry)) 2375 goto out_unlock; 2376 error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry); 2377 dput(new_dentry); 2378 out_unlock: 2379 mutex_unlock(&nd.dentry->d_inode->i_mutex); 2380 out_release: 2381 path_release(&nd); 2382 out: 2383 path_release(&old_nd); 2384 exit: 2385 putname(to); 2386 2387 return error; 2388 } 2389 2390 asmlinkage long sys_link(const char __user *oldname, const char __user *newname) 2391 { 2392 return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0); 2393 } 2394 2395 /* 2396 * The worst of all namespace operations - renaming directory. "Perverted" 2397 * doesn't even start to describe it. Somebody in UCB had a heck of a trip... 2398 * Problems: 2399 * a) we can get into loop creation. Check is done in is_subdir(). 2400 * b) race potential - two innocent renames can create a loop together. 2401 * That's where 4.4 screws up. Current fix: serialization on 2402 * sb->s_vfs_rename_mutex. We might be more accurate, but that's another 2403 * story. 2404 * c) we have to lock _three_ objects - parents and victim (if it exists). 2405 * And that - after we got ->i_mutex on parents (until then we don't know 2406 * whether the target exists). Solution: try to be smart with locking 2407 * order for inodes. We rely on the fact that tree topology may change 2408 * only under ->s_vfs_rename_mutex _and_ that parent of the object we 2409 * move will be locked. Thus we can rank directories by the tree 2410 * (ancestors first) and rank all non-directories after them. 2411 * That works since everybody except rename does "lock parent, lookup, 2412 * lock child" and rename is under ->s_vfs_rename_mutex. 
2413 * HOWEVER, it relies on the assumption that any object with ->lookup() 2414 * has no more than 1 dentry. If "hybrid" objects will ever appear, 2415 * we'd better make sure that there's no link(2) for them. 2416 * d) some filesystems don't support opened-but-unlinked directories, 2417 * either because of layout or because they are not ready to deal with 2418 * all cases correctly. The latter will be fixed (taking this sort of 2419 * stuff into VFS), but the former is not going away. Solution: the same 2420 * trick as in rmdir(). 2421 * e) conversion from fhandle to dentry may come in the wrong moment - when 2422 * we are removing the target. Solution: we will have to grab ->i_mutex 2423 * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on 2424 * ->i_mutex on parents, which works but leads to some truely excessive 2425 * locking]. 2426 */ 2427 static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, 2428 struct inode *new_dir, struct dentry *new_dentry) 2429 { 2430 int error = 0; 2431 struct inode *target; 2432 2433 /* 2434 * If we are going to change the parent - check write permissions, 2435 * we'll need to flip '..'. 
2436 */ 2437 if (new_dir != old_dir) { 2438 error = permission(old_dentry->d_inode, MAY_WRITE, NULL); 2439 if (error) 2440 return error; 2441 } 2442 2443 error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); 2444 if (error) 2445 return error; 2446 2447 target = new_dentry->d_inode; 2448 if (target) { 2449 mutex_lock(&target->i_mutex); 2450 dentry_unhash(new_dentry); 2451 } 2452 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) 2453 error = -EBUSY; 2454 else 2455 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2456 if (target) { 2457 if (!error) 2458 target->i_flags |= S_DEAD; 2459 mutex_unlock(&target->i_mutex); 2460 if (d_unhashed(new_dentry)) 2461 d_rehash(new_dentry); 2462 dput(new_dentry); 2463 } 2464 if (!error) 2465 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) 2466 d_move(old_dentry,new_dentry); 2467 return error; 2468 } 2469 2470 static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, 2471 struct inode *new_dir, struct dentry *new_dentry) 2472 { 2473 struct inode *target; 2474 int error; 2475 2476 error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); 2477 if (error) 2478 return error; 2479 2480 dget(new_dentry); 2481 target = new_dentry->d_inode; 2482 if (target) 2483 mutex_lock(&target->i_mutex); 2484 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) 2485 error = -EBUSY; 2486 else 2487 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2488 if (!error) { 2489 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) 2490 d_move(old_dentry, new_dentry); 2491 } 2492 if (target) 2493 mutex_unlock(&target->i_mutex); 2494 dput(new_dentry); 2495 return error; 2496 } 2497 2498 int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, 2499 struct inode *new_dir, struct dentry *new_dentry) 2500 { 2501 int error; 2502 int is_dir = S_ISDIR(old_dentry->d_inode->i_mode); 2503 const char *old_name; 2504 2505 if 
(old_dentry->d_inode == new_dentry->d_inode) 2506 return 0; 2507 2508 error = may_delete(old_dir, old_dentry, is_dir); 2509 if (error) 2510 return error; 2511 2512 if (!new_dentry->d_inode) 2513 error = may_create(new_dir, new_dentry, NULL); 2514 else 2515 error = may_delete(new_dir, new_dentry, is_dir); 2516 if (error) 2517 return error; 2518 2519 if (!old_dir->i_op || !old_dir->i_op->rename) 2520 return -EPERM; 2521 2522 DQUOT_INIT(old_dir); 2523 DQUOT_INIT(new_dir); 2524 2525 old_name = fsnotify_oldname_init(old_dentry->d_name.name); 2526 2527 if (is_dir) 2528 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); 2529 else 2530 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); 2531 if (!error) { 2532 const char *new_name = old_dentry->d_name.name; 2533 fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir, 2534 new_dentry->d_inode, old_dentry->d_inode); 2535 } 2536 fsnotify_oldname_free(old_name); 2537 2538 return error; 2539 } 2540 2541 static int do_rename(int olddfd, const char *oldname, 2542 int newdfd, const char *newname) 2543 { 2544 int error = 0; 2545 struct dentry * old_dir, * new_dir; 2546 struct dentry * old_dentry, *new_dentry; 2547 struct dentry * trap; 2548 struct nameidata oldnd, newnd; 2549 2550 error = do_path_lookup(olddfd, oldname, LOOKUP_PARENT, &oldnd); 2551 if (error) 2552 goto exit; 2553 2554 error = do_path_lookup(newdfd, newname, LOOKUP_PARENT, &newnd); 2555 if (error) 2556 goto exit1; 2557 2558 error = -EXDEV; 2559 if (oldnd.mnt != newnd.mnt) 2560 goto exit2; 2561 2562 old_dir = oldnd.dentry; 2563 error = -EBUSY; 2564 if (oldnd.last_type != LAST_NORM) 2565 goto exit2; 2566 2567 new_dir = newnd.dentry; 2568 if (newnd.last_type != LAST_NORM) 2569 goto exit2; 2570 2571 trap = lock_rename(new_dir, old_dir); 2572 2573 old_dentry = lookup_hash(&oldnd); 2574 error = PTR_ERR(old_dentry); 2575 if (IS_ERR(old_dentry)) 2576 goto exit3; 2577 /* source must exist */ 2578 error = -ENOENT; 2579 if (!old_dentry->d_inode) 
2580 goto exit4; 2581 /* unless the source is a directory trailing slashes give -ENOTDIR */ 2582 if (!S_ISDIR(old_dentry->d_inode->i_mode)) { 2583 error = -ENOTDIR; 2584 if (oldnd.last.name[oldnd.last.len]) 2585 goto exit4; 2586 if (newnd.last.name[newnd.last.len]) 2587 goto exit4; 2588 } 2589 /* source should not be ancestor of target */ 2590 error = -EINVAL; 2591 if (old_dentry == trap) 2592 goto exit4; 2593 new_dentry = lookup_hash(&newnd); 2594 error = PTR_ERR(new_dentry); 2595 if (IS_ERR(new_dentry)) 2596 goto exit4; 2597 /* target should not be an ancestor of source */ 2598 error = -ENOTEMPTY; 2599 if (new_dentry == trap) 2600 goto exit5; 2601 2602 error = vfs_rename(old_dir->d_inode, old_dentry, 2603 new_dir->d_inode, new_dentry); 2604 exit5: 2605 dput(new_dentry); 2606 exit4: 2607 dput(old_dentry); 2608 exit3: 2609 unlock_rename(new_dir, old_dir); 2610 exit2: 2611 path_release(&newnd); 2612 exit1: 2613 path_release(&oldnd); 2614 exit: 2615 return error; 2616 } 2617 2618 asmlinkage long sys_renameat(int olddfd, const char __user *oldname, 2619 int newdfd, const char __user *newname) 2620 { 2621 int error; 2622 char * from; 2623 char * to; 2624 2625 from = getname(oldname); 2626 if(IS_ERR(from)) 2627 return PTR_ERR(from); 2628 to = getname(newname); 2629 error = PTR_ERR(to); 2630 if (!IS_ERR(to)) { 2631 error = do_rename(olddfd, from, newdfd, to); 2632 putname(to); 2633 } 2634 putname(from); 2635 return error; 2636 } 2637 2638 asmlinkage long sys_rename(const char __user *oldname, const char __user *newname) 2639 { 2640 return sys_renameat(AT_FDCWD, oldname, AT_FDCWD, newname); 2641 } 2642 2643 int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link) 2644 { 2645 int len; 2646 2647 len = PTR_ERR(link); 2648 if (IS_ERR(link)) 2649 goto out; 2650 2651 len = strlen(link); 2652 if (len > (unsigned) buflen) 2653 len = buflen; 2654 if (copy_to_user(buffer, link, len)) 2655 len = -EFAULT; 2656 out: 2657 return len; 2658 } 2659 2660 
/* 2661 * A helper for ->readlink(). This should be used *ONLY* for symlinks that 2662 * have ->follow_link() touching nd only in nd_set_link(). Using (or not 2663 * using) it for any given inode is up to filesystem. 2664 */ 2665 int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) 2666 { 2667 struct nameidata nd; 2668 void *cookie; 2669 2670 nd.depth = 0; 2671 cookie = dentry->d_inode->i_op->follow_link(dentry, &nd); 2672 if (!IS_ERR(cookie)) { 2673 int res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); 2674 if (dentry->d_inode->i_op->put_link) 2675 dentry->d_inode->i_op->put_link(dentry, &nd, cookie); 2676 cookie = ERR_PTR(res); 2677 } 2678 return PTR_ERR(cookie); 2679 } 2680 2681 int vfs_follow_link(struct nameidata *nd, const char *link) 2682 { 2683 return __vfs_follow_link(nd, link); 2684 } 2685 2686 /* get the link contents into pagecache */ 2687 static char *page_getlink(struct dentry * dentry, struct page **ppage) 2688 { 2689 struct page * page; 2690 struct address_space *mapping = dentry->d_inode->i_mapping; 2691 page = read_mapping_page(mapping, 0, NULL); 2692 if (IS_ERR(page)) 2693 return (char*)page; 2694 *ppage = page; 2695 return kmap(page); 2696 } 2697 2698 int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) 2699 { 2700 struct page *page = NULL; 2701 char *s = page_getlink(dentry, &page); 2702 int res = vfs_readlink(dentry,buffer,buflen,s); 2703 if (page) { 2704 kunmap(page); 2705 page_cache_release(page); 2706 } 2707 return res; 2708 } 2709 2710 void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd) 2711 { 2712 struct page *page = NULL; 2713 nd_set_link(nd, page_getlink(dentry, &page)); 2714 return page; 2715 } 2716 2717 void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) 2718 { 2719 struct page *page = cookie; 2720 2721 if (page) { 2722 kunmap(page); 2723 page_cache_release(page); 2724 } 2725 } 2726 2727 int __page_symlink(struct inode *inode, 
const char *symname, int len, 2728 gfp_t gfp_mask) 2729 { 2730 struct address_space *mapping = inode->i_mapping; 2731 struct page *page; 2732 int err; 2733 char *kaddr; 2734 2735 retry: 2736 err = -ENOMEM; 2737 page = find_or_create_page(mapping, 0, gfp_mask); 2738 if (!page) 2739 goto fail; 2740 err = mapping->a_ops->prepare_write(NULL, page, 0, len-1); 2741 if (err == AOP_TRUNCATED_PAGE) { 2742 page_cache_release(page); 2743 goto retry; 2744 } 2745 if (err) 2746 goto fail_map; 2747 kaddr = kmap_atomic(page, KM_USER0); 2748 memcpy(kaddr, symname, len-1); 2749 kunmap_atomic(kaddr, KM_USER0); 2750 err = mapping->a_ops->commit_write(NULL, page, 0, len-1); 2751 if (err == AOP_TRUNCATED_PAGE) { 2752 page_cache_release(page); 2753 goto retry; 2754 } 2755 if (err) 2756 goto fail_map; 2757 /* 2758 * Notice that we are _not_ going to block here - end of page is 2759 * unmapped, so this will only try to map the rest of page, see 2760 * that it is unmapped (typically even will not look into inode - 2761 * ->i_size will be enough for everything) and zero it out. 2762 * OTOH it's obviously correct and should make the page up-to-date. 
2763 */ 2764 if (!PageUptodate(page)) { 2765 err = mapping->a_ops->readpage(NULL, page); 2766 if (err != AOP_TRUNCATED_PAGE) 2767 wait_on_page_locked(page); 2768 } else { 2769 unlock_page(page); 2770 } 2771 page_cache_release(page); 2772 if (err < 0) 2773 goto fail; 2774 mark_inode_dirty(inode); 2775 return 0; 2776 fail_map: 2777 unlock_page(page); 2778 page_cache_release(page); 2779 fail: 2780 return err; 2781 } 2782 2783 int page_symlink(struct inode *inode, const char *symname, int len) 2784 { 2785 return __page_symlink(inode, symname, len, 2786 mapping_gfp_mask(inode->i_mapping)); 2787 } 2788 2789 const struct inode_operations page_symlink_inode_operations = { 2790 .readlink = generic_readlink, 2791 .follow_link = page_follow_link_light, 2792 .put_link = page_put_link, 2793 }; 2794 2795 EXPORT_SYMBOL(__user_walk); 2796 EXPORT_SYMBOL(__user_walk_fd); 2797 EXPORT_SYMBOL(follow_down); 2798 EXPORT_SYMBOL(follow_up); 2799 EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ 2800 EXPORT_SYMBOL(getname); 2801 EXPORT_SYMBOL(lock_rename); 2802 EXPORT_SYMBOL(lookup_one_len); 2803 EXPORT_SYMBOL(page_follow_link_light); 2804 EXPORT_SYMBOL(page_put_link); 2805 EXPORT_SYMBOL(page_readlink); 2806 EXPORT_SYMBOL(__page_symlink); 2807 EXPORT_SYMBOL(page_symlink); 2808 EXPORT_SYMBOL(page_symlink_inode_operations); 2809 EXPORT_SYMBOL(path_lookup); 2810 EXPORT_SYMBOL(vfs_path_lookup); 2811 EXPORT_SYMBOL(path_release); 2812 EXPORT_SYMBOL(permission); 2813 EXPORT_SYMBOL(vfs_permission); 2814 EXPORT_SYMBOL(file_permission); 2815 EXPORT_SYMBOL(unlock_rename); 2816 EXPORT_SYMBOL(vfs_create); 2817 EXPORT_SYMBOL(vfs_follow_link); 2818 EXPORT_SYMBOL(vfs_link); 2819 EXPORT_SYMBOL(vfs_mkdir); 2820 EXPORT_SYMBOL(vfs_mknod); 2821 EXPORT_SYMBOL(generic_permission); 2822 EXPORT_SYMBOL(vfs_readlink); 2823 EXPORT_SYMBOL(vfs_rename); 2824 EXPORT_SYMBOL(vfs_rmdir); 2825 EXPORT_SYMBOL(vfs_symlink); 2826 EXPORT_SYMBOL(vfs_unlink); 2827 EXPORT_SYMBOL(dentry_unhash); 2828 EXPORT_SYMBOL(generic_readlink); 
2829