1 /* 2 * linux/fs/namei.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 /* 8 * Some corrections by tytso. 9 */ 10 11 /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname 12 * lookup logic. 13 */ 14 /* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture. 15 */ 16 17 #include <linux/init.h> 18 #include <linux/module.h> 19 #include <linux/slab.h> 20 #include <linux/fs.h> 21 #include <linux/namei.h> 22 #include <linux/quotaops.h> 23 #include <linux/pagemap.h> 24 #include <linux/fsnotify.h> 25 #include <linux/personality.h> 26 #include <linux/security.h> 27 #include <linux/syscalls.h> 28 #include <linux/mount.h> 29 #include <linux/audit.h> 30 #include <linux/capability.h> 31 #include <linux/file.h> 32 #include <linux/fcntl.h> 33 #include <linux/device_cgroup.h> 34 #include <asm/namei.h> 35 #include <asm/uaccess.h> 36 37 #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) 38 39 /* [Feb-1997 T. Schoebel-Theuer] 40 * Fundamental changes in the pathname lookup mechanisms (namei) 41 * were necessary because of omirr. The reason is that omirr needs 42 * to know the _real_ pathname, not the user-supplied one, in case 43 * of symlinks (and also when transname replacements occur). 44 * 45 * The new code replaces the old recursive symlink resolution with 46 * an iterative one (in case of non-nested symlink chains). It does 47 * this with calls to <fs>_follow_link(). 48 * As a side effect, dir_namei(), _namei() and follow_link() are now 49 * replaced with a single function lookup_dentry() that can handle all 50 * the special cases of the former code. 51 * 52 * With the new dcache, the pathname is stored at each inode, at least as 53 * long as the refcount of the inode is positive. As a side effect, the 54 * size of the dcache depends on the inode cache and thus is dynamic. 55 * 56 * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink 57 * resolution to correspond with current state of the code. 
 *
 * Note that the symlink resolution is not *completely* iterative.
 * There is still a significant amount of tail- and mid- recursion in
 * the algorithm.  Also, note that <fs>_readlink() is not used in
 * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
 * may return different results than <fs>_follow_link().  Many virtual
 * filesystems (including /proc) exhibit this behavior.
 */

/* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
 * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
 * and the name already exists in form of a symlink, try to create the new
 * name indicated by the symlink. The old code always complained that the
 * name already exists, due to not following the symlink even if its target
 * is nonexistent.  The new semantics affects also mknod() and link() when
 * the name is a symlink pointing to a non-existent name.
 *
 * I don't know which semantics is the right one, since I have no access
 * to standards. But I found by trial that HP-UX 9.0 has the full "new"
 * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
 * "old" one. Personally, I think the new semantics is much more logical.
 * Note that "ln old new" where "new" is a symlink pointing to a non-existing
 * file does succeed in both HP-UX and SunOS, but not in Solaris
 * and in the old Linux semantics.
 */

/* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
 * semantics.  See the comments in "open_namei" and "do_link" below.
 *
 * [10-Sep-98 Alan Modra] Another symlink change.
 */

/* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
 *	inside the path - always follow.
 *	in the last component in creation/removal/renaming - never follow.
 *	if LOOKUP_FOLLOW passed - follow.
 *	if the pathname has trailing slashes - follow.
 *	otherwise - don't follow.
 * (applied in that order).
97 * 98 * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT 99 * restored for 2.4. This is the last surviving part of old 4.2BSD bug. 100 * During the 2.4 we need to fix the userland stuff depending on it - 101 * hopefully we will be able to get rid of that wart in 2.5. So far only 102 * XEmacs seems to be relying on it... 103 */ 104 /* 105 * [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland) 106 * implemented. Let's see if raised priority of ->s_vfs_rename_mutex gives 107 * any extra contention... 108 */ 109 110 static int __link_path_walk(const char *name, struct nameidata *nd); 111 112 /* In order to reduce some races, while at the same time doing additional 113 * checking and hopefully speeding things up, we copy filenames to the 114 * kernel data space before using them.. 115 * 116 * POSIX.1 2.4: an empty pathname is invalid (ENOENT). 117 * PATH_MAX includes the nul terminator --RR. 118 */ 119 static int do_getname(const char __user *filename, char *page) 120 { 121 int retval; 122 unsigned long len = PATH_MAX; 123 124 if (!segment_eq(get_fs(), KERNEL_DS)) { 125 if ((unsigned long) filename >= TASK_SIZE) 126 return -EFAULT; 127 if (TASK_SIZE - (unsigned long) filename < PATH_MAX) 128 len = TASK_SIZE - (unsigned long) filename; 129 } 130 131 retval = strncpy_from_user(page, filename, len); 132 if (retval > 0) { 133 if (retval < len) 134 return 0; 135 return -ENAMETOOLONG; 136 } else if (!retval) 137 retval = -ENOENT; 138 return retval; 139 } 140 141 char * getname(const char __user * filename) 142 { 143 char *tmp, *result; 144 145 result = ERR_PTR(-ENOMEM); 146 tmp = __getname(); 147 if (tmp) { 148 int retval = do_getname(filename, tmp); 149 150 result = tmp; 151 if (retval < 0) { 152 __putname(tmp); 153 result = ERR_PTR(retval); 154 } 155 } 156 audit_getname(result); 157 return result; 158 } 159 160 #ifdef CONFIG_AUDITSYSCALL 161 void putname(const char *name) 162 { 163 if (unlikely(!audit_dummy_context())) 164 
audit_putname(name); 165 else 166 __putname(name); 167 } 168 EXPORT_SYMBOL(putname); 169 #endif 170 171 172 /** 173 * generic_permission - check for access rights on a Posix-like filesystem 174 * @inode: inode to check access rights for 175 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) 176 * @check_acl: optional callback to check for Posix ACLs 177 * 178 * Used to check for read/write/execute permissions on a file. 179 * We use "fsuid" for this, letting us set arbitrary permissions 180 * for filesystem access without changing the "normal" uids which 181 * are used for other things.. 182 */ 183 int generic_permission(struct inode *inode, int mask, 184 int (*check_acl)(struct inode *inode, int mask)) 185 { 186 umode_t mode = inode->i_mode; 187 188 if (current->fsuid == inode->i_uid) 189 mode >>= 6; 190 else { 191 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { 192 int error = check_acl(inode, mask); 193 if (error == -EACCES) 194 goto check_capabilities; 195 else if (error != -EAGAIN) 196 return error; 197 } 198 199 if (in_group_p(inode->i_gid)) 200 mode >>= 3; 201 } 202 203 /* 204 * If the DACs are ok we don't need any capability check. 205 */ 206 if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)) 207 return 0; 208 209 check_capabilities: 210 /* 211 * Read/write DACs are always overridable. 212 * Executable DACs are overridable if at least one exec bit is set. 213 */ 214 if (!(mask & MAY_EXEC) || 215 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode)) 216 if (capable(CAP_DAC_OVERRIDE)) 217 return 0; 218 219 /* 220 * Searching includes executable on directories, else just read. 
221 */ 222 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) 223 if (capable(CAP_DAC_READ_SEARCH)) 224 return 0; 225 226 return -EACCES; 227 } 228 229 int permission(struct inode *inode, int mask, struct nameidata *nd) 230 { 231 int retval, submask; 232 struct vfsmount *mnt = NULL; 233 234 if (nd) 235 mnt = nd->path.mnt; 236 237 if (mask & MAY_WRITE) { 238 umode_t mode = inode->i_mode; 239 240 /* 241 * Nobody gets write access to a read-only fs. 242 */ 243 if (IS_RDONLY(inode) && 244 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) 245 return -EROFS; 246 247 /* 248 * Nobody gets write access to an immutable file. 249 */ 250 if (IS_IMMUTABLE(inode)) 251 return -EACCES; 252 } 253 254 if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) { 255 /* 256 * MAY_EXEC on regular files is denied if the fs is mounted 257 * with the "noexec" flag. 258 */ 259 if (mnt && (mnt->mnt_flags & MNT_NOEXEC)) 260 return -EACCES; 261 } 262 263 /* Ordinary permission routines do not understand MAY_APPEND. */ 264 submask = mask & ~MAY_APPEND; 265 if (inode->i_op && inode->i_op->permission) { 266 retval = inode->i_op->permission(inode, submask, nd); 267 if (!retval) { 268 /* 269 * Exec permission on a regular file is denied if none 270 * of the execute bits are set. 271 * 272 * This check should be done by the ->permission() 273 * method. 
274 */ 275 if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode) && 276 !(inode->i_mode & S_IXUGO)) 277 return -EACCES; 278 } 279 } else { 280 retval = generic_permission(inode, submask, NULL); 281 } 282 if (retval) 283 return retval; 284 285 retval = devcgroup_inode_permission(inode, mask); 286 if (retval) 287 return retval; 288 289 return security_inode_permission(inode, mask, nd); 290 } 291 292 /** 293 * vfs_permission - check for access rights to a given path 294 * @nd: lookup result that describes the path 295 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) 296 * 297 * Used to check for read/write/execute permissions on a path. 298 * We use "fsuid" for this, letting us set arbitrary permissions 299 * for filesystem access without changing the "normal" uids which 300 * are used for other things. 301 */ 302 int vfs_permission(struct nameidata *nd, int mask) 303 { 304 return permission(nd->path.dentry->d_inode, mask, nd); 305 } 306 307 /** 308 * file_permission - check for additional access rights to a given file 309 * @file: file to check access rights for 310 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) 311 * 312 * Used to check for read/write/execute permissions on an already opened 313 * file. 314 * 315 * Note: 316 * Do not use this function in new code. All access checks should 317 * be done using vfs_permission(). 318 */ 319 int file_permission(struct file *file, int mask) 320 { 321 return permission(file->f_path.dentry->d_inode, mask, NULL); 322 } 323 324 /* 325 * get_write_access() gets write permission for a file. 326 * put_write_access() releases this write permission. 327 * This is used for regular files. 328 * We cannot support write (and maybe mmap read-write shared) accesses and 329 * MAP_DENYWRITE mmappings simultaneously. 
The i_writecount field of an inode 330 * can have the following values: 331 * 0: no writers, no VM_DENYWRITE mappings 332 * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist 333 * > 0: (i_writecount) users are writing to the file. 334 * 335 * Normally we operate on that counter with atomic_{inc,dec} and it's safe 336 * except for the cases where we don't hold i_writecount yet. Then we need to 337 * use {get,deny}_write_access() - these functions check the sign and refuse 338 * to do the change if sign is wrong. Exclusion between them is provided by 339 * the inode->i_lock spinlock. 340 */ 341 342 int get_write_access(struct inode * inode) 343 { 344 spin_lock(&inode->i_lock); 345 if (atomic_read(&inode->i_writecount) < 0) { 346 spin_unlock(&inode->i_lock); 347 return -ETXTBSY; 348 } 349 atomic_inc(&inode->i_writecount); 350 spin_unlock(&inode->i_lock); 351 352 return 0; 353 } 354 355 int deny_write_access(struct file * file) 356 { 357 struct inode *inode = file->f_path.dentry->d_inode; 358 359 spin_lock(&inode->i_lock); 360 if (atomic_read(&inode->i_writecount) > 0) { 361 spin_unlock(&inode->i_lock); 362 return -ETXTBSY; 363 } 364 atomic_dec(&inode->i_writecount); 365 spin_unlock(&inode->i_lock); 366 367 return 0; 368 } 369 370 /** 371 * path_get - get a reference to a path 372 * @path: path to get the reference to 373 * 374 * Given a path increment the reference count to the dentry and the vfsmount. 375 */ 376 void path_get(struct path *path) 377 { 378 mntget(path->mnt); 379 dget(path->dentry); 380 } 381 EXPORT_SYMBOL(path_get); 382 383 /** 384 * path_put - put a reference to a path 385 * @path: path to put the reference to 386 * 387 * Given a path decrement the reference count to the dentry and the vfsmount. 
388 */ 389 void path_put(struct path *path) 390 { 391 dput(path->dentry); 392 mntput(path->mnt); 393 } 394 EXPORT_SYMBOL(path_put); 395 396 /** 397 * release_open_intent - free up open intent resources 398 * @nd: pointer to nameidata 399 */ 400 void release_open_intent(struct nameidata *nd) 401 { 402 if (nd->intent.open.file->f_path.dentry == NULL) 403 put_filp(nd->intent.open.file); 404 else 405 fput(nd->intent.open.file); 406 } 407 408 static inline struct dentry * 409 do_revalidate(struct dentry *dentry, struct nameidata *nd) 410 { 411 int status = dentry->d_op->d_revalidate(dentry, nd); 412 if (unlikely(status <= 0)) { 413 /* 414 * The dentry failed validation. 415 * If d_revalidate returned 0 attempt to invalidate 416 * the dentry otherwise d_revalidate is asking us 417 * to return a fail status. 418 */ 419 if (!status) { 420 if (!d_invalidate(dentry)) { 421 dput(dentry); 422 dentry = NULL; 423 } 424 } else { 425 dput(dentry); 426 dentry = ERR_PTR(status); 427 } 428 } 429 return dentry; 430 } 431 432 /* 433 * Internal lookup() using the new generic dcache. 434 * SMP-safe 435 */ 436 static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd) 437 { 438 struct dentry * dentry = __d_lookup(parent, name); 439 440 /* lockess __d_lookup may fail due to concurrent d_move() 441 * in some unrelated directory, so try with d_lookup 442 */ 443 if (!dentry) 444 dentry = d_lookup(parent, name); 445 446 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) 447 dentry = do_revalidate(dentry, nd); 448 449 return dentry; 450 } 451 452 /* 453 * Short-cut version of permission(), for calling by 454 * path_walk(), when dcache lock is held. Combines parts 455 * of permission() and generic_permission(), and tests ONLY for 456 * MAY_EXEC permission. 457 * 458 * If appropriate, check DAC only. If not appropriate, or 459 * short-cut DAC fails, then call permission() to do more 460 * complete permission check. 
461 */ 462 static int exec_permission_lite(struct inode *inode, 463 struct nameidata *nd) 464 { 465 umode_t mode = inode->i_mode; 466 467 if (inode->i_op && inode->i_op->permission) 468 return -EAGAIN; 469 470 if (current->fsuid == inode->i_uid) 471 mode >>= 6; 472 else if (in_group_p(inode->i_gid)) 473 mode >>= 3; 474 475 if (mode & MAY_EXEC) 476 goto ok; 477 478 if ((inode->i_mode & S_IXUGO) && capable(CAP_DAC_OVERRIDE)) 479 goto ok; 480 481 if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_OVERRIDE)) 482 goto ok; 483 484 if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_READ_SEARCH)) 485 goto ok; 486 487 return -EACCES; 488 ok: 489 return security_inode_permission(inode, MAY_EXEC, nd); 490 } 491 492 /* 493 * This is called when everything else fails, and we actually have 494 * to go to the low-level filesystem to find out what we should do.. 495 * 496 * We get the directory semaphore, and after getting that we also 497 * make sure that nobody added the entry to the dcache in the meantime.. 498 * SMP-safe 499 */ 500 static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd) 501 { 502 struct dentry * result; 503 struct inode *dir = parent->d_inode; 504 505 mutex_lock(&dir->i_mutex); 506 /* 507 * First re-do the cached lookup just in case it was created 508 * while we waited for the directory semaphore.. 509 * 510 * FIXME! This could use version numbering or similar to 511 * avoid unnecessary cache lookups. 512 * 513 * The "dcache_lock" is purely to protect the RCU list walker 514 * from concurrent renames at this point (we mustn't get false 515 * negatives from the RCU list walk here, unlike the optimistic 516 * fast walk). 
517 * 518 * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup 519 */ 520 result = d_lookup(parent, name); 521 if (!result) { 522 struct dentry * dentry = d_alloc(parent, name); 523 result = ERR_PTR(-ENOMEM); 524 if (dentry) { 525 result = dir->i_op->lookup(dir, dentry, nd); 526 if (result) 527 dput(dentry); 528 else 529 result = dentry; 530 } 531 mutex_unlock(&dir->i_mutex); 532 return result; 533 } 534 535 /* 536 * Uhhuh! Nasty case: the cache was re-populated while 537 * we waited on the semaphore. Need to revalidate. 538 */ 539 mutex_unlock(&dir->i_mutex); 540 if (result->d_op && result->d_op->d_revalidate) { 541 result = do_revalidate(result, nd); 542 if (!result) 543 result = ERR_PTR(-ENOENT); 544 } 545 return result; 546 } 547 548 static int __emul_lookup_dentry(const char *, struct nameidata *); 549 550 /* SMP-safe */ 551 static __always_inline int 552 walk_init_root(const char *name, struct nameidata *nd) 553 { 554 struct fs_struct *fs = current->fs; 555 556 read_lock(&fs->lock); 557 if (fs->altroot.dentry && !(nd->flags & LOOKUP_NOALT)) { 558 nd->path = fs->altroot; 559 path_get(&fs->altroot); 560 read_unlock(&fs->lock); 561 if (__emul_lookup_dentry(name,nd)) 562 return 0; 563 read_lock(&fs->lock); 564 } 565 nd->path = fs->root; 566 path_get(&fs->root); 567 read_unlock(&fs->lock); 568 return 1; 569 } 570 571 /* 572 * Wrapper to retry pathname resolution whenever the underlying 573 * file system returns an ESTALE. 574 * 575 * Retry the whole path once, forcing real lookup requests 576 * instead of relying on the dcache. 
577 */ 578 static __always_inline int link_path_walk(const char *name, struct nameidata *nd) 579 { 580 struct path save = nd->path; 581 int result; 582 583 /* make sure the stuff we saved doesn't go away */ 584 dget(save.dentry); 585 mntget(save.mnt); 586 587 result = __link_path_walk(name, nd); 588 if (result == -ESTALE) { 589 /* nd->path had been dropped */ 590 nd->path = save; 591 dget(nd->path.dentry); 592 mntget(nd->path.mnt); 593 nd->flags |= LOOKUP_REVAL; 594 result = __link_path_walk(name, nd); 595 } 596 597 path_put(&save); 598 599 return result; 600 } 601 602 static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) 603 { 604 int res = 0; 605 char *name; 606 if (IS_ERR(link)) 607 goto fail; 608 609 if (*link == '/') { 610 path_put(&nd->path); 611 if (!walk_init_root(link, nd)) 612 /* weird __emul_prefix() stuff did it */ 613 goto out; 614 } 615 res = link_path_walk(link, nd); 616 out: 617 if (nd->depth || res || nd->last_type!=LAST_NORM) 618 return res; 619 /* 620 * If it is an iterative symlinks resolution in open_namei() we 621 * have to copy the last component. And all that crap because of 622 * bloody create() on broken symlinks. Furrfu... 
623 */ 624 name = __getname(); 625 if (unlikely(!name)) { 626 path_put(&nd->path); 627 return -ENOMEM; 628 } 629 strcpy(name, nd->last.name); 630 nd->last.name = name; 631 return 0; 632 fail: 633 path_put(&nd->path); 634 return PTR_ERR(link); 635 } 636 637 static void path_put_conditional(struct path *path, struct nameidata *nd) 638 { 639 dput(path->dentry); 640 if (path->mnt != nd->path.mnt) 641 mntput(path->mnt); 642 } 643 644 static inline void path_to_nameidata(struct path *path, struct nameidata *nd) 645 { 646 dput(nd->path.dentry); 647 if (nd->path.mnt != path->mnt) 648 mntput(nd->path.mnt); 649 nd->path.mnt = path->mnt; 650 nd->path.dentry = path->dentry; 651 } 652 653 static __always_inline int __do_follow_link(struct path *path, struct nameidata *nd) 654 { 655 int error; 656 void *cookie; 657 struct dentry *dentry = path->dentry; 658 659 touch_atime(path->mnt, dentry); 660 nd_set_link(nd, NULL); 661 662 if (path->mnt != nd->path.mnt) { 663 path_to_nameidata(path, nd); 664 dget(dentry); 665 } 666 mntget(path->mnt); 667 cookie = dentry->d_inode->i_op->follow_link(dentry, nd); 668 error = PTR_ERR(cookie); 669 if (!IS_ERR(cookie)) { 670 char *s = nd_get_link(nd); 671 error = 0; 672 if (s) 673 error = __vfs_follow_link(nd, s); 674 if (dentry->d_inode->i_op->put_link) 675 dentry->d_inode->i_op->put_link(dentry, nd, cookie); 676 } 677 path_put(path); 678 679 return error; 680 } 681 682 /* 683 * This limits recursive symlink follows to 8, while 684 * limiting consecutive symlinks to 40. 685 * 686 * Without that kind of total limit, nasty chains of consecutive 687 * symlinks can cause almost arbitrarily long lookups. 
688 */ 689 static inline int do_follow_link(struct path *path, struct nameidata *nd) 690 { 691 int err = -ELOOP; 692 if (current->link_count >= MAX_NESTED_LINKS) 693 goto loop; 694 if (current->total_link_count >= 40) 695 goto loop; 696 BUG_ON(nd->depth >= MAX_NESTED_LINKS); 697 cond_resched(); 698 err = security_inode_follow_link(path->dentry, nd); 699 if (err) 700 goto loop; 701 current->link_count++; 702 current->total_link_count++; 703 nd->depth++; 704 err = __do_follow_link(path, nd); 705 current->link_count--; 706 nd->depth--; 707 return err; 708 loop: 709 path_put_conditional(path, nd); 710 path_put(&nd->path); 711 return err; 712 } 713 714 int follow_up(struct vfsmount **mnt, struct dentry **dentry) 715 { 716 struct vfsmount *parent; 717 struct dentry *mountpoint; 718 spin_lock(&vfsmount_lock); 719 parent=(*mnt)->mnt_parent; 720 if (parent == *mnt) { 721 spin_unlock(&vfsmount_lock); 722 return 0; 723 } 724 mntget(parent); 725 mountpoint=dget((*mnt)->mnt_mountpoint); 726 spin_unlock(&vfsmount_lock); 727 dput(*dentry); 728 *dentry = mountpoint; 729 mntput(*mnt); 730 *mnt = parent; 731 return 1; 732 } 733 734 /* no need for dcache_lock, as serialization is taken care in 735 * namespace.c 736 */ 737 static int __follow_mount(struct path *path) 738 { 739 int res = 0; 740 while (d_mountpoint(path->dentry)) { 741 struct vfsmount *mounted = lookup_mnt(path->mnt, path->dentry); 742 if (!mounted) 743 break; 744 dput(path->dentry); 745 if (res) 746 mntput(path->mnt); 747 path->mnt = mounted; 748 path->dentry = dget(mounted->mnt_root); 749 res = 1; 750 } 751 return res; 752 } 753 754 static void follow_mount(struct vfsmount **mnt, struct dentry **dentry) 755 { 756 while (d_mountpoint(*dentry)) { 757 struct vfsmount *mounted = lookup_mnt(*mnt, *dentry); 758 if (!mounted) 759 break; 760 dput(*dentry); 761 mntput(*mnt); 762 *mnt = mounted; 763 *dentry = dget(mounted->mnt_root); 764 } 765 } 766 767 /* no need for dcache_lock, as serialization is taken care in 768 * 
namespace.c 769 */ 770 int follow_down(struct vfsmount **mnt, struct dentry **dentry) 771 { 772 struct vfsmount *mounted; 773 774 mounted = lookup_mnt(*mnt, *dentry); 775 if (mounted) { 776 dput(*dentry); 777 mntput(*mnt); 778 *mnt = mounted; 779 *dentry = dget(mounted->mnt_root); 780 return 1; 781 } 782 return 0; 783 } 784 785 static __always_inline void follow_dotdot(struct nameidata *nd) 786 { 787 struct fs_struct *fs = current->fs; 788 789 while(1) { 790 struct vfsmount *parent; 791 struct dentry *old = nd->path.dentry; 792 793 read_lock(&fs->lock); 794 if (nd->path.dentry == fs->root.dentry && 795 nd->path.mnt == fs->root.mnt) { 796 read_unlock(&fs->lock); 797 break; 798 } 799 read_unlock(&fs->lock); 800 spin_lock(&dcache_lock); 801 if (nd->path.dentry != nd->path.mnt->mnt_root) { 802 nd->path.dentry = dget(nd->path.dentry->d_parent); 803 spin_unlock(&dcache_lock); 804 dput(old); 805 break; 806 } 807 spin_unlock(&dcache_lock); 808 spin_lock(&vfsmount_lock); 809 parent = nd->path.mnt->mnt_parent; 810 if (parent == nd->path.mnt) { 811 spin_unlock(&vfsmount_lock); 812 break; 813 } 814 mntget(parent); 815 nd->path.dentry = dget(nd->path.mnt->mnt_mountpoint); 816 spin_unlock(&vfsmount_lock); 817 dput(old); 818 mntput(nd->path.mnt); 819 nd->path.mnt = parent; 820 } 821 follow_mount(&nd->path.mnt, &nd->path.dentry); 822 } 823 824 /* 825 * It's more convoluted than I'd like it to be, but... it's still fairly 826 * small and for now I'd prefer to have fast path as straight as possible. 827 * It _is_ time-critical. 
828 */ 829 static int do_lookup(struct nameidata *nd, struct qstr *name, 830 struct path *path) 831 { 832 struct vfsmount *mnt = nd->path.mnt; 833 struct dentry *dentry = __d_lookup(nd->path.dentry, name); 834 835 if (!dentry) 836 goto need_lookup; 837 if (dentry->d_op && dentry->d_op->d_revalidate) 838 goto need_revalidate; 839 done: 840 path->mnt = mnt; 841 path->dentry = dentry; 842 __follow_mount(path); 843 return 0; 844 845 need_lookup: 846 dentry = real_lookup(nd->path.dentry, name, nd); 847 if (IS_ERR(dentry)) 848 goto fail; 849 goto done; 850 851 need_revalidate: 852 dentry = do_revalidate(dentry, nd); 853 if (!dentry) 854 goto need_lookup; 855 if (IS_ERR(dentry)) 856 goto fail; 857 goto done; 858 859 fail: 860 return PTR_ERR(dentry); 861 } 862 863 /* 864 * Name resolution. 865 * This is the basic name resolution function, turning a pathname into 866 * the final dentry. We expect 'base' to be positive and a directory. 867 * 868 * Returns 0 and nd will have valid dentry and mnt on success. 869 * Returns error and drops reference to input namei data on failure. 870 */ 871 static int __link_path_walk(const char *name, struct nameidata *nd) 872 { 873 struct path next; 874 struct inode *inode; 875 int err; 876 unsigned int lookup_flags = nd->flags; 877 878 while (*name=='/') 879 name++; 880 if (!*name) 881 goto return_reval; 882 883 inode = nd->path.dentry->d_inode; 884 if (nd->depth) 885 lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE); 886 887 /* At this point we know we have a real path component. 
*/ 888 for(;;) { 889 unsigned long hash; 890 struct qstr this; 891 unsigned int c; 892 893 nd->flags |= LOOKUP_CONTINUE; 894 err = exec_permission_lite(inode, nd); 895 if (err == -EAGAIN) 896 err = vfs_permission(nd, MAY_EXEC); 897 if (err) 898 break; 899 900 this.name = name; 901 c = *(const unsigned char *)name; 902 903 hash = init_name_hash(); 904 do { 905 name++; 906 hash = partial_name_hash(c, hash); 907 c = *(const unsigned char *)name; 908 } while (c && (c != '/')); 909 this.len = name - (const char *) this.name; 910 this.hash = end_name_hash(hash); 911 912 /* remove trailing slashes? */ 913 if (!c) 914 goto last_component; 915 while (*++name == '/'); 916 if (!*name) 917 goto last_with_slashes; 918 919 /* 920 * "." and ".." are special - ".." especially so because it has 921 * to be able to know about the current root directory and 922 * parent relationships. 923 */ 924 if (this.name[0] == '.') switch (this.len) { 925 default: 926 break; 927 case 2: 928 if (this.name[1] != '.') 929 break; 930 follow_dotdot(nd); 931 inode = nd->path.dentry->d_inode; 932 /* fallthrough */ 933 case 1: 934 continue; 935 } 936 /* 937 * See if the low-level filesystem might want 938 * to use its own hash.. 939 */ 940 if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) { 941 err = nd->path.dentry->d_op->d_hash(nd->path.dentry, 942 &this); 943 if (err < 0) 944 break; 945 } 946 /* This does the actual lookups.. 
*/ 947 err = do_lookup(nd, &this, &next); 948 if (err) 949 break; 950 951 err = -ENOENT; 952 inode = next.dentry->d_inode; 953 if (!inode) 954 goto out_dput; 955 err = -ENOTDIR; 956 if (!inode->i_op) 957 goto out_dput; 958 959 if (inode->i_op->follow_link) { 960 err = do_follow_link(&next, nd); 961 if (err) 962 goto return_err; 963 err = -ENOENT; 964 inode = nd->path.dentry->d_inode; 965 if (!inode) 966 break; 967 err = -ENOTDIR; 968 if (!inode->i_op) 969 break; 970 } else 971 path_to_nameidata(&next, nd); 972 err = -ENOTDIR; 973 if (!inode->i_op->lookup) 974 break; 975 continue; 976 /* here ends the main loop */ 977 978 last_with_slashes: 979 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; 980 last_component: 981 /* Clear LOOKUP_CONTINUE iff it was previously unset */ 982 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE; 983 if (lookup_flags & LOOKUP_PARENT) 984 goto lookup_parent; 985 if (this.name[0] == '.') switch (this.len) { 986 default: 987 break; 988 case 2: 989 if (this.name[1] != '.') 990 break; 991 follow_dotdot(nd); 992 inode = nd->path.dentry->d_inode; 993 /* fallthrough */ 994 case 1: 995 goto return_reval; 996 } 997 if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) { 998 err = nd->path.dentry->d_op->d_hash(nd->path.dentry, 999 &this); 1000 if (err < 0) 1001 break; 1002 } 1003 err = do_lookup(nd, &this, &next); 1004 if (err) 1005 break; 1006 inode = next.dentry->d_inode; 1007 if ((lookup_flags & LOOKUP_FOLLOW) 1008 && inode && inode->i_op && inode->i_op->follow_link) { 1009 err = do_follow_link(&next, nd); 1010 if (err) 1011 goto return_err; 1012 inode = nd->path.dentry->d_inode; 1013 } else 1014 path_to_nameidata(&next, nd); 1015 err = -ENOENT; 1016 if (!inode) 1017 break; 1018 if (lookup_flags & LOOKUP_DIRECTORY) { 1019 err = -ENOTDIR; 1020 if (!inode->i_op || !inode->i_op->lookup) 1021 break; 1022 } 1023 goto return_base; 1024 lookup_parent: 1025 nd->last = this; 1026 nd->last_type = LAST_NORM; 1027 if (this.name[0] != '.') 1028 goto 
return_base; 1029 if (this.len == 1) 1030 nd->last_type = LAST_DOT; 1031 else if (this.len == 2 && this.name[1] == '.') 1032 nd->last_type = LAST_DOTDOT; 1033 else 1034 goto return_base; 1035 return_reval: 1036 /* 1037 * We bypassed the ordinary revalidation routines. 1038 * We may need to check the cached dentry for staleness. 1039 */ 1040 if (nd->path.dentry && nd->path.dentry->d_sb && 1041 (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) { 1042 err = -ESTALE; 1043 /* Note: we do not d_invalidate() */ 1044 if (!nd->path.dentry->d_op->d_revalidate( 1045 nd->path.dentry, nd)) 1046 break; 1047 } 1048 return_base: 1049 return 0; 1050 out_dput: 1051 path_put_conditional(&next, nd); 1052 break; 1053 } 1054 path_put(&nd->path); 1055 return_err: 1056 return err; 1057 } 1058 1059 static int path_walk(const char *name, struct nameidata *nd) 1060 { 1061 current->total_link_count = 0; 1062 return link_path_walk(name, nd); 1063 } 1064 1065 /* 1066 * SMP-safe: Returns 1 and nd will have valid dentry and mnt, if 1067 * everything is done. Returns 0 and drops input nd, if lookup failed; 1068 */ 1069 static int __emul_lookup_dentry(const char *name, struct nameidata *nd) 1070 { 1071 if (path_walk(name, nd)) 1072 return 0; /* something went wrong... */ 1073 1074 if (!nd->path.dentry->d_inode || 1075 S_ISDIR(nd->path.dentry->d_inode->i_mode)) { 1076 struct path old_path = nd->path; 1077 struct qstr last = nd->last; 1078 int last_type = nd->last_type; 1079 struct fs_struct *fs = current->fs; 1080 1081 /* 1082 * NAME was not found in alternate root or it's a directory. 
1083 * Try to find it in the normal root: 1084 */ 1085 nd->last_type = LAST_ROOT; 1086 read_lock(&fs->lock); 1087 nd->path = fs->root; 1088 path_get(&fs->root); 1089 read_unlock(&fs->lock); 1090 if (path_walk(name, nd) == 0) { 1091 if (nd->path.dentry->d_inode) { 1092 path_put(&old_path); 1093 return 1; 1094 } 1095 path_put(&nd->path); 1096 } 1097 nd->path = old_path; 1098 nd->last = last; 1099 nd->last_type = last_type; 1100 } 1101 return 1; 1102 } 1103 1104 void set_fs_altroot(void) 1105 { 1106 char *emul = __emul_prefix(); 1107 struct nameidata nd; 1108 struct path path = {}, old_path; 1109 int err; 1110 struct fs_struct *fs = current->fs; 1111 1112 if (!emul) 1113 goto set_it; 1114 err = path_lookup(emul, LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_NOALT, &nd); 1115 if (!err) 1116 path = nd.path; 1117 set_it: 1118 write_lock(&fs->lock); 1119 old_path = fs->altroot; 1120 fs->altroot = path; 1121 write_unlock(&fs->lock); 1122 if (old_path.dentry) 1123 path_put(&old_path); 1124 } 1125 1126 /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ 1127 static int do_path_lookup(int dfd, const char *name, 1128 unsigned int flags, struct nameidata *nd) 1129 { 1130 int retval = 0; 1131 int fput_needed; 1132 struct file *file; 1133 struct fs_struct *fs = current->fs; 1134 1135 nd->last_type = LAST_ROOT; /* if there are only slashes... 
*/ 1136 nd->flags = flags; 1137 nd->depth = 0; 1138 1139 if (*name=='/') { 1140 read_lock(&fs->lock); 1141 if (fs->altroot.dentry && !(nd->flags & LOOKUP_NOALT)) { 1142 nd->path = fs->altroot; 1143 path_get(&fs->altroot); 1144 read_unlock(&fs->lock); 1145 if (__emul_lookup_dentry(name,nd)) 1146 goto out; /* found in altroot */ 1147 read_lock(&fs->lock); 1148 } 1149 nd->path = fs->root; 1150 path_get(&fs->root); 1151 read_unlock(&fs->lock); 1152 } else if (dfd == AT_FDCWD) { 1153 read_lock(&fs->lock); 1154 nd->path = fs->pwd; 1155 path_get(&fs->pwd); 1156 read_unlock(&fs->lock); 1157 } else { 1158 struct dentry *dentry; 1159 1160 file = fget_light(dfd, &fput_needed); 1161 retval = -EBADF; 1162 if (!file) 1163 goto out_fail; 1164 1165 dentry = file->f_path.dentry; 1166 1167 retval = -ENOTDIR; 1168 if (!S_ISDIR(dentry->d_inode->i_mode)) 1169 goto fput_fail; 1170 1171 retval = file_permission(file, MAY_EXEC); 1172 if (retval) 1173 goto fput_fail; 1174 1175 nd->path = file->f_path; 1176 path_get(&file->f_path); 1177 1178 fput_light(file, fput_needed); 1179 } 1180 1181 retval = path_walk(name, nd); 1182 out: 1183 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && 1184 nd->path.dentry->d_inode)) 1185 audit_inode(name, nd->path.dentry); 1186 out_fail: 1187 return retval; 1188 1189 fput_fail: 1190 fput_light(file, fput_needed); 1191 goto out_fail; 1192 } 1193 1194 int path_lookup(const char *name, unsigned int flags, 1195 struct nameidata *nd) 1196 { 1197 return do_path_lookup(AT_FDCWD, name, flags, nd); 1198 } 1199 1200 /** 1201 * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair 1202 * @dentry: pointer to dentry of the base directory 1203 * @mnt: pointer to vfs mount of the base directory 1204 * @name: pointer to file name 1205 * @flags: lookup flags 1206 * @nd: pointer to nameidata 1207 */ 1208 int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, 1209 const char *name, unsigned int flags, 1210 struct nameidata *nd) 1211 
{ 1212 int retval; 1213 1214 /* same as do_path_lookup */ 1215 nd->last_type = LAST_ROOT; 1216 nd->flags = flags; 1217 nd->depth = 0; 1218 1219 nd->path.mnt = mntget(mnt); 1220 nd->path.dentry = dget(dentry); 1221 1222 retval = path_walk(name, nd); 1223 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && 1224 nd->path.dentry->d_inode)) 1225 audit_inode(name, nd->path.dentry); 1226 1227 return retval; 1228 1229 } 1230 1231 static int __path_lookup_intent_open(int dfd, const char *name, 1232 unsigned int lookup_flags, struct nameidata *nd, 1233 int open_flags, int create_mode) 1234 { 1235 struct file *filp = get_empty_filp(); 1236 int err; 1237 1238 if (filp == NULL) 1239 return -ENFILE; 1240 nd->intent.open.file = filp; 1241 nd->intent.open.flags = open_flags; 1242 nd->intent.open.create_mode = create_mode; 1243 err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd); 1244 if (IS_ERR(nd->intent.open.file)) { 1245 if (err == 0) { 1246 err = PTR_ERR(nd->intent.open.file); 1247 path_put(&nd->path); 1248 } 1249 } else if (err != 0) 1250 release_open_intent(nd); 1251 return err; 1252 } 1253 1254 /** 1255 * path_lookup_open - lookup a file path with open intent 1256 * @dfd: the directory to use as base, or AT_FDCWD 1257 * @name: pointer to file name 1258 * @lookup_flags: lookup intent flags 1259 * @nd: pointer to nameidata 1260 * @open_flags: open intent flags 1261 */ 1262 int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags, 1263 struct nameidata *nd, int open_flags) 1264 { 1265 return __path_lookup_intent_open(dfd, name, lookup_flags, nd, 1266 open_flags, 0); 1267 } 1268 1269 /** 1270 * path_lookup_create - lookup a file path with open + create intent 1271 * @dfd: the directory to use as base, or AT_FDCWD 1272 * @name: pointer to file name 1273 * @lookup_flags: lookup intent flags 1274 * @nd: pointer to nameidata 1275 * @open_flags: open intent flags 1276 * @create_mode: create intent flags 1277 */ 1278 static int 
path_lookup_create(int dfd, const char *name, 1279 unsigned int lookup_flags, struct nameidata *nd, 1280 int open_flags, int create_mode) 1281 { 1282 return __path_lookup_intent_open(dfd, name, lookup_flags|LOOKUP_CREATE, 1283 nd, open_flags, create_mode); 1284 } 1285 1286 int __user_path_lookup_open(const char __user *name, unsigned int lookup_flags, 1287 struct nameidata *nd, int open_flags) 1288 { 1289 char *tmp = getname(name); 1290 int err = PTR_ERR(tmp); 1291 1292 if (!IS_ERR(tmp)) { 1293 err = __path_lookup_intent_open(AT_FDCWD, tmp, lookup_flags, nd, open_flags, 0); 1294 putname(tmp); 1295 } 1296 return err; 1297 } 1298 1299 static struct dentry *__lookup_hash(struct qstr *name, 1300 struct dentry *base, struct nameidata *nd) 1301 { 1302 struct dentry *dentry; 1303 struct inode *inode; 1304 int err; 1305 1306 inode = base->d_inode; 1307 1308 /* 1309 * See if the low-level filesystem might want 1310 * to use its own hash.. 1311 */ 1312 if (base->d_op && base->d_op->d_hash) { 1313 err = base->d_op->d_hash(base, name); 1314 dentry = ERR_PTR(err); 1315 if (err < 0) 1316 goto out; 1317 } 1318 1319 dentry = cached_lookup(base, name, nd); 1320 if (!dentry) { 1321 struct dentry *new = d_alloc(base, name); 1322 dentry = ERR_PTR(-ENOMEM); 1323 if (!new) 1324 goto out; 1325 dentry = inode->i_op->lookup(inode, new, nd); 1326 if (!dentry) 1327 dentry = new; 1328 else 1329 dput(new); 1330 } 1331 out: 1332 return dentry; 1333 } 1334 1335 /* 1336 * Restricted form of lookup. Doesn't follow links, single-component only, 1337 * needs parent already locked. Doesn't follow mounts. 1338 * SMP-safe. 
1339 */ 1340 static struct dentry *lookup_hash(struct nameidata *nd) 1341 { 1342 int err; 1343 1344 err = permission(nd->path.dentry->d_inode, MAY_EXEC, nd); 1345 if (err) 1346 return ERR_PTR(err); 1347 return __lookup_hash(&nd->last, nd->path.dentry, nd); 1348 } 1349 1350 static int __lookup_one_len(const char *name, struct qstr *this, 1351 struct dentry *base, int len) 1352 { 1353 unsigned long hash; 1354 unsigned int c; 1355 1356 this->name = name; 1357 this->len = len; 1358 if (!len) 1359 return -EACCES; 1360 1361 hash = init_name_hash(); 1362 while (len--) { 1363 c = *(const unsigned char *)name++; 1364 if (c == '/' || c == '\0') 1365 return -EACCES; 1366 hash = partial_name_hash(c, hash); 1367 } 1368 this->hash = end_name_hash(hash); 1369 return 0; 1370 } 1371 1372 /** 1373 * lookup_one_len - filesystem helper to lookup single pathname component 1374 * @name: pathname component to lookup 1375 * @base: base directory to lookup from 1376 * @len: maximum length @len should be interpreted to 1377 * 1378 * Note that this routine is purely a helper for filesystem usage and should 1379 * not be called by generic code. Also note that by using this function the 1380 * nameidata argument is passed to the filesystem methods and a filesystem 1381 * using this helper needs to be prepared for that. 1382 */ 1383 struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) 1384 { 1385 int err; 1386 struct qstr this; 1387 1388 err = __lookup_one_len(name, &this, base, len); 1389 if (err) 1390 return ERR_PTR(err); 1391 1392 err = permission(base->d_inode, MAY_EXEC, NULL); 1393 if (err) 1394 return ERR_PTR(err); 1395 return __lookup_hash(&this, base, NULL); 1396 } 1397 1398 /** 1399 * lookup_one_noperm - bad hack for sysfs 1400 * @name: pathname component to lookup 1401 * @base: base directory to lookup from 1402 * 1403 * This is a variant of lookup_one_len that doesn't perform any permission 1404 * checks. 
It's a horrible hack to work around the braindead sysfs 1405 * architecture and should not be used anywhere else. 1406 * 1407 * DON'T USE THIS FUNCTION EVER, thanks. 1408 */ 1409 struct dentry *lookup_one_noperm(const char *name, struct dentry *base) 1410 { 1411 int err; 1412 struct qstr this; 1413 1414 err = __lookup_one_len(name, &this, base, strlen(name)); 1415 if (err) 1416 return ERR_PTR(err); 1417 return __lookup_hash(&this, base, NULL); 1418 } 1419 1420 int __user_walk_fd(int dfd, const char __user *name, unsigned flags, 1421 struct nameidata *nd) 1422 { 1423 char *tmp = getname(name); 1424 int err = PTR_ERR(tmp); 1425 1426 if (!IS_ERR(tmp)) { 1427 err = do_path_lookup(dfd, tmp, flags, nd); 1428 putname(tmp); 1429 } 1430 return err; 1431 } 1432 1433 int __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) 1434 { 1435 return __user_walk_fd(AT_FDCWD, name, flags, nd); 1436 } 1437 1438 /* 1439 * It's inline, so penalty for filesystems that don't use sticky bit is 1440 * minimal. 1441 */ 1442 static inline int check_sticky(struct inode *dir, struct inode *inode) 1443 { 1444 if (!(dir->i_mode & S_ISVTX)) 1445 return 0; 1446 if (inode->i_uid == current->fsuid) 1447 return 0; 1448 if (dir->i_uid == current->fsuid) 1449 return 0; 1450 return !capable(CAP_FOWNER); 1451 } 1452 1453 /* 1454 * Check whether we can remove a link victim from directory dir, check 1455 * whether the type of victim is right. 1456 * 1. We can't do it if dir is read-only (done in permission()) 1457 * 2. We should have write and exec permissions on dir 1458 * 3. We can't remove anything from append-only dir 1459 * 4. We can't do anything with immutable dir (done in permission()) 1460 * 5. If the sticky bit on dir is set we should either 1461 * a. be owner of dir, or 1462 * b. be owner of victim, or 1463 * c. have CAP_FOWNER capability 1464 * 6. If the victim is append-only or immutable we can't do antyhing with 1465 * links pointing to it. 1466 * 7. 
If we were asked to remove a directory and victim isn't one - ENOTDIR. 1467 * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR. 1468 * 9. We can't remove a root or mountpoint. 1469 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by 1470 * nfs_async_unlink(). 1471 */ 1472 static int may_delete(struct inode *dir,struct dentry *victim,int isdir) 1473 { 1474 int error; 1475 1476 if (!victim->d_inode) 1477 return -ENOENT; 1478 1479 BUG_ON(victim->d_parent->d_inode != dir); 1480 audit_inode_child(victim->d_name.name, victim, dir); 1481 1482 error = permission(dir,MAY_WRITE | MAY_EXEC, NULL); 1483 if (error) 1484 return error; 1485 if (IS_APPEND(dir)) 1486 return -EPERM; 1487 if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| 1488 IS_IMMUTABLE(victim->d_inode)) 1489 return -EPERM; 1490 if (isdir) { 1491 if (!S_ISDIR(victim->d_inode->i_mode)) 1492 return -ENOTDIR; 1493 if (IS_ROOT(victim)) 1494 return -EBUSY; 1495 } else if (S_ISDIR(victim->d_inode->i_mode)) 1496 return -EISDIR; 1497 if (IS_DEADDIR(dir)) 1498 return -ENOENT; 1499 if (victim->d_flags & DCACHE_NFSFS_RENAMED) 1500 return -EBUSY; 1501 return 0; 1502 } 1503 1504 /* Check whether we can create an object with dentry child in directory 1505 * dir. 1506 * 1. We can't do it if child already exists (open has special treatment for 1507 * this case, but since we are inlined it's OK) 1508 * 2. We can't do it if dir is read-only (done in permission()) 1509 * 3. We should have write and exec permissions on dir 1510 * 4. We can't do it if dir is immutable (done in permission()) 1511 */ 1512 static inline int may_create(struct inode *dir, struct dentry *child, 1513 struct nameidata *nd) 1514 { 1515 if (child->d_inode) 1516 return -EEXIST; 1517 if (IS_DEADDIR(dir)) 1518 return -ENOENT; 1519 return permission(dir,MAY_WRITE | MAY_EXEC, nd); 1520 } 1521 1522 /* 1523 * O_DIRECTORY translates into forcing a directory lookup. 
1524 */ 1525 static inline int lookup_flags(unsigned int f) 1526 { 1527 unsigned long retval = LOOKUP_FOLLOW; 1528 1529 if (f & O_NOFOLLOW) 1530 retval &= ~LOOKUP_FOLLOW; 1531 1532 if (f & O_DIRECTORY) 1533 retval |= LOOKUP_DIRECTORY; 1534 1535 return retval; 1536 } 1537 1538 /* 1539 * p1 and p2 should be directories on the same fs. 1540 */ 1541 struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) 1542 { 1543 struct dentry *p; 1544 1545 if (p1 == p2) { 1546 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); 1547 return NULL; 1548 } 1549 1550 mutex_lock(&p1->d_inode->i_sb->s_vfs_rename_mutex); 1551 1552 for (p = p1; p->d_parent != p; p = p->d_parent) { 1553 if (p->d_parent == p2) { 1554 mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT); 1555 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD); 1556 return p; 1557 } 1558 } 1559 1560 for (p = p2; p->d_parent != p; p = p->d_parent) { 1561 if (p->d_parent == p1) { 1562 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); 1563 mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); 1564 return p; 1565 } 1566 } 1567 1568 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); 1569 mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); 1570 return NULL; 1571 } 1572 1573 void unlock_rename(struct dentry *p1, struct dentry *p2) 1574 { 1575 mutex_unlock(&p1->d_inode->i_mutex); 1576 if (p1 != p2) { 1577 mutex_unlock(&p2->d_inode->i_mutex); 1578 mutex_unlock(&p1->d_inode->i_sb->s_vfs_rename_mutex); 1579 } 1580 } 1581 1582 int vfs_create(struct inode *dir, struct dentry *dentry, int mode, 1583 struct nameidata *nd) 1584 { 1585 int error = may_create(dir, dentry, nd); 1586 1587 if (error) 1588 return error; 1589 1590 if (!dir->i_op || !dir->i_op->create) 1591 return -EACCES; /* shouldn't it be ENOSYS? 
*/ 1592 mode &= S_IALLUGO; 1593 mode |= S_IFREG; 1594 error = security_inode_create(dir, dentry, mode); 1595 if (error) 1596 return error; 1597 DQUOT_INIT(dir); 1598 error = dir->i_op->create(dir, dentry, mode, nd); 1599 if (!error) 1600 fsnotify_create(dir, dentry); 1601 return error; 1602 } 1603 1604 int may_open(struct nameidata *nd, int acc_mode, int flag) 1605 { 1606 struct dentry *dentry = nd->path.dentry; 1607 struct inode *inode = dentry->d_inode; 1608 int error; 1609 1610 if (!inode) 1611 return -ENOENT; 1612 1613 if (S_ISLNK(inode->i_mode)) 1614 return -ELOOP; 1615 1616 if (S_ISDIR(inode->i_mode) && (acc_mode & MAY_WRITE)) 1617 return -EISDIR; 1618 1619 /* 1620 * FIFO's, sockets and device files are special: they don't 1621 * actually live on the filesystem itself, and as such you 1622 * can write to them even if the filesystem is read-only. 1623 */ 1624 if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { 1625 flag &= ~O_TRUNC; 1626 } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) { 1627 if (nd->path.mnt->mnt_flags & MNT_NODEV) 1628 return -EACCES; 1629 1630 flag &= ~O_TRUNC; 1631 } 1632 1633 error = vfs_permission(nd, acc_mode); 1634 if (error) 1635 return error; 1636 /* 1637 * An append-only file must be opened in append mode for writing. 1638 */ 1639 if (IS_APPEND(inode)) { 1640 if ((flag & FMODE_WRITE) && !(flag & O_APPEND)) 1641 return -EPERM; 1642 if (flag & O_TRUNC) 1643 return -EPERM; 1644 } 1645 1646 /* O_NOATIME can only be set by the owner or superuser */ 1647 if (flag & O_NOATIME) 1648 if (!is_owner_or_cap(inode)) 1649 return -EPERM; 1650 1651 /* 1652 * Ensure there are no outstanding leases on the file. 1653 */ 1654 error = break_lease(inode, flag); 1655 if (error) 1656 return error; 1657 1658 if (flag & O_TRUNC) { 1659 error = get_write_access(inode); 1660 if (error) 1661 return error; 1662 1663 /* 1664 * Refuse to truncate files with mandatory locks held on them. 
1665 */ 1666 error = locks_verify_locked(inode); 1667 if (!error) { 1668 DQUOT_INIT(inode); 1669 1670 error = do_truncate(dentry, 0, 1671 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN, 1672 NULL); 1673 } 1674 put_write_access(inode); 1675 if (error) 1676 return error; 1677 } else 1678 if (flag & FMODE_WRITE) 1679 DQUOT_INIT(inode); 1680 1681 return 0; 1682 } 1683 1684 /* 1685 * Be careful about ever adding any more callers of this 1686 * function. Its flags must be in the namei format, not 1687 * what get passed to sys_open(). 1688 */ 1689 static int __open_namei_create(struct nameidata *nd, struct path *path, 1690 int flag, int mode) 1691 { 1692 int error; 1693 struct dentry *dir = nd->path.dentry; 1694 1695 if (!IS_POSIXACL(dir->d_inode)) 1696 mode &= ~current->fs->umask; 1697 error = vfs_create(dir->d_inode, path->dentry, mode, nd); 1698 mutex_unlock(&dir->d_inode->i_mutex); 1699 dput(nd->path.dentry); 1700 nd->path.dentry = path->dentry; 1701 if (error) 1702 return error; 1703 /* Don't check for write permission, don't truncate */ 1704 return may_open(nd, 0, flag & ~O_TRUNC); 1705 } 1706 1707 /* 1708 * Note that while the flag value (low two bits) for sys_open means: 1709 * 00 - read-only 1710 * 01 - write-only 1711 * 10 - read-write 1712 * 11 - special 1713 * it is changed into 1714 * 00 - no permissions needed 1715 * 01 - read-permission 1716 * 10 - write-permission 1717 * 11 - read-write 1718 * for the internal routines (ie open_namei()/follow_link() etc) 1719 * This is more logical, and also allows the 00 "no perm needed" 1720 * to be used for symlinks (where the permissions are checked 1721 * later). 1722 * 1723 */ 1724 static inline int open_to_namei_flags(int flag) 1725 { 1726 if ((flag+1) & O_ACCMODE) 1727 flag++; 1728 return flag; 1729 } 1730 1731 static int open_will_write_to_fs(int flag, struct inode *inode) 1732 { 1733 /* 1734 * We'll never write to the fs underlying 1735 * a device file. 
1736 */ 1737 if (special_file(inode->i_mode)) 1738 return 0; 1739 return (flag & O_TRUNC); 1740 } 1741 1742 /* 1743 * Note that the low bits of the passed in "open_flag" 1744 * are not the same as in the local variable "flag". See 1745 * open_to_namei_flags() for more details. 1746 */ 1747 struct file *do_filp_open(int dfd, const char *pathname, 1748 int open_flag, int mode) 1749 { 1750 struct file *filp; 1751 struct nameidata nd; 1752 int acc_mode, error; 1753 struct path path; 1754 struct dentry *dir; 1755 int count = 0; 1756 int will_write; 1757 int flag = open_to_namei_flags(open_flag); 1758 1759 acc_mode = ACC_MODE(flag); 1760 1761 /* O_TRUNC implies we need access checks for write permissions */ 1762 if (flag & O_TRUNC) 1763 acc_mode |= MAY_WRITE; 1764 1765 /* Allow the LSM permission hook to distinguish append 1766 access from general write access. */ 1767 if (flag & O_APPEND) 1768 acc_mode |= MAY_APPEND; 1769 1770 /* 1771 * The simplest case - just a plain lookup. 1772 */ 1773 if (!(flag & O_CREAT)) { 1774 error = path_lookup_open(dfd, pathname, lookup_flags(flag), 1775 &nd, flag); 1776 if (error) 1777 return ERR_PTR(error); 1778 goto ok; 1779 } 1780 1781 /* 1782 * Create - we need to know the parent. 1783 */ 1784 error = path_lookup_create(dfd, pathname, LOOKUP_PARENT, 1785 &nd, flag, mode); 1786 if (error) 1787 return ERR_PTR(error); 1788 1789 /* 1790 * We have the parent and last component. First of all, check 1791 * that we are not asked to creat(2) an obvious directory - that 1792 * will not do. 
1793 */ 1794 error = -EISDIR; 1795 if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len]) 1796 goto exit; 1797 1798 dir = nd.path.dentry; 1799 nd.flags &= ~LOOKUP_PARENT; 1800 mutex_lock(&dir->d_inode->i_mutex); 1801 path.dentry = lookup_hash(&nd); 1802 path.mnt = nd.path.mnt; 1803 1804 do_last: 1805 error = PTR_ERR(path.dentry); 1806 if (IS_ERR(path.dentry)) { 1807 mutex_unlock(&dir->d_inode->i_mutex); 1808 goto exit; 1809 } 1810 1811 if (IS_ERR(nd.intent.open.file)) { 1812 error = PTR_ERR(nd.intent.open.file); 1813 goto exit_mutex_unlock; 1814 } 1815 1816 /* Negative dentry, just create the file */ 1817 if (!path.dentry->d_inode) { 1818 /* 1819 * This write is needed to ensure that a 1820 * ro->rw transition does not occur between 1821 * the time when the file is created and when 1822 * a permanent write count is taken through 1823 * the 'struct file' in nameidata_to_filp(). 1824 */ 1825 error = mnt_want_write(nd.path.mnt); 1826 if (error) 1827 goto exit_mutex_unlock; 1828 error = __open_namei_create(&nd, &path, flag, mode); 1829 if (error) { 1830 mnt_drop_write(nd.path.mnt); 1831 goto exit; 1832 } 1833 filp = nameidata_to_filp(&nd, open_flag); 1834 mnt_drop_write(nd.path.mnt); 1835 return filp; 1836 } 1837 1838 /* 1839 * It already exists. 1840 */ 1841 mutex_unlock(&dir->d_inode->i_mutex); 1842 audit_inode(pathname, path.dentry); 1843 1844 error = -EEXIST; 1845 if (flag & O_EXCL) 1846 goto exit_dput; 1847 1848 if (__follow_mount(&path)) { 1849 error = -ELOOP; 1850 if (flag & O_NOFOLLOW) 1851 goto exit_dput; 1852 } 1853 1854 error = -ENOENT; 1855 if (!path.dentry->d_inode) 1856 goto exit_dput; 1857 if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link) 1858 goto do_link; 1859 1860 path_to_nameidata(&path, &nd); 1861 error = -EISDIR; 1862 if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode)) 1863 goto exit; 1864 ok: 1865 /* 1866 * Consider: 1867 * 1. may_open() truncates a file 1868 * 2. 
a rw->ro mount transition occurs 1869 * 3. nameidata_to_filp() fails due to 1870 * the ro mount. 1871 * That would be inconsistent, and should 1872 * be avoided. Taking this mnt write here 1873 * ensures that (2) can not occur. 1874 */ 1875 will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode); 1876 if (will_write) { 1877 error = mnt_want_write(nd.path.mnt); 1878 if (error) 1879 goto exit; 1880 } 1881 error = may_open(&nd, acc_mode, flag); 1882 if (error) { 1883 if (will_write) 1884 mnt_drop_write(nd.path.mnt); 1885 goto exit; 1886 } 1887 filp = nameidata_to_filp(&nd, open_flag); 1888 /* 1889 * It is now safe to drop the mnt write 1890 * because the filp has had a write taken 1891 * on its behalf. 1892 */ 1893 if (will_write) 1894 mnt_drop_write(nd.path.mnt); 1895 return filp; 1896 1897 exit_mutex_unlock: 1898 mutex_unlock(&dir->d_inode->i_mutex); 1899 exit_dput: 1900 path_put_conditional(&path, &nd); 1901 exit: 1902 if (!IS_ERR(nd.intent.open.file)) 1903 release_open_intent(&nd); 1904 path_put(&nd.path); 1905 return ERR_PTR(error); 1906 1907 do_link: 1908 error = -ELOOP; 1909 if (flag & O_NOFOLLOW) 1910 goto exit_dput; 1911 /* 1912 * This is subtle. Instead of calling do_follow_link() we do the 1913 * thing by hands. The reason is that this way we have zero link_count 1914 * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT. 1915 * After that we have the parent and last component, i.e. 1916 * we are in the same situation as after the first path_walk(). 1917 * Well, almost - if the last component is normal we get its copy 1918 * stored in nd->last.name and we will have to putname() it when we 1919 * are done. Procfs-like symlinks just set LAST_BIND. 1920 */ 1921 nd.flags |= LOOKUP_PARENT; 1922 error = security_inode_follow_link(path.dentry, &nd); 1923 if (error) 1924 goto exit_dput; 1925 error = __do_follow_link(&path, &nd); 1926 if (error) { 1927 /* Does someone understand code flow here? Or it is only 1928 * me so stupid? 
Anathema to whoever designed this non-sense 1929 * with "intent.open". 1930 */ 1931 release_open_intent(&nd); 1932 return ERR_PTR(error); 1933 } 1934 nd.flags &= ~LOOKUP_PARENT; 1935 if (nd.last_type == LAST_BIND) 1936 goto ok; 1937 error = -EISDIR; 1938 if (nd.last_type != LAST_NORM) 1939 goto exit; 1940 if (nd.last.name[nd.last.len]) { 1941 __putname(nd.last.name); 1942 goto exit; 1943 } 1944 error = -ELOOP; 1945 if (count++==32) { 1946 __putname(nd.last.name); 1947 goto exit; 1948 } 1949 dir = nd.path.dentry; 1950 mutex_lock(&dir->d_inode->i_mutex); 1951 path.dentry = lookup_hash(&nd); 1952 path.mnt = nd.path.mnt; 1953 __putname(nd.last.name); 1954 goto do_last; 1955 } 1956 1957 /** 1958 * filp_open - open file and return file pointer 1959 * 1960 * @filename: path to open 1961 * @flags: open flags as per the open(2) second argument 1962 * @mode: mode for the new file if O_CREAT is set, else ignored 1963 * 1964 * This is the helper to open a file from kernelspace if you really 1965 * have to. But in generally you should not do this, so please move 1966 * along, nothing to see here.. 1967 */ 1968 struct file *filp_open(const char *filename, int flags, int mode) 1969 { 1970 return do_filp_open(AT_FDCWD, filename, flags, mode); 1971 } 1972 EXPORT_SYMBOL(filp_open); 1973 1974 /** 1975 * lookup_create - lookup a dentry, creating it if it doesn't exist 1976 * @nd: nameidata info 1977 * @is_dir: directory flag 1978 * 1979 * Simple function to lookup and return a dentry and create it 1980 * if it doesn't exist. Is SMP-safe. 1981 * 1982 * Returns with nd->path.dentry->d_inode->i_mutex locked. 1983 */ 1984 struct dentry *lookup_create(struct nameidata *nd, int is_dir) 1985 { 1986 struct dentry *dentry = ERR_PTR(-EEXIST); 1987 1988 mutex_lock_nested(&nd->path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 1989 /* 1990 * Yucky last component or no last component at all? 
1991 * (foo/., foo/.., /////) 1992 */ 1993 if (nd->last_type != LAST_NORM) 1994 goto fail; 1995 nd->flags &= ~LOOKUP_PARENT; 1996 nd->flags |= LOOKUP_CREATE; 1997 nd->intent.open.flags = O_EXCL; 1998 1999 /* 2000 * Do the final lookup. 2001 */ 2002 dentry = lookup_hash(nd); 2003 if (IS_ERR(dentry)) 2004 goto fail; 2005 2006 if (dentry->d_inode) 2007 goto eexist; 2008 /* 2009 * Special case - lookup gave negative, but... we had foo/bar/ 2010 * From the vfs_mknod() POV we just have a negative dentry - 2011 * all is fine. Let's be bastards - you had / on the end, you've 2012 * been asking for (non-existent) directory. -ENOENT for you. 2013 */ 2014 if (unlikely(!is_dir && nd->last.name[nd->last.len])) { 2015 dput(dentry); 2016 dentry = ERR_PTR(-ENOENT); 2017 } 2018 return dentry; 2019 eexist: 2020 dput(dentry); 2021 dentry = ERR_PTR(-EEXIST); 2022 fail: 2023 return dentry; 2024 } 2025 EXPORT_SYMBOL_GPL(lookup_create); 2026 2027 int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) 2028 { 2029 int error = may_create(dir, dentry, NULL); 2030 2031 if (error) 2032 return error; 2033 2034 if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) 2035 return -EPERM; 2036 2037 if (!dir->i_op || !dir->i_op->mknod) 2038 return -EPERM; 2039 2040 error = devcgroup_inode_mknod(mode, dev); 2041 if (error) 2042 return error; 2043 2044 error = security_inode_mknod(dir, dentry, mode, dev); 2045 if (error) 2046 return error; 2047 2048 DQUOT_INIT(dir); 2049 error = dir->i_op->mknod(dir, dentry, mode, dev); 2050 if (!error) 2051 fsnotify_create(dir, dentry); 2052 return error; 2053 } 2054 2055 static int may_mknod(mode_t mode) 2056 { 2057 switch (mode & S_IFMT) { 2058 case S_IFREG: 2059 case S_IFCHR: 2060 case S_IFBLK: 2061 case S_IFIFO: 2062 case S_IFSOCK: 2063 case 0: /* zero mode translates to S_IFREG */ 2064 return 0; 2065 case S_IFDIR: 2066 return -EPERM; 2067 default: 2068 return -EINVAL; 2069 } 2070 } 2071 2072 asmlinkage long sys_mknodat(int dfd, const 
char __user *filename, int mode, 2073 unsigned dev) 2074 { 2075 int error = 0; 2076 char * tmp; 2077 struct dentry * dentry; 2078 struct nameidata nd; 2079 2080 if (S_ISDIR(mode)) 2081 return -EPERM; 2082 tmp = getname(filename); 2083 if (IS_ERR(tmp)) 2084 return PTR_ERR(tmp); 2085 2086 error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd); 2087 if (error) 2088 goto out; 2089 dentry = lookup_create(&nd, 0); 2090 if (IS_ERR(dentry)) { 2091 error = PTR_ERR(dentry); 2092 goto out_unlock; 2093 } 2094 if (!IS_POSIXACL(nd.path.dentry->d_inode)) 2095 mode &= ~current->fs->umask; 2096 error = may_mknod(mode); 2097 if (error) 2098 goto out_dput; 2099 error = mnt_want_write(nd.path.mnt); 2100 if (error) 2101 goto out_dput; 2102 switch (mode & S_IFMT) { 2103 case 0: case S_IFREG: 2104 error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd); 2105 break; 2106 case S_IFCHR: case S_IFBLK: 2107 error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode, 2108 new_decode_dev(dev)); 2109 break; 2110 case S_IFIFO: case S_IFSOCK: 2111 error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0); 2112 break; 2113 } 2114 mnt_drop_write(nd.path.mnt); 2115 out_dput: 2116 dput(dentry); 2117 out_unlock: 2118 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2119 path_put(&nd.path); 2120 out: 2121 putname(tmp); 2122 2123 return error; 2124 } 2125 2126 asmlinkage long sys_mknod(const char __user *filename, int mode, unsigned dev) 2127 { 2128 return sys_mknodat(AT_FDCWD, filename, mode, dev); 2129 } 2130 2131 int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 2132 { 2133 int error = may_create(dir, dentry, NULL); 2134 2135 if (error) 2136 return error; 2137 2138 if (!dir->i_op || !dir->i_op->mkdir) 2139 return -EPERM; 2140 2141 mode &= (S_IRWXUGO|S_ISVTX); 2142 error = security_inode_mkdir(dir, dentry, mode); 2143 if (error) 2144 return error; 2145 2146 DQUOT_INIT(dir); 2147 error = dir->i_op->mkdir(dir, dentry, mode); 2148 if (!error) 2149 fsnotify_mkdir(dir, dentry); 2150 return 
error; 2151 } 2152 2153 asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode) 2154 { 2155 int error = 0; 2156 char * tmp; 2157 struct dentry *dentry; 2158 struct nameidata nd; 2159 2160 tmp = getname(pathname); 2161 error = PTR_ERR(tmp); 2162 if (IS_ERR(tmp)) 2163 goto out_err; 2164 2165 error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd); 2166 if (error) 2167 goto out; 2168 dentry = lookup_create(&nd, 1); 2169 error = PTR_ERR(dentry); 2170 if (IS_ERR(dentry)) 2171 goto out_unlock; 2172 2173 if (!IS_POSIXACL(nd.path.dentry->d_inode)) 2174 mode &= ~current->fs->umask; 2175 error = mnt_want_write(nd.path.mnt); 2176 if (error) 2177 goto out_dput; 2178 error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode); 2179 mnt_drop_write(nd.path.mnt); 2180 out_dput: 2181 dput(dentry); 2182 out_unlock: 2183 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2184 path_put(&nd.path); 2185 out: 2186 putname(tmp); 2187 out_err: 2188 return error; 2189 } 2190 2191 asmlinkage long sys_mkdir(const char __user *pathname, int mode) 2192 { 2193 return sys_mkdirat(AT_FDCWD, pathname, mode); 2194 } 2195 2196 /* 2197 * We try to drop the dentry early: we should have 2198 * a usage count of 2 if we're the only user of this 2199 * dentry, and if that is true (possibly after pruning 2200 * the dcache), then we drop the dentry now. 2201 * 2202 * A low-level filesystem can, if it choses, legally 2203 * do a 2204 * 2205 * if (!d_unhashed(dentry)) 2206 * return -EBUSY; 2207 * 2208 * if it cannot handle the case of removing a directory 2209 * that is still in use by something else.. 
2210 */ 2211 void dentry_unhash(struct dentry *dentry) 2212 { 2213 dget(dentry); 2214 shrink_dcache_parent(dentry); 2215 spin_lock(&dcache_lock); 2216 spin_lock(&dentry->d_lock); 2217 if (atomic_read(&dentry->d_count) == 2) 2218 __d_drop(dentry); 2219 spin_unlock(&dentry->d_lock); 2220 spin_unlock(&dcache_lock); 2221 } 2222 2223 int vfs_rmdir(struct inode *dir, struct dentry *dentry) 2224 { 2225 int error = may_delete(dir, dentry, 1); 2226 2227 if (error) 2228 return error; 2229 2230 if (!dir->i_op || !dir->i_op->rmdir) 2231 return -EPERM; 2232 2233 DQUOT_INIT(dir); 2234 2235 mutex_lock(&dentry->d_inode->i_mutex); 2236 dentry_unhash(dentry); 2237 if (d_mountpoint(dentry)) 2238 error = -EBUSY; 2239 else { 2240 error = security_inode_rmdir(dir, dentry); 2241 if (!error) { 2242 error = dir->i_op->rmdir(dir, dentry); 2243 if (!error) 2244 dentry->d_inode->i_flags |= S_DEAD; 2245 } 2246 } 2247 mutex_unlock(&dentry->d_inode->i_mutex); 2248 if (!error) { 2249 d_delete(dentry); 2250 } 2251 dput(dentry); 2252 2253 return error; 2254 } 2255 2256 static long do_rmdir(int dfd, const char __user *pathname) 2257 { 2258 int error = 0; 2259 char * name; 2260 struct dentry *dentry; 2261 struct nameidata nd; 2262 2263 name = getname(pathname); 2264 if(IS_ERR(name)) 2265 return PTR_ERR(name); 2266 2267 error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd); 2268 if (error) 2269 goto exit; 2270 2271 switch(nd.last_type) { 2272 case LAST_DOTDOT: 2273 error = -ENOTEMPTY; 2274 goto exit1; 2275 case LAST_DOT: 2276 error = -EINVAL; 2277 goto exit1; 2278 case LAST_ROOT: 2279 error = -EBUSY; 2280 goto exit1; 2281 } 2282 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 2283 dentry = lookup_hash(&nd); 2284 error = PTR_ERR(dentry); 2285 if (IS_ERR(dentry)) 2286 goto exit2; 2287 error = mnt_want_write(nd.path.mnt); 2288 if (error) 2289 goto exit3; 2290 error = vfs_rmdir(nd.path.dentry->d_inode, dentry); 2291 mnt_drop_write(nd.path.mnt); 2292 exit3: 2293 dput(dentry); 2294 
exit2: 2295 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2296 exit1: 2297 path_put(&nd.path); 2298 exit: 2299 putname(name); 2300 return error; 2301 } 2302 2303 asmlinkage long sys_rmdir(const char __user *pathname) 2304 { 2305 return do_rmdir(AT_FDCWD, pathname); 2306 } 2307 2308 int vfs_unlink(struct inode *dir, struct dentry *dentry) 2309 { 2310 int error = may_delete(dir, dentry, 0); 2311 2312 if (error) 2313 return error; 2314 2315 if (!dir->i_op || !dir->i_op->unlink) 2316 return -EPERM; 2317 2318 DQUOT_INIT(dir); 2319 2320 mutex_lock(&dentry->d_inode->i_mutex); 2321 if (d_mountpoint(dentry)) 2322 error = -EBUSY; 2323 else { 2324 error = security_inode_unlink(dir, dentry); 2325 if (!error) 2326 error = dir->i_op->unlink(dir, dentry); 2327 } 2328 mutex_unlock(&dentry->d_inode->i_mutex); 2329 2330 /* We don't d_delete() NFS sillyrenamed files--they still exist. */ 2331 if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) { 2332 fsnotify_link_count(dentry->d_inode); 2333 d_delete(dentry); 2334 } 2335 2336 return error; 2337 } 2338 2339 /* 2340 * Make sure that the actual truncation of the file will occur outside its 2341 * directory's i_mutex. Truncate can take a long time if there is a lot of 2342 * writeout happening, and we don't want to prevent access to the directory 2343 * while waiting on the I/O. 2344 */ 2345 static long do_unlinkat(int dfd, const char __user *pathname) 2346 { 2347 int error = 0; 2348 char * name; 2349 struct dentry *dentry; 2350 struct nameidata nd; 2351 struct inode *inode = NULL; 2352 2353 name = getname(pathname); 2354 if(IS_ERR(name)) 2355 return PTR_ERR(name); 2356 2357 error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd); 2358 if (error) 2359 goto exit; 2360 error = -EISDIR; 2361 if (nd.last_type != LAST_NORM) 2362 goto exit1; 2363 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 2364 dentry = lookup_hash(&nd); 2365 error = PTR_ERR(dentry); 2366 if (!IS_ERR(dentry)) { 2367 /* Why not before? 
Because we want correct error value */ 2368 if (nd.last.name[nd.last.len]) 2369 goto slashes; 2370 inode = dentry->d_inode; 2371 if (inode) 2372 atomic_inc(&inode->i_count); 2373 error = mnt_want_write(nd.path.mnt); 2374 if (error) 2375 goto exit2; 2376 error = vfs_unlink(nd.path.dentry->d_inode, dentry); 2377 mnt_drop_write(nd.path.mnt); 2378 exit2: 2379 dput(dentry); 2380 } 2381 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2382 if (inode) 2383 iput(inode); /* truncate the inode here */ 2384 exit1: 2385 path_put(&nd.path); 2386 exit: 2387 putname(name); 2388 return error; 2389 2390 slashes: 2391 error = !dentry->d_inode ? -ENOENT : 2392 S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR; 2393 goto exit2; 2394 } 2395 2396 asmlinkage long sys_unlinkat(int dfd, const char __user *pathname, int flag) 2397 { 2398 if ((flag & ~AT_REMOVEDIR) != 0) 2399 return -EINVAL; 2400 2401 if (flag & AT_REMOVEDIR) 2402 return do_rmdir(dfd, pathname); 2403 2404 return do_unlinkat(dfd, pathname); 2405 } 2406 2407 asmlinkage long sys_unlink(const char __user *pathname) 2408 { 2409 return do_unlinkat(AT_FDCWD, pathname); 2410 } 2411 2412 int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode) 2413 { 2414 int error = may_create(dir, dentry, NULL); 2415 2416 if (error) 2417 return error; 2418 2419 if (!dir->i_op || !dir->i_op->symlink) 2420 return -EPERM; 2421 2422 error = security_inode_symlink(dir, dentry, oldname); 2423 if (error) 2424 return error; 2425 2426 DQUOT_INIT(dir); 2427 error = dir->i_op->symlink(dir, dentry, oldname); 2428 if (!error) 2429 fsnotify_create(dir, dentry); 2430 return error; 2431 } 2432 2433 asmlinkage long sys_symlinkat(const char __user *oldname, 2434 int newdfd, const char __user *newname) 2435 { 2436 int error = 0; 2437 char * from; 2438 char * to; 2439 struct dentry *dentry; 2440 struct nameidata nd; 2441 2442 from = getname(oldname); 2443 if(IS_ERR(from)) 2444 return PTR_ERR(from); 2445 to = getname(newname); 2446 
error = PTR_ERR(to); 2447 if (IS_ERR(to)) 2448 goto out_putname; 2449 2450 error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd); 2451 if (error) 2452 goto out; 2453 dentry = lookup_create(&nd, 0); 2454 error = PTR_ERR(dentry); 2455 if (IS_ERR(dentry)) 2456 goto out_unlock; 2457 2458 error = mnt_want_write(nd.path.mnt); 2459 if (error) 2460 goto out_dput; 2461 error = vfs_symlink(nd.path.dentry->d_inode, dentry, from, S_IALLUGO); 2462 mnt_drop_write(nd.path.mnt); 2463 out_dput: 2464 dput(dentry); 2465 out_unlock: 2466 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2467 path_put(&nd.path); 2468 out: 2469 putname(to); 2470 out_putname: 2471 putname(from); 2472 return error; 2473 } 2474 2475 asmlinkage long sys_symlink(const char __user *oldname, const char __user *newname) 2476 { 2477 return sys_symlinkat(oldname, AT_FDCWD, newname); 2478 } 2479 2480 int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) 2481 { 2482 struct inode *inode = old_dentry->d_inode; 2483 int error; 2484 2485 if (!inode) 2486 return -ENOENT; 2487 2488 error = may_create(dir, new_dentry, NULL); 2489 if (error) 2490 return error; 2491 2492 if (dir->i_sb != inode->i_sb) 2493 return -EXDEV; 2494 2495 /* 2496 * A link to an append-only or immutable file cannot be created. 2497 */ 2498 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 2499 return -EPERM; 2500 if (!dir->i_op || !dir->i_op->link) 2501 return -EPERM; 2502 if (S_ISDIR(old_dentry->d_inode->i_mode)) 2503 return -EPERM; 2504 2505 error = security_inode_link(old_dentry, dir, new_dentry); 2506 if (error) 2507 return error; 2508 2509 mutex_lock(&old_dentry->d_inode->i_mutex); 2510 DQUOT_INIT(dir); 2511 error = dir->i_op->link(old_dentry, dir, new_dentry); 2512 mutex_unlock(&old_dentry->d_inode->i_mutex); 2513 if (!error) 2514 fsnotify_link(dir, old_dentry->d_inode, new_dentry); 2515 return error; 2516 } 2517 2518 /* 2519 * Hardlinks are often used in delicate situations. 
We avoid 2520 * security-related surprises by not following symlinks on the 2521 * newname. --KAB 2522 * 2523 * We don't follow them on the oldname either to be compatible 2524 * with linux 2.0, and to avoid hard-linking to directories 2525 * and other special files. --ADM 2526 */ 2527 asmlinkage long sys_linkat(int olddfd, const char __user *oldname, 2528 int newdfd, const char __user *newname, 2529 int flags) 2530 { 2531 struct dentry *new_dentry; 2532 struct nameidata nd, old_nd; 2533 int error; 2534 char * to; 2535 2536 if ((flags & ~AT_SYMLINK_FOLLOW) != 0) 2537 return -EINVAL; 2538 2539 to = getname(newname); 2540 if (IS_ERR(to)) 2541 return PTR_ERR(to); 2542 2543 error = __user_walk_fd(olddfd, oldname, 2544 flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0, 2545 &old_nd); 2546 if (error) 2547 goto exit; 2548 error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd); 2549 if (error) 2550 goto out; 2551 error = -EXDEV; 2552 if (old_nd.path.mnt != nd.path.mnt) 2553 goto out_release; 2554 new_dentry = lookup_create(&nd, 0); 2555 error = PTR_ERR(new_dentry); 2556 if (IS_ERR(new_dentry)) 2557 goto out_unlock; 2558 error = mnt_want_write(nd.path.mnt); 2559 if (error) 2560 goto out_dput; 2561 error = vfs_link(old_nd.path.dentry, nd.path.dentry->d_inode, new_dentry); 2562 mnt_drop_write(nd.path.mnt); 2563 out_dput: 2564 dput(new_dentry); 2565 out_unlock: 2566 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2567 out_release: 2568 path_put(&nd.path); 2569 out: 2570 path_put(&old_nd.path); 2571 exit: 2572 putname(to); 2573 2574 return error; 2575 } 2576 2577 asmlinkage long sys_link(const char __user *oldname, const char __user *newname) 2578 { 2579 return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0); 2580 } 2581 2582 /* 2583 * The worst of all namespace operations - renaming directory. "Perverted" 2584 * doesn't even start to describe it. Somebody in UCB had a heck of a trip... 2585 * Problems: 2586 * a) we can get into loop creation. Check is done in is_subdir(). 
*	b) race potential - two innocent renames can create a loop together.
 *	   That's where 4.4 screws up. Current fix: serialization on
 *	   sb->s_vfs_rename_mutex. We might be more accurate, but that's another
 *	   story.
 *	c) we have to lock _three_ objects - parents and victim (if it exists).
 *	   And that - after we got ->i_mutex on parents (until then we don't know
 *	   whether the target exists).  Solution: try to be smart with locking
 *	   order for inodes.  We rely on the fact that tree topology may change
 *	   only under ->s_vfs_rename_mutex _and_ that parent of the object we
 *	   move will be locked.  Thus we can rank directories by the tree
 *	   (ancestors first) and rank all non-directories after them.
 *	   That works since everybody except rename does "lock parent, lookup,
 *	   lock child" and rename is under ->s_vfs_rename_mutex.
 *	   HOWEVER, it relies on the assumption that any object with ->lookup()
 *	   has no more than 1 dentry.  If "hybrid" objects will ever appear,
 *	   we'd better make sure that there's no link(2) for them.
 *	d) some filesystems don't support opened-but-unlinked directories,
 *	   either because of layout or because they are not ready to deal with
 *	   all cases correctly. The latter will be fixed (taking this sort of
 *	   stuff into VFS), but the former is not going away. Solution: the same
 *	   trick as in rmdir().
 *	e) conversion from fhandle to dentry may come in the wrong moment - when
 *	   we are removing the target. Solution: we will have to grab ->i_mutex
 *	   in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
 *	   ->i_mutex on parents, which works but leads to some truly excessive
 *	   locking].
2613 */ 2614 static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, 2615 struct inode *new_dir, struct dentry *new_dentry) 2616 { 2617 int error = 0; 2618 struct inode *target; 2619 2620 /* 2621 * If we are going to change the parent - check write permissions, 2622 * we'll need to flip '..'. 2623 */ 2624 if (new_dir != old_dir) { 2625 error = permission(old_dentry->d_inode, MAY_WRITE, NULL); 2626 if (error) 2627 return error; 2628 } 2629 2630 error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); 2631 if (error) 2632 return error; 2633 2634 target = new_dentry->d_inode; 2635 if (target) { 2636 mutex_lock(&target->i_mutex); 2637 dentry_unhash(new_dentry); 2638 } 2639 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) 2640 error = -EBUSY; 2641 else 2642 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2643 if (target) { 2644 if (!error) 2645 target->i_flags |= S_DEAD; 2646 mutex_unlock(&target->i_mutex); 2647 if (d_unhashed(new_dentry)) 2648 d_rehash(new_dentry); 2649 dput(new_dentry); 2650 } 2651 if (!error) 2652 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) 2653 d_move(old_dentry,new_dentry); 2654 return error; 2655 } 2656 2657 static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, 2658 struct inode *new_dir, struct dentry *new_dentry) 2659 { 2660 struct inode *target; 2661 int error; 2662 2663 error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); 2664 if (error) 2665 return error; 2666 2667 dget(new_dentry); 2668 target = new_dentry->d_inode; 2669 if (target) 2670 mutex_lock(&target->i_mutex); 2671 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) 2672 error = -EBUSY; 2673 else 2674 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2675 if (!error) { 2676 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) 2677 d_move(old_dentry, new_dentry); 2678 } 2679 if (target) 2680 
mutex_unlock(&target->i_mutex); 2681 dput(new_dentry); 2682 return error; 2683 } 2684 2685 int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, 2686 struct inode *new_dir, struct dentry *new_dentry) 2687 { 2688 int error; 2689 int is_dir = S_ISDIR(old_dentry->d_inode->i_mode); 2690 const char *old_name; 2691 2692 if (old_dentry->d_inode == new_dentry->d_inode) 2693 return 0; 2694 2695 error = may_delete(old_dir, old_dentry, is_dir); 2696 if (error) 2697 return error; 2698 2699 if (!new_dentry->d_inode) 2700 error = may_create(new_dir, new_dentry, NULL); 2701 else 2702 error = may_delete(new_dir, new_dentry, is_dir); 2703 if (error) 2704 return error; 2705 2706 if (!old_dir->i_op || !old_dir->i_op->rename) 2707 return -EPERM; 2708 2709 DQUOT_INIT(old_dir); 2710 DQUOT_INIT(new_dir); 2711 2712 old_name = fsnotify_oldname_init(old_dentry->d_name.name); 2713 2714 if (is_dir) 2715 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); 2716 else 2717 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); 2718 if (!error) { 2719 const char *new_name = old_dentry->d_name.name; 2720 fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir, 2721 new_dentry->d_inode, old_dentry); 2722 } 2723 fsnotify_oldname_free(old_name); 2724 2725 return error; 2726 } 2727 2728 static int do_rename(int olddfd, const char *oldname, 2729 int newdfd, const char *newname) 2730 { 2731 int error = 0; 2732 struct dentry * old_dir, * new_dir; 2733 struct dentry * old_dentry, *new_dentry; 2734 struct dentry * trap; 2735 struct nameidata oldnd, newnd; 2736 2737 error = do_path_lookup(olddfd, oldname, LOOKUP_PARENT, &oldnd); 2738 if (error) 2739 goto exit; 2740 2741 error = do_path_lookup(newdfd, newname, LOOKUP_PARENT, &newnd); 2742 if (error) 2743 goto exit1; 2744 2745 error = -EXDEV; 2746 if (oldnd.path.mnt != newnd.path.mnt) 2747 goto exit2; 2748 2749 old_dir = oldnd.path.dentry; 2750 error = -EBUSY; 2751 if (oldnd.last_type != LAST_NORM) 2752 goto exit2; 2753 2754 
new_dir = newnd.path.dentry; 2755 if (newnd.last_type != LAST_NORM) 2756 goto exit2; 2757 2758 trap = lock_rename(new_dir, old_dir); 2759 2760 old_dentry = lookup_hash(&oldnd); 2761 error = PTR_ERR(old_dentry); 2762 if (IS_ERR(old_dentry)) 2763 goto exit3; 2764 /* source must exist */ 2765 error = -ENOENT; 2766 if (!old_dentry->d_inode) 2767 goto exit4; 2768 /* unless the source is a directory trailing slashes give -ENOTDIR */ 2769 if (!S_ISDIR(old_dentry->d_inode->i_mode)) { 2770 error = -ENOTDIR; 2771 if (oldnd.last.name[oldnd.last.len]) 2772 goto exit4; 2773 if (newnd.last.name[newnd.last.len]) 2774 goto exit4; 2775 } 2776 /* source should not be ancestor of target */ 2777 error = -EINVAL; 2778 if (old_dentry == trap) 2779 goto exit4; 2780 new_dentry = lookup_hash(&newnd); 2781 error = PTR_ERR(new_dentry); 2782 if (IS_ERR(new_dentry)) 2783 goto exit4; 2784 /* target should not be an ancestor of source */ 2785 error = -ENOTEMPTY; 2786 if (new_dentry == trap) 2787 goto exit5; 2788 2789 error = mnt_want_write(oldnd.path.mnt); 2790 if (error) 2791 goto exit5; 2792 error = vfs_rename(old_dir->d_inode, old_dentry, 2793 new_dir->d_inode, new_dentry); 2794 mnt_drop_write(oldnd.path.mnt); 2795 exit5: 2796 dput(new_dentry); 2797 exit4: 2798 dput(old_dentry); 2799 exit3: 2800 unlock_rename(new_dir, old_dir); 2801 exit2: 2802 path_put(&newnd.path); 2803 exit1: 2804 path_put(&oldnd.path); 2805 exit: 2806 return error; 2807 } 2808 2809 asmlinkage long sys_renameat(int olddfd, const char __user *oldname, 2810 int newdfd, const char __user *newname) 2811 { 2812 int error; 2813 char * from; 2814 char * to; 2815 2816 from = getname(oldname); 2817 if(IS_ERR(from)) 2818 return PTR_ERR(from); 2819 to = getname(newname); 2820 error = PTR_ERR(to); 2821 if (!IS_ERR(to)) { 2822 error = do_rename(olddfd, from, newdfd, to); 2823 putname(to); 2824 } 2825 putname(from); 2826 return error; 2827 } 2828 2829 asmlinkage long sys_rename(const char __user *oldname, const char __user *newname) 
2830 { 2831 return sys_renameat(AT_FDCWD, oldname, AT_FDCWD, newname); 2832 } 2833 2834 int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link) 2835 { 2836 int len; 2837 2838 len = PTR_ERR(link); 2839 if (IS_ERR(link)) 2840 goto out; 2841 2842 len = strlen(link); 2843 if (len > (unsigned) buflen) 2844 len = buflen; 2845 if (copy_to_user(buffer, link, len)) 2846 len = -EFAULT; 2847 out: 2848 return len; 2849 } 2850 2851 /* 2852 * A helper for ->readlink(). This should be used *ONLY* for symlinks that 2853 * have ->follow_link() touching nd only in nd_set_link(). Using (or not 2854 * using) it for any given inode is up to filesystem. 2855 */ 2856 int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) 2857 { 2858 struct nameidata nd; 2859 void *cookie; 2860 2861 nd.depth = 0; 2862 cookie = dentry->d_inode->i_op->follow_link(dentry, &nd); 2863 if (!IS_ERR(cookie)) { 2864 int res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); 2865 if (dentry->d_inode->i_op->put_link) 2866 dentry->d_inode->i_op->put_link(dentry, &nd, cookie); 2867 cookie = ERR_PTR(res); 2868 } 2869 return PTR_ERR(cookie); 2870 } 2871 2872 int vfs_follow_link(struct nameidata *nd, const char *link) 2873 { 2874 return __vfs_follow_link(nd, link); 2875 } 2876 2877 /* get the link contents into pagecache */ 2878 static char *page_getlink(struct dentry * dentry, struct page **ppage) 2879 { 2880 struct page * page; 2881 struct address_space *mapping = dentry->d_inode->i_mapping; 2882 page = read_mapping_page(mapping, 0, NULL); 2883 if (IS_ERR(page)) 2884 return (char*)page; 2885 *ppage = page; 2886 return kmap(page); 2887 } 2888 2889 int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) 2890 { 2891 struct page *page = NULL; 2892 char *s = page_getlink(dentry, &page); 2893 int res = vfs_readlink(dentry,buffer,buflen,s); 2894 if (page) { 2895 kunmap(page); 2896 page_cache_release(page); 2897 } 2898 return res; 2899 } 2900 
2901 void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd) 2902 { 2903 struct page *page = NULL; 2904 nd_set_link(nd, page_getlink(dentry, &page)); 2905 return page; 2906 } 2907 2908 void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) 2909 { 2910 struct page *page = cookie; 2911 2912 if (page) { 2913 kunmap(page); 2914 page_cache_release(page); 2915 } 2916 } 2917 2918 int __page_symlink(struct inode *inode, const char *symname, int len, 2919 gfp_t gfp_mask) 2920 { 2921 struct address_space *mapping = inode->i_mapping; 2922 struct page *page; 2923 void *fsdata; 2924 int err; 2925 char *kaddr; 2926 2927 retry: 2928 err = pagecache_write_begin(NULL, mapping, 0, len-1, 2929 AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); 2930 if (err) 2931 goto fail; 2932 2933 kaddr = kmap_atomic(page, KM_USER0); 2934 memcpy(kaddr, symname, len-1); 2935 kunmap_atomic(kaddr, KM_USER0); 2936 2937 err = pagecache_write_end(NULL, mapping, 0, len-1, len-1, 2938 page, fsdata); 2939 if (err < 0) 2940 goto fail; 2941 if (err < len-1) 2942 goto retry; 2943 2944 mark_inode_dirty(inode); 2945 return 0; 2946 fail: 2947 return err; 2948 } 2949 2950 int page_symlink(struct inode *inode, const char *symname, int len) 2951 { 2952 return __page_symlink(inode, symname, len, 2953 mapping_gfp_mask(inode->i_mapping)); 2954 } 2955 2956 const struct inode_operations page_symlink_inode_operations = { 2957 .readlink = generic_readlink, 2958 .follow_link = page_follow_link_light, 2959 .put_link = page_put_link, 2960 }; 2961 2962 EXPORT_SYMBOL(__user_walk); 2963 EXPORT_SYMBOL(__user_walk_fd); 2964 EXPORT_SYMBOL(follow_down); 2965 EXPORT_SYMBOL(follow_up); 2966 EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ 2967 EXPORT_SYMBOL(getname); 2968 EXPORT_SYMBOL(lock_rename); 2969 EXPORT_SYMBOL(lookup_one_len); 2970 EXPORT_SYMBOL(page_follow_link_light); 2971 EXPORT_SYMBOL(page_put_link); 2972 EXPORT_SYMBOL(page_readlink); 2973 EXPORT_SYMBOL(__page_symlink); 2974 
EXPORT_SYMBOL(page_symlink);
EXPORT_SYMBOL(page_symlink_inode_operations);
EXPORT_SYMBOL(path_lookup);
EXPORT_SYMBOL(vfs_path_lookup);
EXPORT_SYMBOL(permission);
EXPORT_SYMBOL(vfs_permission);
EXPORT_SYMBOL(file_permission);
EXPORT_SYMBOL(unlock_rename);
EXPORT_SYMBOL(vfs_create);
EXPORT_SYMBOL(vfs_follow_link);
EXPORT_SYMBOL(vfs_link);
EXPORT_SYMBOL(vfs_mkdir);
EXPORT_SYMBOL(vfs_mknod);
EXPORT_SYMBOL(generic_permission);
EXPORT_SYMBOL(vfs_readlink);
EXPORT_SYMBOL(vfs_rename);
EXPORT_SYMBOL(vfs_rmdir);
EXPORT_SYMBOL(vfs_symlink);
EXPORT_SYMBOL(vfs_unlink);
EXPORT_SYMBOL(dentry_unhash);
EXPORT_SYMBOL(generic_readlink);