1 /* 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 3 * All Rights Reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 #include "xfs.h" 19 #include "xfs_fs.h" 20 #include "xfs_acl.h" 21 #include "xfs_bit.h" 22 #include "xfs_log.h" 23 #include "xfs_inum.h" 24 #include "xfs_trans.h" 25 #include "xfs_sb.h" 26 #include "xfs_ag.h" 27 #include "xfs_alloc.h" 28 #include "xfs_quota.h" 29 #include "xfs_mount.h" 30 #include "xfs_bmap_btree.h" 31 #include "xfs_dinode.h" 32 #include "xfs_inode.h" 33 #include "xfs_bmap.h" 34 #include "xfs_rtalloc.h" 35 #include "xfs_error.h" 36 #include "xfs_itable.h" 37 #include "xfs_rw.h" 38 #include "xfs_attr.h" 39 #include "xfs_buf_item.h" 40 #include "xfs_utils.h" 41 #include "xfs_vnodeops.h" 42 #include "xfs_inode_item.h" 43 #include "xfs_trace.h" 44 45 #include <linux/capability.h> 46 #include <linux/xattr.h> 47 #include <linux/namei.h> 48 #include <linux/posix_acl.h> 49 #include <linux/security.h> 50 #include <linux/fiemap.h> 51 #include <linux/slab.h> 52 53 static int 54 xfs_initxattrs( 55 struct inode *inode, 56 const struct xattr *xattr_array, 57 void *fs_info) 58 { 59 const struct xattr *xattr; 60 struct xfs_inode *ip = XFS_I(inode); 61 int error = 0; 62 63 for (xattr = xattr_array; xattr->name != NULL; xattr++) { 64 error = xfs_attr_set(ip, xattr->name, xattr->value, 65 xattr->value_len, ATTR_SECURE); 66 if (error < 0) 67 break; 68 } 69 return error; 70 } 71 72 /* 73 * Hook in SELinux. This is not quite correct yet, what we really need 74 * here (as we do for default ACLs) is a mechanism by which creation of 75 * these attrs can be journalled at inode creation time (along with the 76 * inode, of course, such that log replay can't cause these to be lost). 77 */ 78 79 STATIC int 80 xfs_init_security( 81 struct inode *inode, 82 struct inode *dir, 83 const struct qstr *qstr) 84 { 85 return security_inode_init_security(inode, dir, qstr, 86 &xfs_initxattrs, NULL); 87 } 88 89 static void 90 xfs_dentry_to_name( 91 struct xfs_name *namep, 92 struct dentry *dentry) 93 { 94 namep->name = dentry->d_name.name; 95 namep->len = dentry->d_name.len; 96 } 97 98 STATIC void 99 xfs_cleanup_inode( 100 struct inode *dir, 101 struct inode *inode, 102 struct dentry *dentry) 103 { 104 struct xfs_name teardown; 105 106 /* Oh, the horror. 107 * If we can't add the ACL or we fail in 108 * xfs_init_security we must back out. 109 * ENOSPC can hit here, among other things. 110 */ 111 xfs_dentry_to_name(&teardown, dentry); 112 113 xfs_remove(XFS_I(dir), &teardown, XFS_I(inode)); 114 iput(inode); 115 } 116 117 STATIC int 118 xfs_vn_mknod( 119 struct inode *dir, 120 struct dentry *dentry, 121 umode_t mode, 122 dev_t rdev) 123 { 124 struct inode *inode; 125 struct xfs_inode *ip = NULL; 126 struct posix_acl *default_acl = NULL; 127 struct xfs_name name; 128 int error; 129 130 /* 131 * Irix uses Missed'em'V split, but doesn't want to see 132 * the upper 5 bits of (14bit) major. 133 */ 134 if (S_ISCHR(mode) || S_ISBLK(mode)) { 135 if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)) 136 return -EINVAL; 137 rdev = sysv_encode_dev(rdev); 138 } else { 139 rdev = 0; 140 } 141 142 if (IS_POSIXACL(dir)) { 143 default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT); 144 if (IS_ERR(default_acl)) 145 return PTR_ERR(default_acl); 146 147 if (!default_acl) 148 mode &= ~current_umask(); 149 } 150 151 xfs_dentry_to_name(&name, dentry); 152 error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); 153 if (unlikely(error)) 154 goto out_free_acl; 155 156 inode = VFS_I(ip); 157 158 error = xfs_init_security(inode, dir, &dentry->d_name); 159 if (unlikely(error)) 160 goto out_cleanup_inode; 161 162 if (default_acl) { 163 error = -xfs_inherit_acl(inode, default_acl); 164 default_acl = NULL; 165 if (unlikely(error)) 166 goto out_cleanup_inode; 167 } 168 169 170 d_instantiate(dentry, inode); 171 return -error; 172 173 out_cleanup_inode: 174 xfs_cleanup_inode(dir, inode, dentry); 175 out_free_acl: 176 posix_acl_release(default_acl); 177 return -error; 178 } 179 180 STATIC int 181 xfs_vn_create( 182 struct inode *dir, 183 struct dentry *dentry, 184 umode_t mode, 185 struct nameidata *nd) 186 { 187 return xfs_vn_mknod(dir, dentry, mode, 0); 188 } 189 190 STATIC int 191 xfs_vn_mkdir( 192 struct inode *dir, 193 struct dentry *dentry, 194 umode_t mode) 195 { 196 return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0); 197 } 198 199 STATIC struct dentry * 200 xfs_vn_lookup( 201 struct inode *dir, 202 struct dentry *dentry, 203 struct nameidata *nd) 204 { 205 struct xfs_inode *cip; 206 struct xfs_name name; 207 int error; 208 209 if (dentry->d_name.len >= MAXNAMELEN) 210 return ERR_PTR(-ENAMETOOLONG); 211 212 xfs_dentry_to_name(&name, dentry); 213 error = xfs_lookup(XFS_I(dir), &name, &cip, NULL); 214 if (unlikely(error)) { 215 if (unlikely(error != ENOENT)) 216 return ERR_PTR(-error); 217 d_add(dentry, NULL); 218 return NULL; 219 } 220 221 return d_splice_alias(VFS_I(cip), dentry); 222 } 223 224 STATIC struct dentry * 225 xfs_vn_ci_lookup( 226 struct inode *dir, 227 struct dentry *dentry, 228 struct nameidata *nd) 229 { 230 struct xfs_inode *ip; 231 struct xfs_name xname; 232 struct xfs_name ci_name; 233 struct qstr dname; 234 int error; 235 236 if (dentry->d_name.len >= MAXNAMELEN) 237 return ERR_PTR(-ENAMETOOLONG); 238 239 xfs_dentry_to_name(&xname, dentry); 240 error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name); 241 if (unlikely(error)) { 242 if (unlikely(error != ENOENT)) 243 return ERR_PTR(-error); 244 /* 245 * call d_add(dentry, NULL) here when d_drop_negative_children 246 * is called in xfs_vn_mknod (ie. allow negative dentries 247 * with CI filesystems). 248 */ 249 return NULL; 250 } 251 252 /* if exact match, just splice and exit */ 253 if (!ci_name.name) 254 return d_splice_alias(VFS_I(ip), dentry); 255 256 /* else case-insensitive match... */ 257 dname.name = ci_name.name; 258 dname.len = ci_name.len; 259 dentry = d_add_ci(dentry, VFS_I(ip), &dname); 260 kmem_free(ci_name.name); 261 return dentry; 262 } 263 264 STATIC int 265 xfs_vn_link( 266 struct dentry *old_dentry, 267 struct inode *dir, 268 struct dentry *dentry) 269 { 270 struct inode *inode = old_dentry->d_inode; 271 struct xfs_name name; 272 int error; 273 274 xfs_dentry_to_name(&name, dentry); 275 276 error = xfs_link(XFS_I(dir), XFS_I(inode), &name); 277 if (unlikely(error)) 278 return -error; 279 280 ihold(inode); 281 d_instantiate(dentry, inode); 282 return 0; 283 } 284 285 STATIC int 286 xfs_vn_unlink( 287 struct inode *dir, 288 struct dentry *dentry) 289 { 290 struct xfs_name name; 291 int error; 292 293 xfs_dentry_to_name(&name, dentry); 294 295 error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode)); 296 if (error) 297 return error; 298 299 /* 300 * With unlink, the VFS makes the dentry "negative": no inode, 301 * but still hashed. This is incompatible with case-insensitive 302 * mode, so invalidate (unhash) the dentry in CI-mode. 303 */ 304 if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb)) 305 d_invalidate(dentry); 306 return 0; 307 } 308 309 STATIC int 310 xfs_vn_symlink( 311 struct inode *dir, 312 struct dentry *dentry, 313 const char *symname) 314 { 315 struct inode *inode; 316 struct xfs_inode *cip = NULL; 317 struct xfs_name name; 318 int error; 319 umode_t mode; 320 321 mode = S_IFLNK | 322 (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO); 323 xfs_dentry_to_name(&name, dentry); 324 325 error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip); 326 if (unlikely(error)) 327 goto out; 328 329 inode = VFS_I(cip); 330 331 error = xfs_init_security(inode, dir, &dentry->d_name); 332 if (unlikely(error)) 333 goto out_cleanup_inode; 334 335 d_instantiate(dentry, inode); 336 return 0; 337 338 out_cleanup_inode: 339 xfs_cleanup_inode(dir, inode, dentry); 340 out: 341 return -error; 342 } 343 344 STATIC int 345 xfs_vn_rename( 346 struct inode *odir, 347 struct dentry *odentry, 348 struct inode *ndir, 349 struct dentry *ndentry) 350 { 351 struct inode *new_inode = ndentry->d_inode; 352 struct xfs_name oname; 353 struct xfs_name nname; 354 355 xfs_dentry_to_name(&oname, odentry); 356 xfs_dentry_to_name(&nname, ndentry); 357 358 return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), 359 XFS_I(ndir), &nname, new_inode ? 360 XFS_I(new_inode) : NULL); 361 } 362 363 /* 364 * careful here - this function can get called recursively, so 365 * we need to be very careful about how much stack we use. 366 * uio is kmalloced for this reason... 367 */ 368 STATIC void * 369 xfs_vn_follow_link( 370 struct dentry *dentry, 371 struct nameidata *nd) 372 { 373 char *link; 374 int error = -ENOMEM; 375 376 link = kmalloc(MAXPATHLEN+1, GFP_KERNEL); 377 if (!link) 378 goto out_err; 379 380 error = -xfs_readlink(XFS_I(dentry->d_inode), link); 381 if (unlikely(error)) 382 goto out_kfree; 383 384 nd_set_link(nd, link); 385 return NULL; 386 387 out_kfree: 388 kfree(link); 389 out_err: 390 nd_set_link(nd, ERR_PTR(error)); 391 return NULL; 392 } 393 394 STATIC void 395 xfs_vn_put_link( 396 struct dentry *dentry, 397 struct nameidata *nd, 398 void *p) 399 { 400 char *s = nd_get_link(nd); 401 402 if (!IS_ERR(s)) 403 kfree(s); 404 } 405 406 STATIC int 407 xfs_vn_getattr( 408 struct vfsmount *mnt, 409 struct dentry *dentry, 410 struct kstat *stat) 411 { 412 struct inode *inode = dentry->d_inode; 413 struct xfs_inode *ip = XFS_I(inode); 414 struct xfs_mount *mp = ip->i_mount; 415 416 trace_xfs_getattr(ip); 417 418 if (XFS_FORCED_SHUTDOWN(mp)) 419 return -XFS_ERROR(EIO); 420 421 stat->size = XFS_ISIZE(ip); 422 stat->dev = inode->i_sb->s_dev; 423 stat->mode = ip->i_d.di_mode; 424 stat->nlink = ip->i_d.di_nlink; 425 stat->uid = ip->i_d.di_uid; 426 stat->gid = ip->i_d.di_gid; 427 stat->ino = ip->i_ino; 428 stat->atime = inode->i_atime; 429 stat->mtime = inode->i_mtime; 430 stat->ctime = inode->i_ctime; 431 stat->blocks = 432 XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); 433 434 435 switch (inode->i_mode & S_IFMT) { 436 case S_IFBLK: 437 case S_IFCHR: 438 stat->blksize = BLKDEV_IOSIZE; 439 stat->rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, 440 sysv_minor(ip->i_df.if_u2.if_rdev)); 441 break; 442 default: 443 if (XFS_IS_REALTIME_INODE(ip)) { 444 /* 445 * If the file blocks are being allocated from a 446 * realtime volume, then return the inode's realtime 447 * extent size or the realtime volume's extent size. 448 */ 449 stat->blksize = 450 xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog; 451 } else 452 stat->blksize = xfs_preferred_iosize(mp); 453 stat->rdev = 0; 454 break; 455 } 456 457 return 0; 458 } 459 460 int 461 xfs_setattr_nonsize( 462 struct xfs_inode *ip, 463 struct iattr *iattr, 464 int flags) 465 { 466 xfs_mount_t *mp = ip->i_mount; 467 struct inode *inode = VFS_I(ip); 468 int mask = iattr->ia_valid; 469 xfs_trans_t *tp; 470 int error; 471 uid_t uid = 0, iuid = 0; 472 gid_t gid = 0, igid = 0; 473 struct xfs_dquot *udqp = NULL, *gdqp = NULL; 474 struct xfs_dquot *olddquot1 = NULL, *olddquot2 = NULL; 475 476 trace_xfs_setattr(ip); 477 478 if (mp->m_flags & XFS_MOUNT_RDONLY) 479 return XFS_ERROR(EROFS); 480 481 if (XFS_FORCED_SHUTDOWN(mp)) 482 return XFS_ERROR(EIO); 483 484 error = -inode_change_ok(inode, iattr); 485 if (error) 486 return XFS_ERROR(error); 487 488 ASSERT((mask & ATTR_SIZE) == 0); 489 490 /* 491 * If disk quotas is on, we make sure that the dquots do exist on disk, 492 * before we start any other transactions. Trying to do this later 493 * is messy. We don't care to take a readlock to look at the ids 494 * in inode here, because we can't hold it across the trans_reserve. 495 * If the IDs do change before we take the ilock, we're covered 496 * because the i_*dquot fields will get updated anyway. 497 */ 498 if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) { 499 uint qflags = 0; 500 501 if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) { 502 uid = iattr->ia_uid; 503 qflags |= XFS_QMOPT_UQUOTA; 504 } else { 505 uid = ip->i_d.di_uid; 506 } 507 if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) { 508 gid = iattr->ia_gid; 509 qflags |= XFS_QMOPT_GQUOTA; 510 } else { 511 gid = ip->i_d.di_gid; 512 } 513 514 /* 515 * We take a reference when we initialize udqp and gdqp, 516 * so it is important that we never blindly double trip on 517 * the same variable. See xfs_create() for an example. 518 */ 519 ASSERT(udqp == NULL); 520 ASSERT(gdqp == NULL); 521 error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip), 522 qflags, &udqp, &gdqp); 523 if (error) 524 return error; 525 } 526 527 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); 528 error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); 529 if (error) 530 goto out_dqrele; 531 532 xfs_ilock(ip, XFS_ILOCK_EXCL); 533 534 /* 535 * Change file ownership. Must be the owner or privileged. 536 */ 537 if (mask & (ATTR_UID|ATTR_GID)) { 538 /* 539 * These IDs could have changed since we last looked at them. 540 * But, we're assured that if the ownership did change 541 * while we didn't have the inode locked, inode's dquot(s) 542 * would have changed also. 543 */ 544 iuid = ip->i_d.di_uid; 545 igid = ip->i_d.di_gid; 546 gid = (mask & ATTR_GID) ? iattr->ia_gid : igid; 547 uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid; 548 549 /* 550 * Do a quota reservation only if uid/gid is actually 551 * going to change. 552 */ 553 if (XFS_IS_QUOTA_RUNNING(mp) && 554 ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || 555 (XFS_IS_GQUOTA_ON(mp) && igid != gid))) { 556 ASSERT(tp); 557 error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, 558 capable(CAP_FOWNER) ? 559 XFS_QMOPT_FORCE_RES : 0); 560 if (error) /* out of quota */ 561 goto out_trans_cancel; 562 } 563 } 564 565 xfs_trans_ijoin(tp, ip, 0); 566 567 /* 568 * Change file ownership. Must be the owner or privileged. 569 */ 570 if (mask & (ATTR_UID|ATTR_GID)) { 571 /* 572 * CAP_FSETID overrides the following restrictions: 573 * 574 * The set-user-ID and set-group-ID bits of a file will be 575 * cleared upon successful return from chown() 576 */ 577 if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && 578 !capable(CAP_FSETID)) 579 ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); 580 581 /* 582 * Change the ownerships and register quota modifications 583 * in the transaction. 584 */ 585 if (iuid != uid) { 586 if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) { 587 ASSERT(mask & ATTR_UID); 588 ASSERT(udqp); 589 olddquot1 = xfs_qm_vop_chown(tp, ip, 590 &ip->i_udquot, udqp); 591 } 592 ip->i_d.di_uid = uid; 593 inode->i_uid = uid; 594 } 595 if (igid != gid) { 596 if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { 597 ASSERT(!XFS_IS_PQUOTA_ON(mp)); 598 ASSERT(mask & ATTR_GID); 599 ASSERT(gdqp); 600 olddquot2 = xfs_qm_vop_chown(tp, ip, 601 &ip->i_gdquot, gdqp); 602 } 603 ip->i_d.di_gid = gid; 604 inode->i_gid = gid; 605 } 606 } 607 608 /* 609 * Change file access modes. 610 */ 611 if (mask & ATTR_MODE) { 612 umode_t mode = iattr->ia_mode; 613 614 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) 615 mode &= ~S_ISGID; 616 617 ip->i_d.di_mode &= S_IFMT; 618 ip->i_d.di_mode |= mode & ~S_IFMT; 619 620 inode->i_mode &= S_IFMT; 621 inode->i_mode |= mode & ~S_IFMT; 622 } 623 624 /* 625 * Change file access or modified times. 626 */ 627 if (mask & ATTR_ATIME) { 628 inode->i_atime = iattr->ia_atime; 629 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; 630 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; 631 } 632 if (mask & ATTR_CTIME) { 633 inode->i_ctime = iattr->ia_ctime; 634 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; 635 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; 636 } 637 if (mask & ATTR_MTIME) { 638 inode->i_mtime = iattr->ia_mtime; 639 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; 640 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; 641 } 642 643 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 644 645 XFS_STATS_INC(xs_ig_attrchg); 646 647 if (mp->m_flags & XFS_MOUNT_WSYNC) 648 xfs_trans_set_sync(tp); 649 error = xfs_trans_commit(tp, 0); 650 651 xfs_iunlock(ip, XFS_ILOCK_EXCL); 652 653 /* 654 * Release any dquot(s) the inode had kept before chown. 655 */ 656 xfs_qm_dqrele(olddquot1); 657 xfs_qm_dqrele(olddquot2); 658 xfs_qm_dqrele(udqp); 659 xfs_qm_dqrele(gdqp); 660 661 if (error) 662 return XFS_ERROR(error); 663 664 /* 665 * XXX(hch): Updating the ACL entries is not atomic vs the i_mode 666 * update. We could avoid this with linked transactions 667 * and passing down the transaction pointer all the way 668 * to attr_set. No previous user of the generic 669 * Posix ACL code seems to care about this issue either. 670 */ 671 if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) { 672 error = -xfs_acl_chmod(inode); 673 if (error) 674 return XFS_ERROR(error); 675 } 676 677 return 0; 678 679 out_trans_cancel: 680 xfs_trans_cancel(tp, 0); 681 xfs_iunlock(ip, XFS_ILOCK_EXCL); 682 out_dqrele: 683 xfs_qm_dqrele(udqp); 684 xfs_qm_dqrele(gdqp); 685 return error; 686 } 687 688 /* 689 * Truncate file. Must have write permission and not be a directory. 690 */ 691 int 692 xfs_setattr_size( 693 struct xfs_inode *ip, 694 struct iattr *iattr, 695 int flags) 696 { 697 struct xfs_mount *mp = ip->i_mount; 698 struct inode *inode = VFS_I(ip); 699 int mask = iattr->ia_valid; 700 xfs_off_t oldsize, newsize; 701 struct xfs_trans *tp; 702 int error; 703 uint lock_flags; 704 uint commit_flags = 0; 705 706 trace_xfs_setattr(ip); 707 708 if (mp->m_flags & XFS_MOUNT_RDONLY) 709 return XFS_ERROR(EROFS); 710 711 if (XFS_FORCED_SHUTDOWN(mp)) 712 return XFS_ERROR(EIO); 713 714 error = -inode_change_ok(inode, iattr); 715 if (error) 716 return XFS_ERROR(error); 717 718 ASSERT(S_ISREG(ip->i_d.di_mode)); 719 ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| 720 ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID| 721 ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); 722 723 lock_flags = XFS_ILOCK_EXCL; 724 if (!(flags & XFS_ATTR_NOLOCK)) 725 lock_flags |= XFS_IOLOCK_EXCL; 726 xfs_ilock(ip, lock_flags); 727 728 oldsize = inode->i_size; 729 newsize = iattr->ia_size; 730 731 /* 732 * Short circuit the truncate case for zero length files. 733 */ 734 if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) { 735 if (!(mask & (ATTR_CTIME|ATTR_MTIME))) 736 goto out_unlock; 737 738 /* 739 * Use the regular setattr path to update the timestamps. 740 */ 741 xfs_iunlock(ip, lock_flags); 742 iattr->ia_valid &= ~ATTR_SIZE; 743 return xfs_setattr_nonsize(ip, iattr, 0); 744 } 745 746 /* 747 * Make sure that the dquots are attached to the inode. 748 */ 749 error = xfs_qm_dqattach_locked(ip, 0); 750 if (error) 751 goto out_unlock; 752 753 /* 754 * Now we can make the changes. Before we join the inode to the 755 * transaction, take care of the part of the truncation that must be 756 * done without the inode lock. This needs to be done before joining 757 * the inode to the transaction, because the inode cannot be unlocked 758 * once it is a part of the transaction. 759 */ 760 if (newsize > oldsize) { 761 /* 762 * Do the first part of growing a file: zero any data in the 763 * last block that is beyond the old EOF. We need to do this 764 * before the inode is joined to the transaction to modify 765 * i_size. 766 */ 767 error = xfs_zero_eof(ip, newsize, oldsize); 768 if (error) 769 goto out_unlock; 770 } 771 xfs_iunlock(ip, XFS_ILOCK_EXCL); 772 lock_flags &= ~XFS_ILOCK_EXCL; 773 774 /* 775 * We are going to log the inode size change in this transaction so 776 * any previous writes that are beyond the on disk EOF and the new 777 * EOF that have not been written out need to be written here. If we 778 * do not write the data out, we expose ourselves to the null files 779 * problem. 780 * 781 * Only flush from the on disk size to the smaller of the in memory 782 * file size or the new size as that's the range we really care about 783 * here and prevents waiting for other data not within the range we 784 * care about here. 785 */ 786 if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) { 787 error = xfs_flush_pages(ip, ip->i_d.di_size, newsize, 0, 788 FI_NONE); 789 if (error) 790 goto out_unlock; 791 } 792 793 /* 794 * Wait for all direct I/O to complete. 795 */ 796 inode_dio_wait(inode); 797 798 error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); 799 if (error) 800 goto out_unlock; 801 802 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); 803 error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 804 XFS_TRANS_PERM_LOG_RES, 805 XFS_ITRUNCATE_LOG_COUNT); 806 if (error) 807 goto out_trans_cancel; 808 809 truncate_setsize(inode, newsize); 810 811 commit_flags = XFS_TRANS_RELEASE_LOG_RES; 812 lock_flags |= XFS_ILOCK_EXCL; 813 814 xfs_ilock(ip, XFS_ILOCK_EXCL); 815 816 xfs_trans_ijoin(tp, ip, 0); 817 818 /* 819 * Only change the c/mtime if we are changing the size or we are 820 * explicitly asked to change it. This handles the semantic difference 821 * between truncate() and ftruncate() as implemented in the VFS. 822 * 823 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a 824 * special case where we need to update the times despite not having 825 * these flags set. For all other operations the VFS set these flags 826 * explicitly if it wants a timestamp update. 827 */ 828 if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME)))) { 829 iattr->ia_ctime = iattr->ia_mtime = 830 current_fs_time(inode->i_sb); 831 mask |= ATTR_CTIME | ATTR_MTIME; 832 } 833 834 /* 835 * The first thing we do is set the size to new_size permanently on 836 * disk. This way we don't have to worry about anyone ever being able 837 * to look at the data being freed even in the face of a crash. 838 * What we're getting around here is the case where we free a block, it 839 * is allocated to another file, it is written to, and then we crash. 840 * If the new data gets written to the file but the log buffers 841 * containing the free and reallocation don't, then we'd end up with 842 * garbage in the blocks being freed. As long as we make the new size 843 * permanent before actually freeing any blocks it doesn't matter if 844 * they get written to. 845 */ 846 ip->i_d.di_size = newsize; 847 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 848 849 if (newsize <= oldsize) { 850 error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, newsize); 851 if (error) 852 goto out_trans_abort; 853 854 /* 855 * Truncated "down", so we're removing references to old data 856 * here - if we delay flushing for a long time, we expose 857 * ourselves unduly to the notorious NULL files problem. So, 858 * we mark this inode and flush it when the file is closed, 859 * and do not wait the usual (long) time for writeout. 860 */ 861 xfs_iflags_set(ip, XFS_ITRUNCATED); 862 } 863 864 if (mask & ATTR_CTIME) { 865 inode->i_ctime = iattr->ia_ctime; 866 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; 867 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; 868 } 869 if (mask & ATTR_MTIME) { 870 inode->i_mtime = iattr->ia_mtime; 871 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; 872 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; 873 } 874 875 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 876 877 XFS_STATS_INC(xs_ig_attrchg); 878 879 if (mp->m_flags & XFS_MOUNT_WSYNC) 880 xfs_trans_set_sync(tp); 881 882 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 883 out_unlock: 884 if (lock_flags) 885 xfs_iunlock(ip, lock_flags); 886 return error; 887 888 out_trans_abort: 889 commit_flags |= XFS_TRANS_ABORT; 890 out_trans_cancel: 891 xfs_trans_cancel(tp, commit_flags); 892 goto out_unlock; 893 } 894 895 STATIC int 896 xfs_vn_setattr( 897 struct dentry *dentry, 898 struct iattr *iattr) 899 { 900 if (iattr->ia_valid & ATTR_SIZE) 901 return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0); 902 return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0); 903 } 904 905 #define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) 906 907 /* 908 * Call fiemap helper to fill in user data. 909 * Returns positive errors to xfs_getbmap. 910 */ 911 STATIC int 912 xfs_fiemap_format( 913 void **arg, 914 struct getbmapx *bmv, 915 int *full) 916 { 917 int error; 918 struct fiemap_extent_info *fieinfo = *arg; 919 u32 fiemap_flags = 0; 920 u64 logical, physical, length; 921 922 /* Do nothing for a hole */ 923 if (bmv->bmv_block == -1LL) 924 return 0; 925 926 logical = BBTOB(bmv->bmv_offset); 927 physical = BBTOB(bmv->bmv_block); 928 length = BBTOB(bmv->bmv_length); 929 930 if (bmv->bmv_oflags & BMV_OF_PREALLOC) 931 fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN; 932 else if (bmv->bmv_oflags & BMV_OF_DELALLOC) { 933 fiemap_flags |= FIEMAP_EXTENT_DELALLOC; 934 physical = 0; /* no block yet */ 935 } 936 if (bmv->bmv_oflags & BMV_OF_LAST) 937 fiemap_flags |= FIEMAP_EXTENT_LAST; 938 939 error = fiemap_fill_next_extent(fieinfo, logical, physical, 940 length, fiemap_flags); 941 if (error > 0) { 942 error = 0; 943 *full = 1; /* user array now full */ 944 } 945 946 return -error; 947 } 948 949 STATIC int 950 xfs_vn_fiemap( 951 struct inode *inode, 952 struct fiemap_extent_info *fieinfo, 953 u64 start, 954 u64 length) 955 { 956 xfs_inode_t *ip = XFS_I(inode); 957 struct getbmapx bm; 958 int error; 959 960 error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS); 961 if (error) 962 return error; 963 964 /* Set up bmap header for xfs internal routine */ 965 bm.bmv_offset = BTOBB(start); 966 /* Special case for whole file */ 967 if (length == FIEMAP_MAX_OFFSET) 968 bm.bmv_length = -1LL; 969 else 970 bm.bmv_length = BTOBB(length); 971 972 /* We add one because in getbmap world count includes the header */ 973 bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM : 974 fieinfo->fi_extents_max + 1; 975 bm.bmv_count = min_t(__s32, bm.bmv_count, 976 (PAGE_SIZE * 16 / sizeof(struct getbmapx))); 977 bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES; 978 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) 979 bm.bmv_iflags |= BMV_IF_ATTRFORK; 980 if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC)) 981 bm.bmv_iflags |= BMV_IF_DELALLOC; 982 983 error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo); 984 if (error) 985 return -error; 986 987 return 0; 988 } 989 990 static const struct inode_operations xfs_inode_operations = { 991 .get_acl = xfs_get_acl, 992 .getattr = xfs_vn_getattr, 993 .setattr = xfs_vn_setattr, 994 .setxattr = generic_setxattr, 995 .getxattr = generic_getxattr, 996 .removexattr = generic_removexattr, 997 .listxattr = xfs_vn_listxattr, 998 .fiemap = xfs_vn_fiemap, 999 }; 1000 1001 static const struct inode_operations xfs_dir_inode_operations = { 1002 .create = xfs_vn_create, 1003 .lookup = xfs_vn_lookup, 1004 .link = xfs_vn_link, 1005 .unlink = xfs_vn_unlink, 1006 .symlink = xfs_vn_symlink, 1007 .mkdir = xfs_vn_mkdir, 1008 /* 1009 * Yes, XFS uses the same method for rmdir and unlink. 1010 * 1011 * There are some subtile differences deeper in the code, 1012 * but we use S_ISDIR to check for those. 1013 */ 1014 .rmdir = xfs_vn_unlink, 1015 .mknod = xfs_vn_mknod, 1016 .rename = xfs_vn_rename, 1017 .get_acl = xfs_get_acl, 1018 .getattr = xfs_vn_getattr, 1019 .setattr = xfs_vn_setattr, 1020 .setxattr = generic_setxattr, 1021 .getxattr = generic_getxattr, 1022 .removexattr = generic_removexattr, 1023 .listxattr = xfs_vn_listxattr, 1024 }; 1025 1026 static const struct inode_operations xfs_dir_ci_inode_operations = { 1027 .create = xfs_vn_create, 1028 .lookup = xfs_vn_ci_lookup, 1029 .link = xfs_vn_link, 1030 .unlink = xfs_vn_unlink, 1031 .symlink = xfs_vn_symlink, 1032 .mkdir = xfs_vn_mkdir, 1033 /* 1034 * Yes, XFS uses the same method for rmdir and unlink. 1035 * 1036 * There are some subtile differences deeper in the code, 1037 * but we use S_ISDIR to check for those. 1038 */ 1039 .rmdir = xfs_vn_unlink, 1040 .mknod = xfs_vn_mknod, 1041 .rename = xfs_vn_rename, 1042 .get_acl = xfs_get_acl, 1043 .getattr = xfs_vn_getattr, 1044 .setattr = xfs_vn_setattr, 1045 .setxattr = generic_setxattr, 1046 .getxattr = generic_getxattr, 1047 .removexattr = generic_removexattr, 1048 .listxattr = xfs_vn_listxattr, 1049 }; 1050 1051 static const struct inode_operations xfs_symlink_inode_operations = { 1052 .readlink = generic_readlink, 1053 .follow_link = xfs_vn_follow_link, 1054 .put_link = xfs_vn_put_link, 1055 .get_acl = xfs_get_acl, 1056 .getattr = xfs_vn_getattr, 1057 .setattr = xfs_vn_setattr, 1058 .setxattr = generic_setxattr, 1059 .getxattr = generic_getxattr, 1060 .removexattr = generic_removexattr, 1061 .listxattr = xfs_vn_listxattr, 1062 }; 1063 1064 STATIC void 1065 xfs_diflags_to_iflags( 1066 struct inode *inode, 1067 struct xfs_inode *ip) 1068 { 1069 if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE) 1070 inode->i_flags |= S_IMMUTABLE; 1071 else 1072 inode->i_flags &= ~S_IMMUTABLE; 1073 if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) 1074 inode->i_flags |= S_APPEND; 1075 else 1076 inode->i_flags &= ~S_APPEND; 1077 if (ip->i_d.di_flags & XFS_DIFLAG_SYNC) 1078 inode->i_flags |= S_SYNC; 1079 else 1080 inode->i_flags &= ~S_SYNC; 1081 if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME) 1082 inode->i_flags |= S_NOATIME; 1083 else 1084 inode->i_flags &= ~S_NOATIME; 1085 } 1086 1087 /* 1088 * Initialize the Linux inode, set up the operation vectors and 1089 * unlock the inode. 1090 * 1091 * When reading existing inodes from disk this is called directly 1092 * from xfs_iget, when creating a new inode it is called from 1093 * xfs_ialloc after setting up the inode. 1094 * 1095 * We are always called with an uninitialised linux inode here. 1096 * We need to initialise the necessary fields and take a reference 1097 * on it. 1098 */ 1099 void 1100 xfs_setup_inode( 1101 struct xfs_inode *ip) 1102 { 1103 struct inode *inode = &ip->i_vnode; 1104 1105 inode->i_ino = ip->i_ino; 1106 inode->i_state = I_NEW; 1107 1108 inode_sb_list_add(inode); 1109 /* make the inode look hashed for the writeback code */ 1110 hlist_add_fake(&inode->i_hash); 1111 1112 inode->i_mode = ip->i_d.di_mode; 1113 set_nlink(inode, ip->i_d.di_nlink); 1114 inode->i_uid = ip->i_d.di_uid; 1115 inode->i_gid = ip->i_d.di_gid; 1116 1117 switch (inode->i_mode & S_IFMT) { 1118 case S_IFBLK: 1119 case S_IFCHR: 1120 inode->i_rdev = 1121 MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, 1122 sysv_minor(ip->i_df.if_u2.if_rdev)); 1123 break; 1124 default: 1125 inode->i_rdev = 0; 1126 break; 1127 } 1128 1129 inode->i_generation = ip->i_d.di_gen; 1130 i_size_write(inode, ip->i_d.di_size); 1131 inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec; 1132 inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec; 1133 inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec; 1134 inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; 1135 inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec; 1136 inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; 1137 xfs_diflags_to_iflags(inode, ip); 1138 1139 switch (inode->i_mode & S_IFMT) { 1140 case S_IFREG: 1141 inode->i_op = &xfs_inode_operations; 1142 inode->i_fop = &xfs_file_operations; 1143 inode->i_mapping->a_ops = &xfs_address_space_operations; 1144 break; 1145 case S_IFDIR: 1146 if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) 1147 inode->i_op = &xfs_dir_ci_inode_operations; 1148 else 1149 inode->i_op = &xfs_dir_inode_operations; 1150 inode->i_fop = &xfs_dir_file_operations; 1151 break; 1152 case S_IFLNK: 1153 inode->i_op = &xfs_symlink_inode_operations; 1154 if (!(ip->i_df.if_flags & XFS_IFINLINE)) 1155 inode->i_mapping->a_ops = &xfs_address_space_operations; 1156 break; 1157 default: 1158 inode->i_op = &xfs_inode_operations; 1159 init_special_inode(inode, inode->i_mode, inode->i_rdev); 1160 break; 1161 } 1162 1163 /* 1164 * If there is no attribute fork no ACL can exist on this inode, 1165 * and it can't have any file capabilities attached to it either. 1166 */ 1167 if (!XFS_IFORK_Q(ip)) { 1168 inode_has_no_xattr(inode); 1169 cache_no_acl(inode); 1170 } 1171 1172 xfs_iflags_clear(ip, XFS_INEW); 1173 barrier(); 1174 1175 unlock_new_inode(inode); 1176 } 1177