11da177e4SLinus Torvalds /* 23e57ecf6SOlaf Weber * Copyright (c) 2000-2006 Silicon Graphics, Inc. 37b718769SNathan Scott * All Rights Reserved. 41da177e4SLinus Torvalds * 57b718769SNathan Scott * This program is free software; you can redistribute it and/or 67b718769SNathan Scott * modify it under the terms of the GNU General Public License as 71da177e4SLinus Torvalds * published by the Free Software Foundation. 81da177e4SLinus Torvalds * 97b718769SNathan Scott * This program is distributed in the hope that it would be useful, 107b718769SNathan Scott * but WITHOUT ANY WARRANTY; without even the implied warranty of 117b718769SNathan Scott * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 127b718769SNathan Scott * GNU General Public License for more details. 131da177e4SLinus Torvalds * 147b718769SNathan Scott * You should have received a copy of the GNU General Public License 157b718769SNathan Scott * along with this program; if not, write the Free Software Foundation, 167b718769SNathan Scott * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 171da177e4SLinus Torvalds */ 1840ebd81dSRobert P. J. Day #include <linux/log2.h> 1940ebd81dSRobert P. J. 
Day 201da177e4SLinus Torvalds #include "xfs.h" 21a844f451SNathan Scott #include "xfs_fs.h" 226ca1c906SDave Chinner #include "xfs_format.h" 231da177e4SLinus Torvalds #include "xfs_log.h" 24a844f451SNathan Scott #include "xfs_inum.h" 251da177e4SLinus Torvalds #include "xfs_trans.h" 26c24b5dfaSDave Chinner #include "xfs_trans_space.h" 271da177e4SLinus Torvalds #include "xfs_trans_priv.h" 281da177e4SLinus Torvalds #include "xfs_sb.h" 291da177e4SLinus Torvalds #include "xfs_ag.h" 301da177e4SLinus Torvalds #include "xfs_mount.h" 31c24b5dfaSDave Chinner #include "xfs_da_btree.h" 32c24b5dfaSDave Chinner #include "xfs_dir2_format.h" 33c24b5dfaSDave Chinner #include "xfs_dir2.h" 341da177e4SLinus Torvalds #include "xfs_bmap_btree.h" 35a844f451SNathan Scott #include "xfs_alloc_btree.h" 361da177e4SLinus Torvalds #include "xfs_ialloc_btree.h" 37a844f451SNathan Scott #include "xfs_attr_sf.h" 38c24b5dfaSDave Chinner #include "xfs_attr.h" 391da177e4SLinus Torvalds #include "xfs_dinode.h" 401da177e4SLinus Torvalds #include "xfs_inode.h" 411da177e4SLinus Torvalds #include "xfs_buf_item.h" 42a844f451SNathan Scott #include "xfs_inode_item.h" 43a844f451SNathan Scott #include "xfs_btree.h" 44a844f451SNathan Scott #include "xfs_alloc.h" 45a844f451SNathan Scott #include "xfs_ialloc.h" 46a844f451SNathan Scott #include "xfs_bmap.h" 4768988114SDave Chinner #include "xfs_bmap_util.h" 481da177e4SLinus Torvalds #include "xfs_error.h" 491da177e4SLinus Torvalds #include "xfs_quota.h" 502a82b8beSDavid Chinner #include "xfs_filestream.h" 5193848a99SChristoph Hellwig #include "xfs_cksum.h" 520b1b213fSChristoph Hellwig #include "xfs_trace.h" 5333479e05SDave Chinner #include "xfs_icache.h" 54c24b5dfaSDave Chinner #include "xfs_symlink.h" 551da177e4SLinus Torvalds 561da177e4SLinus Torvalds kmem_zone_t *xfs_inode_zone; 571da177e4SLinus Torvalds 581da177e4SLinus Torvalds /* 598f04c47aSChristoph Hellwig * Used in xfs_itruncate_extents(). 
This is the maximum number of extents 601da177e4SLinus Torvalds * freed from a file in a single transaction. 611da177e4SLinus Torvalds */ 621da177e4SLinus Torvalds #define XFS_ITRUNC_MAX_EXTENTS 2 631da177e4SLinus Torvalds 641da177e4SLinus Torvalds STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *); 651da177e4SLinus Torvalds 662a0ec1d9SDave Chinner /* 672a0ec1d9SDave Chinner * helper function to extract extent size hint from inode 682a0ec1d9SDave Chinner */ 692a0ec1d9SDave Chinner xfs_extlen_t 702a0ec1d9SDave Chinner xfs_get_extsz_hint( 712a0ec1d9SDave Chinner struct xfs_inode *ip) 722a0ec1d9SDave Chinner { 732a0ec1d9SDave Chinner if ((ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) && ip->i_d.di_extsize) 742a0ec1d9SDave Chinner return ip->i_d.di_extsize; 752a0ec1d9SDave Chinner if (XFS_IS_REALTIME_INODE(ip)) 762a0ec1d9SDave Chinner return ip->i_mount->m_sb.sb_rextsize; 772a0ec1d9SDave Chinner return 0; 782a0ec1d9SDave Chinner } 792a0ec1d9SDave Chinner 80fa96acadSDave Chinner /* 81fa96acadSDave Chinner * This is a wrapper routine around the xfs_ilock() routine used to centralize 82fa96acadSDave Chinner * some grungy code. It is used in places that wish to lock the inode solely 83fa96acadSDave Chinner * for reading the extents. The reason these places can't just call 84fa96acadSDave Chinner * xfs_ilock(SHARED) is that the inode lock also guards to bringing in of the 85fa96acadSDave Chinner * extents from disk for a file in b-tree format. If the inode is in b-tree 86fa96acadSDave Chinner * format, then we need to lock the inode exclusively until the extents are read 87fa96acadSDave Chinner * in. Locking it exclusively all the time would limit our parallelism 88fa96acadSDave Chinner * unnecessarily, though. What we do instead is check to see if the extents 89fa96acadSDave Chinner * have been read in yet, and only lock the inode exclusively if they have not. 
90fa96acadSDave Chinner * 91fa96acadSDave Chinner * The function returns a value which should be given to the corresponding 92fa96acadSDave Chinner * xfs_iunlock_map_shared(). This value is the mode in which the lock was 93fa96acadSDave Chinner * actually taken. 94fa96acadSDave Chinner */ 95fa96acadSDave Chinner uint 96fa96acadSDave Chinner xfs_ilock_map_shared( 97fa96acadSDave Chinner xfs_inode_t *ip) 98fa96acadSDave Chinner { 99fa96acadSDave Chinner uint lock_mode; 100fa96acadSDave Chinner 101fa96acadSDave Chinner if ((ip->i_d.di_format == XFS_DINODE_FMT_BTREE) && 102fa96acadSDave Chinner ((ip->i_df.if_flags & XFS_IFEXTENTS) == 0)) { 103fa96acadSDave Chinner lock_mode = XFS_ILOCK_EXCL; 104fa96acadSDave Chinner } else { 105fa96acadSDave Chinner lock_mode = XFS_ILOCK_SHARED; 106fa96acadSDave Chinner } 107fa96acadSDave Chinner 108fa96acadSDave Chinner xfs_ilock(ip, lock_mode); 109fa96acadSDave Chinner 110fa96acadSDave Chinner return lock_mode; 111fa96acadSDave Chinner } 112fa96acadSDave Chinner 113fa96acadSDave Chinner /* 114fa96acadSDave Chinner * This is simply the unlock routine to go with xfs_ilock_map_shared(). 115fa96acadSDave Chinner * All it does is call xfs_iunlock() with the given lock_mode. 116fa96acadSDave Chinner */ 117fa96acadSDave Chinner void 118fa96acadSDave Chinner xfs_iunlock_map_shared( 119fa96acadSDave Chinner xfs_inode_t *ip, 120fa96acadSDave Chinner unsigned int lock_mode) 121fa96acadSDave Chinner { 122fa96acadSDave Chinner xfs_iunlock(ip, lock_mode); 123fa96acadSDave Chinner } 124fa96acadSDave Chinner 125fa96acadSDave Chinner /* 126fa96acadSDave Chinner * The xfs inode contains 2 locks: a multi-reader lock called the 127fa96acadSDave Chinner * i_iolock and a multi-reader lock called the i_lock. This routine 128fa96acadSDave Chinner * allows either or both of the locks to be obtained. 
 *
 * The 2 locks should always be ordered so that the IO lock is
 * obtained first in order to prevent deadlock.
 *
 * ip -- the inode being locked
 * lock_flags -- this parameter indicates the inode's locks
 *       to be locked.  It can be:
 *		XFS_IOLOCK_SHARED,
 *		XFS_IOLOCK_EXCL,
 *		XFS_ILOCK_SHARED,
 *		XFS_ILOCK_EXCL,
 *		XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED,
 *		XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL,
 *		XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED,
 *		XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL
 */
void
xfs_ilock(
	xfs_inode_t		*ip,
	uint			lock_flags)
{
	trace_xfs_ilock(ip, lock_flags, _RET_IP_);

	/*
	 * You can't set both SHARED and EXCL for the same lock,
	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
	 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
	 */
	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);

	/*
	 * Take the iolock before the ilock - this ordering must be preserved
	 * everywhere to avoid deadlock.  The *_nested variants pass the
	 * lockdep subclass encoded into lock_flags (see xfs_lock_inumorder()).
	 */
	if (lock_flags & XFS_IOLOCK_EXCL)
		mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
	else if (lock_flags & XFS_IOLOCK_SHARED)
		mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));

	if (lock_flags & XFS_ILOCK_EXCL)
		mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
	else if (lock_flags & XFS_ILOCK_SHARED)
		mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
}

/*
 * This is just like xfs_ilock(), except that the caller
 * is guaranteed not to sleep.  It returns 1 if it gets
 * the requested locks and 0 otherwise.  If the IO lock is
 * obtained but the inode lock cannot be, then the IO lock
 * is dropped before returning.
 *
 * ip -- the inode being locked
 * lock_flags -- this parameter indicates the inode's locks to be
 *       to be locked.  See the comment for xfs_ilock() for a list
 *	 of valid values.
 */
int
xfs_ilock_nowait(
	xfs_inode_t		*ip,
	uint			lock_flags)
{
	trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_);

	/*
	 * You can't set both SHARED and EXCL for the same lock,
	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
	 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
	 */
	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);

	/* Try the iolock first - same lock ordering as xfs_ilock(). */
	if (lock_flags & XFS_IOLOCK_EXCL) {
		if (!mrtryupdate(&ip->i_iolock))
			goto out;
	} else if (lock_flags & XFS_IOLOCK_SHARED) {
		if (!mrtryaccess(&ip->i_iolock))
			goto out;
	}
	if (lock_flags & XFS_ILOCK_EXCL) {
		if (!mrtryupdate(&ip->i_lock))
			goto out_undo_iolock;
	} else if (lock_flags & XFS_ILOCK_SHARED) {
		if (!mrtryaccess(&ip->i_lock))
			goto out_undo_iolock;
	}
	return 1;

 out_undo_iolock:
	/* The ilock attempt failed - release any iolock we already hold. */
	if (lock_flags & XFS_IOLOCK_EXCL)
		mrunlock_excl(&ip->i_iolock);
	else if (lock_flags & XFS_IOLOCK_SHARED)
		mrunlock_shared(&ip->i_iolock);
 out:
	return 0;
}

/*
 * xfs_iunlock() is used to drop the inode locks acquired with
 * xfs_ilock() and xfs_ilock_nowait().  The caller must pass
 * in the flags given to xfs_ilock() or xfs_ilock_nowait() so
 * that we know which locks to drop.
 *
 * ip -- the inode being unlocked
 * lock_flags -- this parameter indicates the inode's locks to be
 *       to be unlocked.  See the comment for xfs_ilock() for a list
 *	 of valid values for this parameter.
 *
 */
void
xfs_iunlock(
	xfs_inode_t		*ip,
	uint			lock_flags)
{
	/*
	 * You can't set both SHARED and EXCL for the same lock,
	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
	 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
	 */
	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
	ASSERT(lock_flags != 0);

	if (lock_flags & XFS_IOLOCK_EXCL)
		mrunlock_excl(&ip->i_iolock);
	else if (lock_flags & XFS_IOLOCK_SHARED)
		mrunlock_shared(&ip->i_iolock);

	if (lock_flags & XFS_ILOCK_EXCL)
		mrunlock_excl(&ip->i_lock);
	else if (lock_flags & XFS_ILOCK_SHARED)
		mrunlock_shared(&ip->i_lock);

	trace_xfs_iunlock(ip, lock_flags, _RET_IP_);
}

/*
 * give up write locks.  the i/o lock cannot be held nested
 * if it is being demoted.
274fa96acadSDave Chinner */ 275fa96acadSDave Chinner void 276fa96acadSDave Chinner xfs_ilock_demote( 277fa96acadSDave Chinner xfs_inode_t *ip, 278fa96acadSDave Chinner uint lock_flags) 279fa96acadSDave Chinner { 280fa96acadSDave Chinner ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)); 281fa96acadSDave Chinner ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0); 282fa96acadSDave Chinner 283fa96acadSDave Chinner if (lock_flags & XFS_ILOCK_EXCL) 284fa96acadSDave Chinner mrdemote(&ip->i_lock); 285fa96acadSDave Chinner if (lock_flags & XFS_IOLOCK_EXCL) 286fa96acadSDave Chinner mrdemote(&ip->i_iolock); 287fa96acadSDave Chinner 288fa96acadSDave Chinner trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_); 289fa96acadSDave Chinner } 290fa96acadSDave Chinner 291742ae1e3SDave Chinner #if defined(DEBUG) || defined(XFS_WARN) 292fa96acadSDave Chinner int 293fa96acadSDave Chinner xfs_isilocked( 294fa96acadSDave Chinner xfs_inode_t *ip, 295fa96acadSDave Chinner uint lock_flags) 296fa96acadSDave Chinner { 297fa96acadSDave Chinner if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) { 298fa96acadSDave Chinner if (!(lock_flags & XFS_ILOCK_SHARED)) 299fa96acadSDave Chinner return !!ip->i_lock.mr_writer; 300fa96acadSDave Chinner return rwsem_is_locked(&ip->i_lock.mr_lock); 301fa96acadSDave Chinner } 302fa96acadSDave Chinner 303fa96acadSDave Chinner if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) { 304fa96acadSDave Chinner if (!(lock_flags & XFS_IOLOCK_SHARED)) 305fa96acadSDave Chinner return !!ip->i_iolock.mr_writer; 306fa96acadSDave Chinner return rwsem_is_locked(&ip->i_iolock.mr_lock); 307fa96acadSDave Chinner } 308fa96acadSDave Chinner 309fa96acadSDave Chinner ASSERT(0); 310fa96acadSDave Chinner return 0; 311fa96acadSDave Chinner } 312fa96acadSDave Chinner #endif 313fa96acadSDave Chinner 314c24b5dfaSDave Chinner #ifdef DEBUG 315c24b5dfaSDave Chinner int xfs_locked_n; 316c24b5dfaSDave Chinner int xfs_small_retries; 317c24b5dfaSDave Chinner int xfs_middle_retries; 
318c24b5dfaSDave Chinner int xfs_lots_retries; 319c24b5dfaSDave Chinner int xfs_lock_delays; 320c24b5dfaSDave Chinner #endif 321c24b5dfaSDave Chinner 322c24b5dfaSDave Chinner /* 323c24b5dfaSDave Chinner * Bump the subclass so xfs_lock_inodes() acquires each lock with 324c24b5dfaSDave Chinner * a different value 325c24b5dfaSDave Chinner */ 326c24b5dfaSDave Chinner static inline int 327c24b5dfaSDave Chinner xfs_lock_inumorder(int lock_mode, int subclass) 328c24b5dfaSDave Chinner { 329c24b5dfaSDave Chinner if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) 330c24b5dfaSDave Chinner lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT; 331c24b5dfaSDave Chinner if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) 332c24b5dfaSDave Chinner lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT; 333c24b5dfaSDave Chinner 334c24b5dfaSDave Chinner return lock_mode; 335c24b5dfaSDave Chinner } 336c24b5dfaSDave Chinner 337c24b5dfaSDave Chinner /* 338c24b5dfaSDave Chinner * The following routine will lock n inodes in exclusive mode. 339c24b5dfaSDave Chinner * We assume the caller calls us with the inodes in i_ino order. 340c24b5dfaSDave Chinner * 341c24b5dfaSDave Chinner * We need to detect deadlock where an inode that we lock 342c24b5dfaSDave Chinner * is in the AIL and we start waiting for another inode that is locked 343c24b5dfaSDave Chinner * by a thread in a long running transaction (such as truncate). This can 344c24b5dfaSDave Chinner * result in deadlock since the long running trans might need to wait 345c24b5dfaSDave Chinner * for the inode we just locked in order to push the tail and free space 346c24b5dfaSDave Chinner * in the log. 
 */
void
xfs_lock_inodes(
	xfs_inode_t	**ips,
	int		inodes,
	uint		lock_mode)
{
	int		attempts = 0, i, j, try_lock;
	xfs_log_item_t	*lp;

	ASSERT(ips && (inodes >= 2)); /* we need at least two */

	try_lock = 0;
	i = 0;

again:
	for (; i < inodes; i++) {
		ASSERT(ips[i]);

		if (i && (ips[i] == ips[i-1]))	/* Already locked */
			continue;

		/*
		 * If try_lock is not set yet, make sure all locked inodes
		 * are not in the AIL.
		 * If any are, set try_lock to be used later.
		 */

		if (!try_lock) {
			for (j = (i - 1); j >= 0 && !try_lock; j--) {
				lp = (xfs_log_item_t *)ips[j]->i_itemp;
				if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
					try_lock++;
				}
			}
		}

		/*
		 * If any of the previous locks we have locked is in the AIL,
		 * we must TRY to get the second and subsequent locks. If
		 * we can't get any, we must release all we have
		 * and try again.
		 */

		if (try_lock) {
			/* try_lock must be 0 if i is 0. */
			/*
			 * try_lock means we have an inode locked
			 * that is in the AIL.
			 */
			ASSERT(i != 0);
			if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) {
				attempts++;

				/*
				 * Unlock all previous guys and try again.
				 * xfs_iunlock will try to push the tail
				 * if the inode is in the AIL.
				 */

				for(j = i - 1; j >= 0; j--) {

					/*
					 * Check to see if we've already
					 * unlocked this one.
					 * Not the first one going back,
					 * and the inode ptr is the same.
					 */
					if ((j != (i - 1)) && ips[j] ==
								ips[j+1])
						continue;

					xfs_iunlock(ips[j], lock_mode);
				}

				if ((attempts % 5) == 0) {
					delay(1); /* Don't just spin the CPU */
#ifdef DEBUG
					xfs_lock_delays++;
#endif
				}
				/* Restart from the first inode with blocking locks. */
				i = 0;
				try_lock = 0;
				goto again;
			}
		} else {
			xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
		}
	}

#ifdef DEBUG
	if (attempts) {
		if (attempts < 5) xfs_small_retries++;
		else if (attempts < 100) xfs_middle_retries++;
		else xfs_lots_retries++;
	} else {
		xfs_locked_n++;
	}
#endif
}

/*
 * xfs_lock_two_inodes() can only be used to lock one type of lock
 * at a time - the iolock or the ilock, but not both at once. If
 * we lock both at once, lockdep will report false positives saying
 * we have violated locking orders.
 */
void
xfs_lock_two_inodes(
	xfs_inode_t		*ip0,
	xfs_inode_t		*ip1,
	uint			lock_mode)
{
	xfs_inode_t		*temp;
	int			attempts = 0;
	xfs_log_item_t		*lp;

	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
		ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0);
	ASSERT(ip0->i_ino != ip1->i_ino);

	/* Always lock in ascending inode number order. */
	if (ip0->i_ino > ip1->i_ino) {
		temp = ip0;
		ip0 = ip1;
		ip1 = temp;
	}

 again:
	xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0));

	/*
	 * If the first lock we have locked is in the AIL, we must TRY to get
	 * the second lock. If we can't get it, we must release the first one
	 * and try again.
	 */
	lp = (xfs_log_item_t *)ip0->i_itemp;
	if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
		if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) {
			xfs_iunlock(ip0, lock_mode);
			if ((++attempts % 5) == 0)
				delay(1); /* Don't just spin the CPU */
			goto again;
		}
	} else {
		xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1));
	}
}


/*
 * Wait for the inode flush lock: sleep on the __XFS_IFLOCK_BIT bit
 * waitqueue until the lock can be taken with xfs_iflock_nowait().
 */
void
__xfs_iflock(
	struct xfs_inode	*ip)
{
	wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT);
	DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);

	do {
		prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
		if (xfs_isiflocked(ip))
			io_schedule();
	} while (!xfs_iflock_nowait(ip));

	finish_wait(wq, &wait.wait);
}

/*
 * Translate on-disk inode flags (XFS_DIFLAG_*) to the user-visible
 * extended flags (XFS_XFLAG_*).
 */
STATIC uint
_xfs_dic2xflags(
	__uint16_t		di_flags)
{
	uint			flags = 0;

	if (di_flags & XFS_DIFLAG_ANY) {
		if (di_flags & XFS_DIFLAG_REALTIME)
			flags |= XFS_XFLAG_REALTIME;
		if (di_flags & XFS_DIFLAG_PREALLOC)
			flags |= XFS_XFLAG_PREALLOC;
		if (di_flags & XFS_DIFLAG_IMMUTABLE)
			flags |= XFS_XFLAG_IMMUTABLE;
		if (di_flags & XFS_DIFLAG_APPEND)
			flags |= XFS_XFLAG_APPEND;
		if (di_flags & XFS_DIFLAG_SYNC)
			flags |= XFS_XFLAG_SYNC;
		if (di_flags & XFS_DIFLAG_NOATIME)
			flags |= XFS_XFLAG_NOATIME;
		if (di_flags & XFS_DIFLAG_NODUMP)
			flags |= XFS_XFLAG_NODUMP;
		if (di_flags & XFS_DIFLAG_RTINHERIT)
			flags |= XFS_XFLAG_RTINHERIT;
		if (di_flags & XFS_DIFLAG_PROJINHERIT)
			flags |= XFS_XFLAG_PROJINHERIT;
		if (di_flags & XFS_DIFLAG_NOSYMLINKS)
			flags |= XFS_XFLAG_NOSYMLINKS;
		if (di_flags & XFS_DIFLAG_EXTSIZE)
			flags |= XFS_XFLAG_EXTSIZE;
		if (di_flags & XFS_DIFLAG_EXTSZINHERIT)
			flags |= XFS_XFLAG_EXTSZINHERIT;
		if (di_flags & XFS_DIFLAG_NODEFRAG)
			flags |= XFS_XFLAG_NODEFRAG;
		if (di_flags & XFS_DIFLAG_FILESTREAM)
			flags |= XFS_XFLAG_FILESTREAM;
	}

	return flags;
}

/* Extended flags for an in-core inode, including the attr-fork flag. */
uint
xfs_ip2xflags(
	xfs_inode_t		*ip)
{
	xfs_icdinode_t		*dic = &ip->i_d;

	return _xfs_dic2xflags(dic->di_flags) |
				(XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0);
}

/* Extended flags for an on-disk inode, including the attr-fork flag. */
uint
xfs_dic2xflags(
	xfs_dinode_t		*dip)
{
	return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) |
				(XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
}

/*
 * Look up an inode from "name".  If ci_name is not NULL, then a CI match
 * is allowed, otherwise it has to be an exact match.  If a CI match is found,
 * ci_name->name will point to the actual name (caller must free) or
 * will be set to NULL if an exact match is found.
 */
int
xfs_lookup(
	xfs_inode_t		*dp,
	struct xfs_name		*name,
	xfs_inode_t		**ipp,
	struct xfs_name		*ci_name)
{
	xfs_ino_t		inum;
	int			error;
	uint			lock_mode;

	trace_xfs_lookup(dp, name);

	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
		return XFS_ERROR(EIO);

	lock_mode = xfs_ilock_map_shared(dp);
	error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
	xfs_iunlock_map_shared(dp, lock_mode);

	if (error)
		goto out;

	error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp);
	if (error)
		goto out_free_name;

	return 0;

 out_free_name:
	if (ci_name)
		kmem_free(ci_name->name);
 out:
	*ipp = NULL;
	return error;
}

/*
 * Allocate an inode on disk and return a copy of its in-core version.
 * The in-core inode is locked exclusively.  Set mode, nlink, and rdev
 * appropriately within the inode.  The uid and gid for the inode are
 * set according to the contents of the given cred structure.
 *
 * Use xfs_dialloc() to allocate the on-disk inode. If xfs_dialloc()
 * has a free inode available, call xfs_iget() to obtain the in-core
 * version of the allocated inode.  Finally, fill in the inode and
 * log its initial contents.  In this case, ialloc_context would be
 * set to NULL.
 *
 * If xfs_dialloc() does not have an available inode, it will replenish
 * its supply by doing an allocation. Since we can only do one
 * allocation within a transaction without deadlocks, we must commit
 * the current transaction before returning the inode itself.
 * In this case, therefore, we will set ialloc_context and return.
 * The caller should then commit the current transaction, start a new
 * transaction, and call xfs_ialloc() again to actually get the inode.
 *
 * To ensure that some other process does not grab the inode that
 * was allocated during the first call to xfs_ialloc(), this routine
 * also returns the [locked] bp pointing to the head of the freelist
 * as ialloc_context.  The caller should hold this buffer across
 * the commit and pass it back into this routine on the second call.
 *
 * If we are allocating quota inodes, we do not have a parent inode
 * to attach to or associate with (i.e. pip == NULL) because they
 * are not linked into the directory structure - they are attached
 * directly to the superblock - and so have no parent.
 */
int
xfs_ialloc(
	xfs_trans_t	*tp,
	xfs_inode_t	*pip,
	umode_t		mode,
	xfs_nlink_t	nlink,
	xfs_dev_t	rdev,		/* only used for S_IFCHR/S_IFBLK etc. */
	prid_t		prid,
	int		okalloc,
	xfs_buf_t	**ialloc_context,
	xfs_inode_t	**ipp)
{
	struct xfs_mount *mp = tp->t_mountp;
	xfs_ino_t	ino;
	xfs_inode_t	*ip;
	uint		flags;
	int		error;
	timespec_t	tv;
	int		filestreams = 0;

	/*
	 * Call the space management code to pick
	 * the on-disk inode to be allocated.
	 */
	error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
			    ialloc_context, &ino);
	if (error)
		return error;
	/*
	 * Either the caller must commit and retry (ialloc_context set), or
	 * no inode was available at all; in both cases return success with
	 * *ipp == NULL and let the caller decide what to do.
	 */
	if (*ialloc_context || ino == NULLFSINO) {
		*ipp = NULL;
		return 0;
	}
	ASSERT(*ialloc_context == NULL);

	/*
	 * Get the in-core inode with the lock held exclusively.
	 * This is because we're setting fields here we need
	 * to prevent others from looking at until we're done.
	 */
	error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE,
			 XFS_ILOCK_EXCL, &ip);
	if (error)
		return error;
	ASSERT(ip != NULL);

	ip->i_d.di_mode = mode;
	ip->i_d.di_onlink = 0;
	ip->i_d.di_nlink = nlink;
	ASSERT(ip->i_d.di_nlink == nlink);
	ip->i_d.di_uid = xfs_kuid_to_uid(current_fsuid());
	ip->i_d.di_gid = xfs_kgid_to_gid(current_fsgid());
	xfs_set_projid(ip, prid);
	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));

	/*
	 * If the superblock version is up to where we support new format
	 * inodes and this is currently an old format inode, then change
	 * the inode version number now.  This way we only do the conversion
	 * here rather than here and in the flush/logging code.
	 */
	if (xfs_sb_version_hasnlink(&mp->m_sb) &&
	    ip->i_d.di_version == 1) {
		ip->i_d.di_version = 2;
		/*
		 * We've already zeroed the old link count, the projid field,
		 * and the pad field.
		 */
	}

	/*
	 * Project ids won't be stored on disk if we are using a version 1 inode.
	 */
	if ((prid != 0) && (ip->i_d.di_version == 1))
		xfs_bump_ino_vers2(tp, ip);

	if (pip && XFS_INHERIT_GID(pip)) {
		ip->i_d.di_gid = pip->i_d.di_gid;
		if ((pip->i_d.di_mode & S_ISGID) && S_ISDIR(mode)) {
			ip->i_d.di_mode |= S_ISGID;
		}
	}

	/*
	 * If the group ID of the new file does not match the effective group
	 * ID or one of the supplementary group IDs, the S_ISGID bit is cleared
	 * (and only if the irix_sgid_inherit compatibility variable is set).
	 */
	if ((irix_sgid_inherit) &&
	    (ip->i_d.di_mode & S_ISGID) &&
	    (!in_group_p(xfs_gid_to_kgid(ip->i_d.di_gid)))) {
		ip->i_d.di_mode &= ~S_ISGID;
	}

	ip->i_d.di_size = 0;
	ip->i_d.di_nextents = 0;
	ASSERT(ip->i_d.di_nblocks == 0);

	/* Stamp mtime and mirror it into atime/ctime (and crtime for v3). */
	nanotime(&tv);
	ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
	ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
	ip->i_d.di_atime = ip->i_d.di_mtime;
	ip->i_d.di_ctime = ip->i_d.di_mtime;

	/*
	 * di_gen will have been taken care of in xfs_iread.
	 */
	ip->i_d.di_extsize = 0;
	ip->i_d.di_dmevmask = 0;
	ip->i_d.di_dmstate = 0;
	ip->i_d.di_flags = 0;

	if (ip->i_d.di_version == 3) {
		ASSERT(ip->i_d.di_ino == ino);
		ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid));
		ip->i_d.di_crc = 0;
		ip->i_d.di_changecount = 1;
		ip->i_d.di_lsn = 0;
		ip->i_d.di_flags2 = 0;
		memset(&(ip->i_d.di_pad2[0]), 0, sizeof(ip->i_d.di_pad2));
		ip->i_d.di_crtime = ip->i_d.di_mtime;
	}


	flags = XFS_ILOG_CORE;
	switch (mode & S_IFMT) {
	case S_IFIFO:
	case S_IFCHR:
	case S_IFBLK:
	case S_IFSOCK:
		ip->i_d.di_format = XFS_DINODE_FMT_DEV;
		ip->i_df.if_u2.if_rdev = rdev;
		ip->i_df.if_flags = 0;
		flags |= XFS_ILOG_DEV;
		break;
	case S_IFREG:
		/*
		 * we can't set up filestreams until after the VFS inode
		 * is set up properly.
		 */
		if (pip && xfs_inode_is_filestream(pip))
			filestreams = 1;
		/* fall through */
	case S_IFDIR:
		/* Inherit per-inode flags from the parent directory, if any. */
		if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
			uint	di_flags = 0;

			if (S_ISDIR(mode)) {
				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
					di_flags |= XFS_DIFLAG_RTINHERIT;
				if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
					di_flags |= XFS_DIFLAG_EXTSZINHERIT;
					ip->i_d.di_extsize = pip->i_d.di_extsize;
				}
			} else if (S_ISREG(mode)) {
				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
					di_flags |= XFS_DIFLAG_REALTIME;
				if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
					di_flags |= XFS_DIFLAG_EXTSIZE;
					ip->i_d.di_extsize = pip->i_d.di_extsize;
				}
			}
			if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) &&
			    xfs_inherit_noatime)
				di_flags |= XFS_DIFLAG_NOATIME;
			if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) &&
			    xfs_inherit_nodump)
				di_flags |= XFS_DIFLAG_NODUMP;
			if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) &&
			    xfs_inherit_sync)
				di_flags |= XFS_DIFLAG_SYNC;
			if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) &&
			    xfs_inherit_nosymlinks)
				di_flags |= XFS_DIFLAG_NOSYMLINKS;
			if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
				di_flags |= XFS_DIFLAG_PROJINHERIT;
			if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) &&
			    xfs_inherit_nodefrag)
				di_flags |= XFS_DIFLAG_NODEFRAG;
			if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
				di_flags |= XFS_DIFLAG_FILESTREAM;
			ip->i_d.di_flags |= di_flags;
		}
		/* FALLTHROUGH */
	case S_IFLNK:
		ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
		ip->i_df.if_flags = XFS_IFEXTENTS;
		ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
		ip->i_df.if_u1.if_extents = NULL;
		break;
	default:
		ASSERT(0);
	}
	/*
	 * Attribute fork settings for new inode.
	 */
	ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
	ip->i_d.di_anextents = 0;

	/*
	 * Log the new values stuffed into the inode.
	 */
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	xfs_trans_log_inode(tp, ip, flags);

	/* now that we have an i_mode we can setup inode ops and unlock */
	xfs_setup_inode(ip);

	/* now we have set up the vfs inode we can associate the filestream */
	if (filestreams) {
		error = xfs_filestream_associate(pip, ip);
		if (error < 0)
			/*
			 * NOTE(review): this returns with ip joined to the
			 * transaction; presumably the caller's transaction
			 * cancel cleans up -- confirm against callers.
			 */
			return -error;
		if (!error)
			xfs_iflags_set(ip, XFS_IFILESTREAM);
	}

	*ipp = ip;
	return 0;
}

/*
 * Allocates a new inode from disk and return a pointer to the
 * incore copy. This routine will internally commit the current
 * transaction and allocate a new one if the Space Manager needed
 * to do an allocation to replenish the inode free-list.
 *
 * This routine is designed to be called from xfs_create and
 * xfs_create_dir.
 *
 */
int
xfs_dir_ialloc(
	xfs_trans_t	**tpp,		/* input: current transaction;
					   output: may be a new transaction. */
	xfs_inode_t	*dp,		/* directory within whose allocate
					   the inode.
					   */
	umode_t		mode,
	xfs_nlink_t	nlink,
	xfs_dev_t	rdev,
	prid_t		prid,		/* project id */
	int		okalloc,	/* ok to allocate new space */
	xfs_inode_t	**ipp,		/* pointer to inode; it will be
					   locked. */
	int		*committed)	/* out: set if the first transaction
					   was committed */

{
	xfs_trans_t	*tp;
	xfs_trans_t	*ntp;
	xfs_inode_t	*ip;
	xfs_buf_t	*ialloc_context = NULL;
	int		code;
	void		*dqinfo;
	uint		tflags;

	tp = *tpp;
	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);

	/*
	 * xfs_ialloc will return a pointer to an incore inode if
	 * the Space Manager has an available inode on the free
	 * list. Otherwise, it will do an allocation and replenish
	 * the freelist.  Since we can only do one allocation per
	 * transaction without deadlocks, we will need to commit the
	 * current transaction and start a new one.  We will then
	 * need to call xfs_ialloc again to get the inode.
	 *
	 * If xfs_ialloc did an allocation to replenish the freelist,
	 * it returns the bp containing the head of the freelist as
	 * ialloc_context. We will hold a lock on it across the
	 * transaction commit so that no other process can steal
	 * the inode(s) that we've just allocated.
	 */
	code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc,
			  &ialloc_context, &ip);

	/*
	 * Return an error if we were unable to allocate a new inode.
	 * This should only happen if we run out of space on disk or
	 * encounter a disk error.
	 */
	if (code) {
		*ipp = NULL;
		return code;
	}
	/* Neither a retry context nor an inode: the filesystem is full. */
	if (!ialloc_context && !ip) {
		*ipp = NULL;
		return XFS_ERROR(ENOSPC);
	}

	/*
	 * If the AGI buffer is non-NULL, then we were unable to get an
	 * inode in one operation.  We need to commit the current
	 * transaction and call xfs_ialloc() again.  It is guaranteed
	 * to succeed the second time.
	 */
	if (ialloc_context) {
		struct xfs_trans_res tres;

		/*
		 * Normally, xfs_trans_commit releases all the locks.
		 * We call bhold to hang on to the ialloc_context across
		 * the commit.  Holding this buffer prevents any other
		 * processes from doing any allocations in this
		 * allocation group.
		 */
		xfs_trans_bhold(tp, ialloc_context);
		/*
		 * Save the log reservation so we can use
		 * them in the next transaction.
		 */
		tres.tr_logres = xfs_trans_get_log_res(tp);
		tres.tr_logcount = xfs_trans_get_log_count(tp);

		/*
		 * We want the quota changes to be associated with the next
		 * transaction, NOT this one. So, detach the dqinfo from this
		 * and attach it to the next transaction.
		 */
		dqinfo = NULL;
		tflags = 0;
		if (tp->t_dqinfo) {
			dqinfo = (void *)tp->t_dqinfo;
			tp->t_dqinfo = NULL;
			tflags = tp->t_flags & XFS_TRANS_DQ_DIRTY;
			tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY);
		}

		ntp = xfs_trans_dup(tp);
		code = xfs_trans_commit(tp, 0);
		tp = ntp;
		if (committed != NULL) {
			*committed = 1;
		}
		/*
		 * If we get an error during the commit processing,
		 * release the buffer that is still held and return
		 * to the caller.
		 */
		if (code) {
			xfs_buf_relse(ialloc_context);
			if (dqinfo) {
				tp->t_dqinfo = dqinfo;
				xfs_trans_free_dqinfo(tp);
			}
			*tpp = ntp;
			*ipp = NULL;
			return code;
		}

		/*
		 * transaction commit worked ok so we can drop the extra ticket
		 * reference that we gained in xfs_trans_dup()
		 */
		xfs_log_ticket_put(tp->t_ticket);
		tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
		code = xfs_trans_reserve(tp, &tres, 0, 0);

		/*
		 * Re-attach the quota info that we detached from prev trx.
		 */
		if (dqinfo) {
			tp->t_dqinfo = dqinfo;
			tp->t_flags |= tflags;
		}

		if (code) {
			xfs_buf_relse(ialloc_context);
			*tpp = ntp;
			*ipp = NULL;
			return code;
		}
		/* Rejoin the held AGI buffer to the new transaction. */
		xfs_trans_bjoin(tp, ialloc_context);

		/*
		 * Call ialloc again. Since we've locked out all
		 * other allocations in this allocation group,
		 * this call should always succeed.
		 */
		code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid,
				  okalloc, &ialloc_context, &ip);

		/*
		 * If we get an error at this point, return to the caller
		 * so that the current transaction can be aborted.
		 */
		if (code) {
			*tpp = tp;
			*ipp = NULL;
			return code;
		}
		ASSERT(!ialloc_context && ip);

	} else {
		if (committed != NULL)
			*committed = 0;
	}

	*ipp = ip;
	*tpp = tp;

	return 0;
}

/*
 * Decrement the link count on an inode & log the change.
 * If this causes the link count to go to zero, initiate the
 * logging activity required to truncate a file.
1051e546cb79SDave Chinner */ 1052e546cb79SDave Chinner int /* error */ 1053e546cb79SDave Chinner xfs_droplink( 1054e546cb79SDave Chinner xfs_trans_t *tp, 1055e546cb79SDave Chinner xfs_inode_t *ip) 1056e546cb79SDave Chinner { 1057e546cb79SDave Chinner int error; 1058e546cb79SDave Chinner 1059e546cb79SDave Chinner xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); 1060e546cb79SDave Chinner 1061e546cb79SDave Chinner ASSERT (ip->i_d.di_nlink > 0); 1062e546cb79SDave Chinner ip->i_d.di_nlink--; 1063e546cb79SDave Chinner drop_nlink(VFS_I(ip)); 1064e546cb79SDave Chinner xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1065e546cb79SDave Chinner 1066e546cb79SDave Chinner error = 0; 1067e546cb79SDave Chinner if (ip->i_d.di_nlink == 0) { 1068e546cb79SDave Chinner /* 1069e546cb79SDave Chinner * We're dropping the last link to this file. 1070e546cb79SDave Chinner * Move the on-disk inode to the AGI unlinked list. 1071e546cb79SDave Chinner * From xfs_inactive() we will pull the inode from 1072e546cb79SDave Chinner * the list and free it. 1073e546cb79SDave Chinner */ 1074e546cb79SDave Chinner error = xfs_iunlink(tp, ip); 1075e546cb79SDave Chinner } 1076e546cb79SDave Chinner return error; 1077e546cb79SDave Chinner } 1078e546cb79SDave Chinner 1079e546cb79SDave Chinner /* 1080e546cb79SDave Chinner * This gets called when the inode's version needs to be changed from 1 to 2. 1081e546cb79SDave Chinner * Currently this happens when the nlink field overflows the old 16-bit value 1082e546cb79SDave Chinner * or when chproj is called to change the project for the first time. 1083e546cb79SDave Chinner * As a side effect the superblock version will also get rev'd 1084e546cb79SDave Chinner * to contain the NLINK bit. 
1085e546cb79SDave Chinner */ 1086e546cb79SDave Chinner void 1087e546cb79SDave Chinner xfs_bump_ino_vers2( 1088e546cb79SDave Chinner xfs_trans_t *tp, 1089e546cb79SDave Chinner xfs_inode_t *ip) 1090e546cb79SDave Chinner { 1091e546cb79SDave Chinner xfs_mount_t *mp; 1092e546cb79SDave Chinner 1093e546cb79SDave Chinner ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 1094e546cb79SDave Chinner ASSERT(ip->i_d.di_version == 1); 1095e546cb79SDave Chinner 1096e546cb79SDave Chinner ip->i_d.di_version = 2; 1097e546cb79SDave Chinner ip->i_d.di_onlink = 0; 1098e546cb79SDave Chinner memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 1099e546cb79SDave Chinner mp = tp->t_mountp; 1100e546cb79SDave Chinner if (!xfs_sb_version_hasnlink(&mp->m_sb)) { 1101e546cb79SDave Chinner spin_lock(&mp->m_sb_lock); 1102e546cb79SDave Chinner if (!xfs_sb_version_hasnlink(&mp->m_sb)) { 1103e546cb79SDave Chinner xfs_sb_version_addnlink(&mp->m_sb); 1104e546cb79SDave Chinner spin_unlock(&mp->m_sb_lock); 1105e546cb79SDave Chinner xfs_mod_sb(tp, XFS_SB_VERSIONNUM); 1106e546cb79SDave Chinner } else { 1107e546cb79SDave Chinner spin_unlock(&mp->m_sb_lock); 1108e546cb79SDave Chinner } 1109e546cb79SDave Chinner } 1110e546cb79SDave Chinner /* Caller must log the inode */ 1111e546cb79SDave Chinner } 1112e546cb79SDave Chinner 1113e546cb79SDave Chinner /* 1114e546cb79SDave Chinner * Increment the link count on an inode & log the change. 
 */
int
xfs_bumplink(
	xfs_trans_t *tp,
	xfs_inode_t *ip)
{
	/* Any link count change bumps ctime. */
	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);

	ASSERT(ip->i_d.di_nlink > 0);
	ip->i_d.di_nlink++;
	inc_nlink(VFS_I(ip));
	if ((ip->i_d.di_version == 1) &&
	    (ip->i_d.di_nlink > XFS_MAXLINK_1)) {
		/*
		 * The inode has increased its number of links beyond
		 * what can fit in an old format inode.  It now needs
		 * to be converted to a version 2 inode with a 32 bit
		 * link count.  If this is the first inode in the file
		 * system to do this, then we need to bump the superblock
		 * version number as well.
		 */
		xfs_bump_ino_vers2(tp, ip);
	}

	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	/* Always succeeds; int return kept for symmetry with xfs_droplink(). */
	return 0;
}

int
xfs_create(
	xfs_inode_t		*dp,
	struct xfs_name		*name,
	umode_t			mode,
	xfs_dev_t		rdev,
	xfs_inode_t		**ipp)
{
	int			is_dir = S_ISDIR(mode);
	struct xfs_mount	*mp = dp->i_mount;
	struct xfs_inode	*ip = NULL;
	struct xfs_trans	*tp = NULL;
	int			error;
	xfs_bmap_free_t		free_list;
	xfs_fsblock_t		first_block;
	bool			unlock_dp_on_error = false;
	uint			cancel_flags;
	int			committed;
	prid_t			prid;
	struct xfs_dquot	*udqp = NULL;
	struct xfs_dquot	*gdqp = NULL;
	struct xfs_dquot	*pdqp = NULL;
	struct xfs_trans_res	tres;
	uint			resblks;

	trace_xfs_create(dp, name);

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	/* Inherit the project id from the parent if it asks for it. */
	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
		prid = xfs_get_projid(dp);
	else
		prid = XFS_PROJID_DEFAULT;

	/*
	 * Make sure that we have allocated dquot(s) on
disk. 1180c24b5dfaSDave Chinner */ 11817aab1b28SDwight Engen error = xfs_qm_vop_dqalloc(dp, xfs_kuid_to_uid(current_fsuid()), 11827aab1b28SDwight Engen xfs_kgid_to_gid(current_fsgid()), prid, 1183c24b5dfaSDave Chinner XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, 1184c24b5dfaSDave Chinner &udqp, &gdqp, &pdqp); 1185c24b5dfaSDave Chinner if (error) 1186c24b5dfaSDave Chinner return error; 1187c24b5dfaSDave Chinner 1188c24b5dfaSDave Chinner if (is_dir) { 1189c24b5dfaSDave Chinner rdev = 0; 1190c24b5dfaSDave Chinner resblks = XFS_MKDIR_SPACE_RES(mp, name->len); 11913d3c8b52SJie Liu tres.tr_logres = M_RES(mp)->tr_mkdir.tr_logres; 11923d3c8b52SJie Liu tres.tr_logcount = XFS_MKDIR_LOG_COUNT; 1193c24b5dfaSDave Chinner tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR); 1194c24b5dfaSDave Chinner } else { 1195c24b5dfaSDave Chinner resblks = XFS_CREATE_SPACE_RES(mp, name->len); 11963d3c8b52SJie Liu tres.tr_logres = M_RES(mp)->tr_create.tr_logres; 11973d3c8b52SJie Liu tres.tr_logcount = XFS_CREATE_LOG_COUNT; 1198c24b5dfaSDave Chinner tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE); 1199c24b5dfaSDave Chinner } 1200c24b5dfaSDave Chinner 1201c24b5dfaSDave Chinner cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 1202c24b5dfaSDave Chinner 1203c24b5dfaSDave Chinner /* 1204c24b5dfaSDave Chinner * Initially assume that the file does not exist and 1205c24b5dfaSDave Chinner * reserve the resources for that case. If that is not 1206c24b5dfaSDave Chinner * the case we'll drop the one we have and get a more 1207c24b5dfaSDave Chinner * appropriate transaction later. 
1208c24b5dfaSDave Chinner */ 12093d3c8b52SJie Liu tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; 12103d3c8b52SJie Liu error = xfs_trans_reserve(tp, &tres, resblks, 0); 1211c24b5dfaSDave Chinner if (error == ENOSPC) { 1212c24b5dfaSDave Chinner /* flush outstanding delalloc blocks and retry */ 1213c24b5dfaSDave Chinner xfs_flush_inodes(mp); 12143d3c8b52SJie Liu error = xfs_trans_reserve(tp, &tres, resblks, 0); 1215c24b5dfaSDave Chinner } 1216c24b5dfaSDave Chinner if (error == ENOSPC) { 1217c24b5dfaSDave Chinner /* No space at all so try a "no-allocation" reservation */ 1218c24b5dfaSDave Chinner resblks = 0; 12193d3c8b52SJie Liu error = xfs_trans_reserve(tp, &tres, 0, 0); 1220c24b5dfaSDave Chinner } 1221c24b5dfaSDave Chinner if (error) { 1222c24b5dfaSDave Chinner cancel_flags = 0; 1223c24b5dfaSDave Chinner goto out_trans_cancel; 1224c24b5dfaSDave Chinner } 1225c24b5dfaSDave Chinner 1226c24b5dfaSDave Chinner xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 1227c24b5dfaSDave Chinner unlock_dp_on_error = true; 1228c24b5dfaSDave Chinner 1229c24b5dfaSDave Chinner xfs_bmap_init(&free_list, &first_block); 1230c24b5dfaSDave Chinner 1231c24b5dfaSDave Chinner /* 1232c24b5dfaSDave Chinner * Reserve disk quota and the inode. 1233c24b5dfaSDave Chinner */ 1234c24b5dfaSDave Chinner error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, 1235c24b5dfaSDave Chinner pdqp, resblks, 1, 0); 1236c24b5dfaSDave Chinner if (error) 1237c24b5dfaSDave Chinner goto out_trans_cancel; 1238c24b5dfaSDave Chinner 1239c24b5dfaSDave Chinner error = xfs_dir_canenter(tp, dp, name, resblks); 1240c24b5dfaSDave Chinner if (error) 1241c24b5dfaSDave Chinner goto out_trans_cancel; 1242c24b5dfaSDave Chinner 1243c24b5dfaSDave Chinner /* 1244c24b5dfaSDave Chinner * A newly created regular or special file just has one directory 1245c24b5dfaSDave Chinner * entry pointing to them, but a directory also the "." entry 1246c24b5dfaSDave Chinner * pointing to itself. 
1247c24b5dfaSDave Chinner */ 1248c24b5dfaSDave Chinner error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, 1249c24b5dfaSDave Chinner prid, resblks > 0, &ip, &committed); 1250c24b5dfaSDave Chinner if (error) { 1251c24b5dfaSDave Chinner if (error == ENOSPC) 1252c24b5dfaSDave Chinner goto out_trans_cancel; 1253c24b5dfaSDave Chinner goto out_trans_abort; 1254c24b5dfaSDave Chinner } 1255c24b5dfaSDave Chinner 1256c24b5dfaSDave Chinner /* 1257c24b5dfaSDave Chinner * Now we join the directory inode to the transaction. We do not do it 1258c24b5dfaSDave Chinner * earlier because xfs_dir_ialloc might commit the previous transaction 1259c24b5dfaSDave Chinner * (and release all the locks). An error from here on will result in 1260c24b5dfaSDave Chinner * the transaction cancel unlocking dp so don't do it explicitly in the 1261c24b5dfaSDave Chinner * error path. 1262c24b5dfaSDave Chinner */ 1263c24b5dfaSDave Chinner xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 1264c24b5dfaSDave Chinner unlock_dp_on_error = false; 1265c24b5dfaSDave Chinner 1266c24b5dfaSDave Chinner error = xfs_dir_createname(tp, dp, name, ip->i_ino, 1267c24b5dfaSDave Chinner &first_block, &free_list, resblks ? 
1268c24b5dfaSDave Chinner resblks - XFS_IALLOC_SPACE_RES(mp) : 0); 1269c24b5dfaSDave Chinner if (error) { 1270c24b5dfaSDave Chinner ASSERT(error != ENOSPC); 1271c24b5dfaSDave Chinner goto out_trans_abort; 1272c24b5dfaSDave Chinner } 1273c24b5dfaSDave Chinner xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1274c24b5dfaSDave Chinner xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 1275c24b5dfaSDave Chinner 1276c24b5dfaSDave Chinner if (is_dir) { 1277c24b5dfaSDave Chinner error = xfs_dir_init(tp, ip, dp); 1278c24b5dfaSDave Chinner if (error) 1279c24b5dfaSDave Chinner goto out_bmap_cancel; 1280c24b5dfaSDave Chinner 1281c24b5dfaSDave Chinner error = xfs_bumplink(tp, dp); 1282c24b5dfaSDave Chinner if (error) 1283c24b5dfaSDave Chinner goto out_bmap_cancel; 1284c24b5dfaSDave Chinner } 1285c24b5dfaSDave Chinner 1286c24b5dfaSDave Chinner /* 1287c24b5dfaSDave Chinner * If this is a synchronous mount, make sure that the 1288c24b5dfaSDave Chinner * create transaction goes to disk before returning to 1289c24b5dfaSDave Chinner * the user. 1290c24b5dfaSDave Chinner */ 1291c24b5dfaSDave Chinner if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) 1292c24b5dfaSDave Chinner xfs_trans_set_sync(tp); 1293c24b5dfaSDave Chinner 1294c24b5dfaSDave Chinner /* 1295c24b5dfaSDave Chinner * Attach the dquot(s) to the inodes and modify them incore. 1296c24b5dfaSDave Chinner * These ids of the inode couldn't have changed since the new 1297c24b5dfaSDave Chinner * inode has been locked ever since it was created. 
1298c24b5dfaSDave Chinner */ 1299c24b5dfaSDave Chinner xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp); 1300c24b5dfaSDave Chinner 1301c24b5dfaSDave Chinner error = xfs_bmap_finish(&tp, &free_list, &committed); 1302c24b5dfaSDave Chinner if (error) 1303c24b5dfaSDave Chinner goto out_bmap_cancel; 1304c24b5dfaSDave Chinner 1305c24b5dfaSDave Chinner error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1306c24b5dfaSDave Chinner if (error) 1307c24b5dfaSDave Chinner goto out_release_inode; 1308c24b5dfaSDave Chinner 1309c24b5dfaSDave Chinner xfs_qm_dqrele(udqp); 1310c24b5dfaSDave Chinner xfs_qm_dqrele(gdqp); 1311c24b5dfaSDave Chinner xfs_qm_dqrele(pdqp); 1312c24b5dfaSDave Chinner 1313c24b5dfaSDave Chinner *ipp = ip; 1314c24b5dfaSDave Chinner return 0; 1315c24b5dfaSDave Chinner 1316c24b5dfaSDave Chinner out_bmap_cancel: 1317c24b5dfaSDave Chinner xfs_bmap_cancel(&free_list); 1318c24b5dfaSDave Chinner out_trans_abort: 1319c24b5dfaSDave Chinner cancel_flags |= XFS_TRANS_ABORT; 1320c24b5dfaSDave Chinner out_trans_cancel: 1321c24b5dfaSDave Chinner xfs_trans_cancel(tp, cancel_flags); 1322c24b5dfaSDave Chinner out_release_inode: 1323c24b5dfaSDave Chinner /* 1324c24b5dfaSDave Chinner * Wait until after the current transaction is aborted to 1325c24b5dfaSDave Chinner * release the inode. This prevents recursive transactions 1326c24b5dfaSDave Chinner * and deadlocks from xfs_inactive. 
 */
	if (ip)
		IRELE(ip);

	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);
	xfs_qm_dqrele(pdqp);

	if (unlock_dp_on_error)
		xfs_iunlock(dp, XFS_ILOCK_EXCL);
	return error;
}

/*
 * Create a new hard link @target_name in directory @tdp pointing at the
 * existing inode @sip.  @sip must not be a directory (asserted below).
 *
 * Returns 0 on success or an errno (positive at this layer — note the
 * "error == ENOSPC" comparison below).
 */
int
xfs_link(
	xfs_inode_t		*tdp,
	xfs_inode_t		*sip,
	struct xfs_name	*target_name)
{
	xfs_mount_t		*mp = tdp->i_mount;
	xfs_trans_t		*tp;
	int			error;
	xfs_bmap_free_t		free_list;
	xfs_fsblock_t		first_block;
	int			cancel_flags;
	int			committed;
	int			resblks;

	trace_xfs_link(tdp, target_name);

	ASSERT(!S_ISDIR(sip->i_d.di_mode));

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	/* Attach quota information to both inodes before starting the trans. */
	error = xfs_qm_dqattach(sip, 0);
	if (error)
		goto std_return;

	error = xfs_qm_dqattach(tdp, 0);
	if (error)
		goto std_return;

	tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);
	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
	resblks = XFS_LINK_SPACE_RES(mp, target_name->len);

	/*
	 * Reserve log and block space; if the filesystem is out of space,
	 * retry with a "no-allocation" (resblks == 0) reservation.
	 */
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, resblks, 0);
	if (error == ENOSPC) {
		resblks = 0;
		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, 0, 0);
	}
	if (error) {
		/* Reservation failed, so there is no log res to release. */
		cancel_flags = 0;
		goto error_return;
	}

	xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);

	/* Join both inodes; the cancel/commit will drop the ILOCKs for us. */
	xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);

	/*
	 * If we are using project inheritance, we only allow hard link
	 * creation in our tree when the project IDs are the same; else
	 * the tree quota mechanism could be circumvented.
	 */
	if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
		     (xfs_get_projid(tdp) != xfs_get_projid(sip)))) {
		error = XFS_ERROR(EXDEV);
		goto error_return;
	}

	/* Check the directory can take the new entry before modifying it. */
	error = xfs_dir_canenter(tp, tdp, target_name, resblks);
	if (error)
		goto error_return;

	xfs_bmap_init(&free_list, &first_block);

	error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
					&first_block, &free_list, resblks);
	if (error)
		goto abort_return;
	xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);

	/* Bump the link count on the inode being linked to. */
	error = xfs_bumplink(tp, sip);
	if (error)
		goto abort_return;

	/*
	 * If this is a synchronous mount, make sure that the
	 * link transaction goes to disk before returning to
	 * the user.
	 */
	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
		xfs_trans_set_sync(tp);
	}

	error = xfs_bmap_finish (&tp, &free_list, &committed);
	if (error) {
		xfs_bmap_cancel(&free_list);
		goto abort_return;
	}

	return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);

	/* Dirty transaction: must abort as well as cancel. */
 abort_return:
	cancel_flags |= XFS_TRANS_ABORT;
 error_return:
	xfs_trans_cancel(tp, cancel_flags);
 std_return:
	return error;
}

/*
 * Free up the underlying blocks past new_size. The new size must be smaller
 * than the current size. This routine can be used both for the attribute and
 * data fork, and does not modify the inode size, which is left to the caller.
 *
 * The transaction passed to this routine must have made a permanent log
 * reservation of at least XFS_ITRUNCATE_LOG_RES.
 * This routine may commit the
 * given transaction and start new ones, so make sure everything involved in
 * the transaction is tidy before calling here. Some transaction will be
 * returned to the caller to be committed. The incoming transaction must
 * already include the inode, and both inode locks must be held exclusively.
 * The inode must also be "held" within the transaction. On return the inode
 * will be "held" within the returned transaction. This routine does NOT
 * require any disk space to be reserved for it within the transaction.
 *
 * If we get an error, we must return with the inode locked and linked into the
 * current transaction. This keeps things simple for the higher level code,
 * because it always knows that the inode is locked and held in the transaction
 * that returns to it whether errors occur or not. We don't mark the inode
 * dirty on error so that transactions can be easily aborted if possible.
 */
int
xfs_itruncate_extents(
	struct xfs_trans	**tpp,
	struct xfs_inode	*ip,
	int			whichfork,
	xfs_fsize_t		new_size)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp = *tpp;
	struct xfs_trans	*ntp;
	xfs_bmap_free_t		free_list;
	xfs_fsblock_t		first_block;
	xfs_fileoff_t		first_unmap_block;
	xfs_fileoff_t		last_block;
	xfs_filblks_t		unmap_len;
	int			committed;
	int			error = 0;
	int			done = 0;

	/* Caller contract: locks held, permanent log res, dquots attached. */
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	ASSERT(!atomic_read(&VFS_I(ip)->i_count) ||
	       xfs_isilocked(ip, XFS_IOLOCK_EXCL));
	ASSERT(new_size <= XFS_ISIZE(ip));
	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
	ASSERT(ip->i_itemp != NULL);
	ASSERT(ip->i_itemp->ili_lock_flags == 0);
	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));

	trace_xfs_itruncate_extents_start(ip, new_size);

	/*
	 * Since it is possible for space to become allocated beyond
	 * the end of the file (in a crash where the space is allocated
	 * but the inode size is not yet updated), simply remove any
	 * blocks which show up between the new EOF and the maximum
	 * possible file size. If the first block to be removed is
	 * beyond the maximum file size (ie it is the same as last_block),
	 * then there is nothing to do.
	 */
	first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
	last_block = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
	if (first_unmap_block == last_block)
		return 0;

	ASSERT(first_unmap_block < last_block);
	unmap_len = last_block - first_unmap_block + 1;
	/*
	 * Unmap in chunks of at most XFS_ITRUNC_MAX_EXTENTS extents per
	 * transaction, rolling to a fresh transaction between chunks so a
	 * single permanent reservation is never overrun.
	 */
	while (!done) {
		xfs_bmap_init(&free_list, &first_block);
		error = xfs_bunmapi(tp, ip,
				    first_unmap_block, unmap_len,
				    xfs_bmapi_aflag(whichfork),
				    XFS_ITRUNC_MAX_EXTENTS,
				    &first_block, &free_list,
				    &done);
		if (error)
			goto out_bmap_cancel;

		/*
		 * Duplicate the transaction that has the permanent
		 * reservation and commit the old transaction.
		 */
		error = xfs_bmap_finish(&tp, &free_list, &committed);
		if (committed)
			xfs_trans_ijoin(tp, ip, 0);
		if (error)
			goto out_bmap_cancel;

		if (committed) {
			/*
			 * Mark the inode dirty so it will be logged and
			 * moved forward in the log as part of every commit.
			 */
			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
		}

		/* Roll the transaction: dup keeps the permanent log res. */
		ntp = xfs_trans_dup(tp);
		error = xfs_trans_commit(tp, 0);
		tp = ntp;

		/* Rejoin the inode to the new transaction on every roll. */
		xfs_trans_ijoin(tp, ip, 0);

		if (error)
			goto out;

		/*
		 * Transaction commit worked ok so we can drop the extra ticket
		 * reference that we gained in xfs_trans_dup()
		 */
		xfs_log_ticket_put(tp->t_ticket);
		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
		if (error)
			goto out;
	}

	/*
	 * Always re-log the inode so that our permanent transaction can keep
	 * on rolling it forward in the log.
	 */
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	trace_xfs_itruncate_extents_end(ip, new_size);

out:
	/* Hand the current (possibly rolled) transaction back to the caller. */
	*tpp = tp;
	return error;
out_bmap_cancel:
	/*
	 * If the bunmapi call encounters an error, return to the caller where
	 * the transaction can be properly aborted. We just need to make sure
	 * we're not holding any resources that we were not when we came in.
	 */
	xfs_bmap_cancel(&free_list);
	goto out;
}

/*
 * Called on the last close of a regular file: an opportunity to trim
 * speculative preallocation and flush delalloc data.  Returns 0 or a
 * positive errno.
 */
int
xfs_release(
	xfs_inode_t	*ip)
{
	xfs_mount_t	*mp = ip->i_mount;
	int		error;

	/* Only regular, still-allocated files need release processing. */
	if (!S_ISREG(ip->i_d.di_mode) || (ip->i_d.di_mode == 0))
		return 0;

	/* If this is a read-only mount, don't do this (would generate I/O) */
	if (mp->m_flags & XFS_MOUNT_RDONLY)
		return 0;

	if (!XFS_FORCED_SHUTDOWN(mp)) {
		int truncated;

		/*
		 * If we are using filestreams, and we have an unlinked
		 * file that we are processing the last close on, then nothing
		 * will be able to reopen and write to this file.
		 * Purge this
		 * inode from the filestreams cache so that it doesn't delay
		 * teardown of the inode.
		 */
		if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip))
			xfs_filestream_deassociate(ip);

		/*
		 * If we previously truncated this file and removed old data
		 * in the process, we want to initiate "early" writeout on
		 * the last close. This is an attempt to combat the notorious
		 * NULL files problem which is particularly noticeable from a
		 * truncate down, buffered (re-)write (delalloc), followed by
		 * a crash. What we are effectively doing here is
		 * significantly reducing the time window where we'd otherwise
		 * be exposed to that problem.
		 */
		truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
		if (truncated) {
			xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE);
			if (VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) {
				/*
				 * filemap_flush() returns a negative errno;
				 * negate it for XFS' positive-errno convention.
				 */
				error = -filemap_flush(VFS_I(ip)->i_mapping);
				if (error)
					return error;
			}
		}
	}

	/* Unlinked inodes are torn down via xfs_inactive(), not here. */
	if (ip->i_d.di_nlink == 0)
		return 0;

	if (xfs_can_free_eofblocks(ip, false)) {

		/*
		 * If we can't get the iolock just skip truncating the blocks
		 * past EOF because we could deadlock with the mmap_sem
		 * otherwise. We'll get another chance to drop them once the
		 * last reference to the inode is dropped, so we'll never leak
		 * blocks permanently.
		 *
		 * Further, check if the inode is being opened, written and
		 * closed frequently and we have delayed allocation blocks
		 * outstanding (e.g. streaming writes from the NFS server),
		 * truncating the blocks past EOF will cause fragmentation to
		 * occur.
		 *
		 * In this case don't do the truncation, either, but we have to
		 * be careful how we detect this case. Blocks beyond EOF show
		 * up as i_delayed_blks even when the inode is clean, so we
		 * need to truncate them away first before checking for a dirty
		 * release. Hence on the first dirty close we will still remove
		 * the speculative allocation, but after that we will leave it
		 * in place.
		 */
		if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
			return 0;

		/* EAGAIN means the iolock was contended; not an error here. */
		error = xfs_free_eofblocks(mp, ip, true);
		if (error && error != EAGAIN)
			return error;

		/* delalloc blocks after truncation means it really is dirty */
		if (ip->i_delayed_blks)
			xfs_iflags_set(ip, XFS_IDIRTY_RELEASE);
	}
	return 0;
}

/*
 * xfs_inactive_truncate
 *
 * Called to perform a truncate when an inode becomes unlinked.
 * Truncates the data fork to zero length in its own transaction.
 * Returns 0 on success or a positive errno; on return the inode is
 * unlocked in all cases.
 */
STATIC int
xfs_inactive_truncate(
	struct xfs_inode *ip)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	int			error;

	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
	if (error) {
		/* With resblks == 0, reserve only fails on shutdown. */
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		xfs_trans_cancel(tp, 0);
		return error;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, 0);

	/*
	 * Log the inode size first to prevent stale data exposure in the event
	 * of a system crash before the truncate completes. See the related
	 * comment in xfs_setattr_size() for details.
	 */
	ip->i_d.di_size = 0;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
	if (error)
		goto error_trans_cancel;

	ASSERT(ip->i_d.di_nextents == 0);

	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	if (error)
		goto error_unlock;

	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return 0;

error_trans_cancel:
	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
error_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

/*
 * xfs_inactive
 *
 * This is called when the vnode reference count for the vnode
 * goes to zero. If the file has been unlinked, then it must
 * now be truncated. Also, we clear all of the read-ahead state
 * kept for the inode here since the file is now closed.
 *
 * Always returns VN_INACTIVE_CACHE regardless of errors encountered.
 */
int
xfs_inactive(
	xfs_inode_t	*ip)
{
	xfs_bmap_free_t	free_list;
	xfs_fsblock_t	first_block;
	int		committed;
	struct xfs_trans *tp;
	struct xfs_mount *mp;
	int		error;
	int		truncate = 0;

	/*
	 * If the inode is already free, then there can be nothing
	 * to clean up here.
	 */
	if (ip->i_d.di_mode == 0) {
		ASSERT(ip->i_df.if_real_bytes == 0);
		ASSERT(ip->i_df.if_broot_bytes == 0);
		return VN_INACTIVE_CACHE;
	}

	mp = ip->i_mount;

	error = 0;

	/* If this is a read-only mount, don't do this (would generate I/O) */
	if (mp->m_flags & XFS_MOUNT_RDONLY)
		goto out;

	/* Still linked: only trim speculative post-EOF blocks, then done. */
	if (ip->i_d.di_nlink != 0) {
		/*
		 * force is true because we are evicting an inode from the
		 * cache. Post-eof blocks must be freed, lest we end up with
		 * broken free space accounting.
		 */
		if (xfs_can_free_eofblocks(ip, true)) {
			error = xfs_free_eofblocks(mp, ip, false);
			if (error)
				return VN_INACTIVE_CACHE;
		}
		goto out;
	}

	/* Unlinked regular file with any data or extents needs a truncate. */
	if (S_ISREG(ip->i_d.di_mode) &&
	    (ip->i_d.di_size != 0 || XFS_ISIZE(ip) != 0 ||
	     ip->i_d.di_nextents > 0 || ip->i_delayed_blks > 0))
		truncate = 1;

	error = xfs_qm_dqattach(ip, 0);
	if (error)
		return VN_INACTIVE_CACHE;

	if (S_ISLNK(ip->i_d.di_mode))
		error = xfs_inactive_symlink(ip);
	else if (truncate)
		error = xfs_inactive_truncate(ip);
	if (error)
		goto out;

	/*
	 * If there are attributes associated with the file then blow them away
	 * now. The code calls a routine that recursively deconstructs the
	 * attribute fork. We need to just commit the current transaction
	 * because we can't use it for xfs_attr_inactive().
	 */
	if (ip->i_d.di_anextents > 0) {
		ASSERT(ip->i_d.di_forkoff != 0);

		error = xfs_attr_inactive(ip);
		if (error)
			goto out;
	}

	if (ip->i_afp)
		xfs_idestroy_fork(ip, XFS_ATTR_FORK);

	ASSERT(ip->i_d.di_anextents == 0);

	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, 0, 0);
	if (error) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		/*
		 * NOTE(review): the reservation failed, yet this cancels with
		 * XFS_TRANS_RELEASE_LOG_RES whereas xfs_inactive_truncate()
		 * cancels with 0 in the identical situation — confirm which
		 * flag is intended here.
		 */
		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
		goto out;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, 0);

	/*
	 * Free the inode.
	 */
	xfs_bmap_init(&free_list, &first_block);
	error = xfs_ifree(tp, ip, &free_list);
	if (error) {
		/*
		 * If we fail to free the inode, shut down. The cancel
		 * might do that, we need to make sure. Otherwise the
		 * inode might be lost for a long time or forever.
		 */
		if (!XFS_FORCED_SHUTDOWN(mp)) {
			xfs_notice(mp, "%s: xfs_ifree returned error %d",
				__func__, error);
			xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
		}
		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
	} else {
		/*
		 * Credit the quota account(s). The inode is gone.
		 */
		xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);

		/*
		 * Just ignore errors at this point. There is nothing we can
		 * do except to try to keep going. Make sure it's not a silent
		 * error.
		 */
		error = xfs_bmap_finish(&tp, &free_list, &committed);
		if (error)
			xfs_notice(mp, "%s: xfs_bmap_finish returned error %d",
				__func__, error);
		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
		if (error)
			xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
				__func__, error);
	}

	/*
	 * Release the dquots held by inode, if any.
	 */
	xfs_qm_dqdetach(ip);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
out:
	return VN_INACTIVE_CACHE;
}

/*
 * This is called when the inode's link count goes to 0.
 * We place the on-disk inode on a list in the AGI.  It
 * will be pulled from this list when the inode is freed.
 */
int
xfs_iunlink(
	xfs_trans_t	*tp,
	xfs_inode_t	*ip)
{
	xfs_mount_t	*mp;
	xfs_agi_t	*agi;
	xfs_dinode_t	*dip;
	xfs_buf_t	*agibp;
	xfs_buf_t	*ibp;
	xfs_agino_t	agino;
	short		bucket_index;
	int		offset;
	int		error;

	/* only unlinked (nlink == 0) but still allocated inodes go here */
	ASSERT(ip->i_d.di_nlink == 0);
	ASSERT(ip->i_d.di_mode != 0);

	mp = tp->t_mountp;

	/*
	 * Get the agi buffer first.  It ensures lock ordering
	 * on the list.
	 */
	error = xfs_read_agi(mp, tp, XFS_INO_TO_AGNO(mp, ip->i_ino), &agibp);
	if (error)
		return error;
	agi = XFS_BUF_TO_AGI(agibp);

	/*
	 * Get the index into the agi hash table for the
	 * list this inode will go on.  Buckets are selected by a
	 * simple modulus of the AG-relative inode number.
	 */
	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
	ASSERT(agino != 0);
	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
	ASSERT(agi->agi_unlinked[bucket_index]);
	/* the inode must not already be on the list */
	ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino);

	if (agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)) {
		/*
		 * There is already another inode in the bucket we need
		 * to add ourselves to.  Add us at the front of the list.
		 * Here we put the head pointer into our next pointer,
		 * and then we fall through to point the head at us.
		 */
		error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
				       0, 0);
		if (error)
			return error;

		ASSERT(dip->di_next_unlinked == cpu_to_be32(NULLAGINO));
		dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
		offset = ip->i_imap.im_boffset +
			offsetof(xfs_dinode_t, di_next_unlinked);

		/* need to recalc the inode CRC if appropriate */
		xfs_dinode_calc_crc(mp, dip);

		/* log only the di_next_unlinked field of the on-disk inode */
		xfs_trans_inode_buf(tp, ibp);
		xfs_trans_log_buf(tp, ibp, offset,
				  (offset + sizeof(xfs_agino_t) - 1));
		xfs_inobp_check(mp, ibp);
	}

	/*
	 * Point the bucket head pointer at the inode being inserted.
	 */
	ASSERT(agino != 0);
	agi->agi_unlinked[bucket_index] = cpu_to_be32(agino);
	offset = offsetof(xfs_agi_t, agi_unlinked) +
		(sizeof(xfs_agino_t) * bucket_index);
	xfs_trans_log_buf(tp, agibp, offset,
			  (offset + sizeof(xfs_agino_t) - 1));
	return 0;
}

/*
 * Pull the on-disk inode from the AGI unlinked list.  The caller holds the
 * inode in the same transaction; both the AGI bucket head (or the previous
 * inode's di_next_unlinked) and this inode's di_next_unlinked are updated
 * and logged atomically within that transaction.
 */
STATIC int
xfs_iunlink_remove(
	xfs_trans_t	*tp,
	xfs_inode_t	*ip)
{
	xfs_ino_t	next_ino;
	xfs_mount_t	*mp;
	xfs_agi_t	*agi;
	xfs_dinode_t	*dip;
	xfs_buf_t	*agibp;
	xfs_buf_t	*ibp;
	xfs_agnumber_t	agno;
	xfs_agino_t	agino;
	xfs_agino_t	next_agino;
	xfs_buf_t	*last_ibp;
	xfs_dinode_t	*last_dip = NULL;
	short		bucket_index;
	int		offset, last_offset = 0;
	int		error;

	mp = tp->t_mountp;
	agno = XFS_INO_TO_AGNO(mp, ip->i_ino);

	/*
	 * Get the agi buffer first.  It ensures lock ordering
	 * on the list.
	 */
	error = xfs_read_agi(mp, tp, agno, &agibp);
	if (error)
		return error;

	agi = XFS_BUF_TO_AGI(agibp);

	/*
	 * Get the index into the agi hash table for the
	 * list this inode will go on.
	 */
	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
	ASSERT(agino != 0);
	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
	/* the list must be non-empty: the inode is supposed to be on it */
	ASSERT(agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO));
	ASSERT(agi->agi_unlinked[bucket_index]);

	if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) {
		/*
		 * We're at the head of the list.  Get the inode's on-disk
		 * buffer to see if there is anyone after us on the list.
		 * Only modify our next pointer if it is not already NULLAGINO.
		 * This saves us the overhead of dealing with the buffer when
		 * there is no need to change it.
		 */
		error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
				       0, 0);
		if (error) {
			xfs_warn(mp, "%s: xfs_imap_to_bp returned error %d.",
				__func__, error);
			return error;
		}
		next_agino = be32_to_cpu(dip->di_next_unlinked);
		ASSERT(next_agino != 0);
		if (next_agino != NULLAGINO) {
			dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
			offset = ip->i_imap.im_boffset +
				offsetof(xfs_dinode_t, di_next_unlinked);

			/* need to recalc the inode CRC if appropriate */
			xfs_dinode_calc_crc(mp, dip);

			xfs_trans_inode_buf(tp, ibp);
			xfs_trans_log_buf(tp, ibp, offset,
					  (offset + sizeof(xfs_agino_t) - 1));
			xfs_inobp_check(mp, ibp);
		} else {
			/* nothing to change on disk; just drop the buffer */
			xfs_trans_brelse(tp, ibp);
		}
		/*
		 * Point the bucket head pointer at the next inode.
		 */
		ASSERT(next_agino != 0);
		ASSERT(next_agino != agino);
		agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino);
		offset = offsetof(xfs_agi_t, agi_unlinked) +
			(sizeof(xfs_agino_t) * bucket_index);
		xfs_trans_log_buf(tp, agibp, offset,
				  (offset + sizeof(xfs_agino_t) - 1));
	} else {
		/*
		 * We need to search the list for the inode being freed.
		 * Walk the singly-linked on-disk chain, keeping the buffer
		 * of the previous inode so its next pointer can be patched.
		 */
		next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
		last_ibp = NULL;
		while (next_agino != agino) {
			struct xfs_imap	imap;

			if (last_ibp)
				xfs_trans_brelse(tp, last_ibp);

			imap.im_blkno = 0;
			next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino);

			error = xfs_imap(mp, tp, next_ino, &imap, 0);
			if (error) {
				xfs_warn(mp,
	"%s: xfs_imap returned error %d.",
					 __func__, error);
				return error;
			}

			error = xfs_imap_to_bp(mp, tp, &imap, &last_dip,
					       &last_ibp, 0, 0);
			if (error) {
				xfs_warn(mp,
	"%s: xfs_imap_to_bp returned error %d.",
					__func__, error);
				return error;
			}

			last_offset = imap.im_boffset;
			next_agino = be32_to_cpu(last_dip->di_next_unlinked);
			ASSERT(next_agino != NULLAGINO);
			ASSERT(next_agino != 0);
		}

		/*
		 * Now last_ibp points to the buffer previous to us on the
		 * unlinked list.  Pull us from the list.
		 */
		error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
				       0, 0);
		if (error) {
			xfs_warn(mp, "%s: xfs_imap_to_bp(2) returned error %d.",
				__func__, error);
			return error;
		}
		next_agino = be32_to_cpu(dip->di_next_unlinked);
		ASSERT(next_agino != 0);
		ASSERT(next_agino != agino);
		if (next_agino != NULLAGINO) {
			dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
			offset = ip->i_imap.im_boffset +
				offsetof(xfs_dinode_t, di_next_unlinked);

			/* need to recalc the inode CRC if appropriate */
			xfs_dinode_calc_crc(mp, dip);

			xfs_trans_inode_buf(tp, ibp);
			xfs_trans_log_buf(tp, ibp, offset,
					  (offset + sizeof(xfs_agino_t) - 1));
			xfs_inobp_check(mp, ibp);
		} else {
			xfs_trans_brelse(tp, ibp);
		}
		/*
		 * Point the previous inode on the list to the next inode.
		 */
		last_dip->di_next_unlinked = cpu_to_be32(next_agino);
		ASSERT(next_agino != 0);
		offset = last_offset + offsetof(xfs_dinode_t, di_next_unlinked);

		/* need to recalc the inode CRC if appropriate */
		xfs_dinode_calc_crc(mp, last_dip);

		xfs_trans_inode_buf(tp, last_ibp);
		xfs_trans_log_buf(tp, last_ibp, offset,
				  (offset + sizeof(xfs_agino_t) - 1));
		xfs_inobp_check(mp, last_ibp);
	}
	return 0;
}

/*
 * A big issue when freeing the inode cluster is that we _cannot_ skip any
 * inodes that are in memory - they all must be marked stale and attached to
 * the cluster buffer.
21215b3eed75SDave Chinner */ 21222a30f36dSChandra Seetharaman STATIC int 21231da177e4SLinus Torvalds xfs_ifree_cluster( 21241da177e4SLinus Torvalds xfs_inode_t *free_ip, 21251da177e4SLinus Torvalds xfs_trans_t *tp, 21261da177e4SLinus Torvalds xfs_ino_t inum) 21271da177e4SLinus Torvalds { 21281da177e4SLinus Torvalds xfs_mount_t *mp = free_ip->i_mount; 21291da177e4SLinus Torvalds int blks_per_cluster; 21301da177e4SLinus Torvalds int nbufs; 21311da177e4SLinus Torvalds int ninodes; 21325b257b4aSDave Chinner int i, j; 21331da177e4SLinus Torvalds xfs_daddr_t blkno; 21341da177e4SLinus Torvalds xfs_buf_t *bp; 21355b257b4aSDave Chinner xfs_inode_t *ip; 21361da177e4SLinus Torvalds xfs_inode_log_item_t *iip; 21371da177e4SLinus Torvalds xfs_log_item_t *lip; 21385017e97dSDave Chinner struct xfs_perag *pag; 21391da177e4SLinus Torvalds 21405017e97dSDave Chinner pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum)); 21411da177e4SLinus Torvalds if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { 21421da177e4SLinus Torvalds blks_per_cluster = 1; 21431da177e4SLinus Torvalds ninodes = mp->m_sb.sb_inopblock; 21441da177e4SLinus Torvalds nbufs = XFS_IALLOC_BLOCKS(mp); 21451da177e4SLinus Torvalds } else { 21461da177e4SLinus Torvalds blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) / 21471da177e4SLinus Torvalds mp->m_sb.sb_blocksize; 21481da177e4SLinus Torvalds ninodes = blks_per_cluster * mp->m_sb.sb_inopblock; 21491da177e4SLinus Torvalds nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster; 21501da177e4SLinus Torvalds } 21511da177e4SLinus Torvalds 21521da177e4SLinus Torvalds for (j = 0; j < nbufs; j++, inum += ninodes) { 21531da177e4SLinus Torvalds blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), 21541da177e4SLinus Torvalds XFS_INO_TO_AGBNO(mp, inum)); 21551da177e4SLinus Torvalds 21561da177e4SLinus Torvalds /* 21575b257b4aSDave Chinner * We obtain and lock the backing buffer first in the process 21585b257b4aSDave Chinner * here, as we have to ensure that any dirty inode that we 
21595b257b4aSDave Chinner * can't get the flush lock on is attached to the buffer. 21605b257b4aSDave Chinner * If we scan the in-memory inodes first, then buffer IO can 21615b257b4aSDave Chinner * complete before we get a lock on it, and hence we may fail 21625b257b4aSDave Chinner * to mark all the active inodes on the buffer stale. 21631da177e4SLinus Torvalds */ 21641da177e4SLinus Torvalds bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 2165b6aff29fSDave Chinner mp->m_bsize * blks_per_cluster, 2166b6aff29fSDave Chinner XBF_UNMAPPED); 21671da177e4SLinus Torvalds 21682a30f36dSChandra Seetharaman if (!bp) 21692a30f36dSChandra Seetharaman return ENOMEM; 2170b0f539deSDave Chinner 2171b0f539deSDave Chinner /* 2172b0f539deSDave Chinner * This buffer may not have been correctly initialised as we 2173b0f539deSDave Chinner * didn't read it from disk. That's not important because we are 2174b0f539deSDave Chinner * only using to mark the buffer as stale in the log, and to 2175b0f539deSDave Chinner * attach stale cached inodes on it. That means it will never be 2176b0f539deSDave Chinner * dispatched for IO. If it is, we want to know about it, and we 2177b0f539deSDave Chinner * want it to fail. We can acheive this by adding a write 2178b0f539deSDave Chinner * verifier to the buffer. 2179b0f539deSDave Chinner */ 21801813dd64SDave Chinner bp->b_ops = &xfs_inode_buf_ops; 2181b0f539deSDave Chinner 21825b257b4aSDave Chinner /* 21835b257b4aSDave Chinner * Walk the inodes already attached to the buffer and mark them 21845b257b4aSDave Chinner * stale. These will all have the flush locks held, so an 21855b3eed75SDave Chinner * in-memory inode walk can't lock them. By marking them all 21865b3eed75SDave Chinner * stale first, we will not attempt to lock them in the loop 21875b3eed75SDave Chinner * below as the XFS_ISTALE flag will be set. 
21885b257b4aSDave Chinner */ 2189adadbeefSChristoph Hellwig lip = bp->b_fspriv; 21901da177e4SLinus Torvalds while (lip) { 21911da177e4SLinus Torvalds if (lip->li_type == XFS_LI_INODE) { 21921da177e4SLinus Torvalds iip = (xfs_inode_log_item_t *)lip; 21931da177e4SLinus Torvalds ASSERT(iip->ili_logged == 1); 2194ca30b2a7SChristoph Hellwig lip->li_cb = xfs_istale_done; 21957b2e2a31SDavid Chinner xfs_trans_ail_copy_lsn(mp->m_ail, 21967b2e2a31SDavid Chinner &iip->ili_flush_lsn, 21977b2e2a31SDavid Chinner &iip->ili_item.li_lsn); 2198e5ffd2bbSDavid Chinner xfs_iflags_set(iip->ili_inode, XFS_ISTALE); 21991da177e4SLinus Torvalds } 22001da177e4SLinus Torvalds lip = lip->li_bio_list; 22011da177e4SLinus Torvalds } 22021da177e4SLinus Torvalds 22035b3eed75SDave Chinner 22045b257b4aSDave Chinner /* 22055b257b4aSDave Chinner * For each inode in memory attempt to add it to the inode 22065b257b4aSDave Chinner * buffer and set it up for being staled on buffer IO 22075b257b4aSDave Chinner * completion. This is safe as we've locked out tail pushing 22085b257b4aSDave Chinner * and flushing by locking the buffer. 22095b257b4aSDave Chinner * 22105b257b4aSDave Chinner * We have already marked every inode that was part of a 22115b257b4aSDave Chinner * transaction stale above, which means there is no point in 22125b257b4aSDave Chinner * even trying to lock them. 
22135b257b4aSDave Chinner */ 22145b257b4aSDave Chinner for (i = 0; i < ninodes; i++) { 22155b3eed75SDave Chinner retry: 22161a3e8f3dSDave Chinner rcu_read_lock(); 22175b257b4aSDave Chinner ip = radix_tree_lookup(&pag->pag_ici_root, 22185b257b4aSDave Chinner XFS_INO_TO_AGINO(mp, (inum + i))); 22191da177e4SLinus Torvalds 22201a3e8f3dSDave Chinner /* Inode not in memory, nothing to do */ 22211a3e8f3dSDave Chinner if (!ip) { 22221a3e8f3dSDave Chinner rcu_read_unlock(); 22235b257b4aSDave Chinner continue; 22245b257b4aSDave Chinner } 22255b257b4aSDave Chinner 22265b3eed75SDave Chinner /* 22271a3e8f3dSDave Chinner * because this is an RCU protected lookup, we could 22281a3e8f3dSDave Chinner * find a recently freed or even reallocated inode 22291a3e8f3dSDave Chinner * during the lookup. We need to check under the 22301a3e8f3dSDave Chinner * i_flags_lock for a valid inode here. Skip it if it 22311a3e8f3dSDave Chinner * is not valid, the wrong inode or stale. 22321a3e8f3dSDave Chinner */ 22331a3e8f3dSDave Chinner spin_lock(&ip->i_flags_lock); 22341a3e8f3dSDave Chinner if (ip->i_ino != inum + i || 22351a3e8f3dSDave Chinner __xfs_iflags_test(ip, XFS_ISTALE)) { 22361a3e8f3dSDave Chinner spin_unlock(&ip->i_flags_lock); 22371a3e8f3dSDave Chinner rcu_read_unlock(); 22381a3e8f3dSDave Chinner continue; 22391a3e8f3dSDave Chinner } 22401a3e8f3dSDave Chinner spin_unlock(&ip->i_flags_lock); 22411a3e8f3dSDave Chinner 22421a3e8f3dSDave Chinner /* 22435b3eed75SDave Chinner * Don't try to lock/unlock the current inode, but we 22445b3eed75SDave Chinner * _cannot_ skip the other inodes that we did not find 22455b3eed75SDave Chinner * in the list attached to the buffer and are not 22465b3eed75SDave Chinner * already marked stale. If we can't lock it, back off 22475b3eed75SDave Chinner * and retry. 
22485b3eed75SDave Chinner */ 22495b257b4aSDave Chinner if (ip != free_ip && 22505b257b4aSDave Chinner !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { 22511a3e8f3dSDave Chinner rcu_read_unlock(); 22525b3eed75SDave Chinner delay(1); 22535b3eed75SDave Chinner goto retry; 22545b257b4aSDave Chinner } 22551a3e8f3dSDave Chinner rcu_read_unlock(); 22565b257b4aSDave Chinner 22575b3eed75SDave Chinner xfs_iflock(ip); 22585b257b4aSDave Chinner xfs_iflags_set(ip, XFS_ISTALE); 22595b257b4aSDave Chinner 22605b3eed75SDave Chinner /* 22615b3eed75SDave Chinner * we don't need to attach clean inodes or those only 22625b3eed75SDave Chinner * with unlogged changes (which we throw away, anyway). 22635b3eed75SDave Chinner */ 22645b257b4aSDave Chinner iip = ip->i_itemp; 22655b3eed75SDave Chinner if (!iip || xfs_inode_clean(ip)) { 22665b257b4aSDave Chinner ASSERT(ip != free_ip); 22671da177e4SLinus Torvalds xfs_ifunlock(ip); 22681da177e4SLinus Torvalds xfs_iunlock(ip, XFS_ILOCK_EXCL); 22691da177e4SLinus Torvalds continue; 22701da177e4SLinus Torvalds } 22711da177e4SLinus Torvalds 2272f5d8d5c4SChristoph Hellwig iip->ili_last_fields = iip->ili_fields; 2273f5d8d5c4SChristoph Hellwig iip->ili_fields = 0; 22741da177e4SLinus Torvalds iip->ili_logged = 1; 22757b2e2a31SDavid Chinner xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 22767b2e2a31SDavid Chinner &iip->ili_item.li_lsn); 22771da177e4SLinus Torvalds 2278ca30b2a7SChristoph Hellwig xfs_buf_attach_iodone(bp, xfs_istale_done, 2279ca30b2a7SChristoph Hellwig &iip->ili_item); 22805b257b4aSDave Chinner 22815b257b4aSDave Chinner if (ip != free_ip) 22821da177e4SLinus Torvalds xfs_iunlock(ip, XFS_ILOCK_EXCL); 22831da177e4SLinus Torvalds } 22841da177e4SLinus Torvalds 22851da177e4SLinus Torvalds xfs_trans_stale_inode_buf(tp, bp); 22861da177e4SLinus Torvalds xfs_trans_binval(tp, bp); 22871da177e4SLinus Torvalds } 22881da177e4SLinus Torvalds 22895017e97dSDave Chinner xfs_perag_put(pag); 22902a30f36dSChandra Seetharaman return 0; 22911da177e4SLinus Torvalds 
} 22921da177e4SLinus Torvalds 22931da177e4SLinus Torvalds /* 22941da177e4SLinus Torvalds * This is called to return an inode to the inode free list. 22951da177e4SLinus Torvalds * The inode should already be truncated to 0 length and have 22961da177e4SLinus Torvalds * no pages associated with it. This routine also assumes that 22971da177e4SLinus Torvalds * the inode is already a part of the transaction. 22981da177e4SLinus Torvalds * 22991da177e4SLinus Torvalds * The on-disk copy of the inode will have been added to the list 23001da177e4SLinus Torvalds * of unlinked inodes in the AGI. We need to remove the inode from 23011da177e4SLinus Torvalds * that list atomically with respect to freeing it here. 23021da177e4SLinus Torvalds */ 23031da177e4SLinus Torvalds int 23041da177e4SLinus Torvalds xfs_ifree( 23051da177e4SLinus Torvalds xfs_trans_t *tp, 23061da177e4SLinus Torvalds xfs_inode_t *ip, 23071da177e4SLinus Torvalds xfs_bmap_free_t *flist) 23081da177e4SLinus Torvalds { 23091da177e4SLinus Torvalds int error; 23101da177e4SLinus Torvalds int delete; 23111da177e4SLinus Torvalds xfs_ino_t first_ino; 23121da177e4SLinus Torvalds 2313579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 23141da177e4SLinus Torvalds ASSERT(ip->i_d.di_nlink == 0); 23151da177e4SLinus Torvalds ASSERT(ip->i_d.di_nextents == 0); 23161da177e4SLinus Torvalds ASSERT(ip->i_d.di_anextents == 0); 2317ce7ae151SChristoph Hellwig ASSERT(ip->i_d.di_size == 0 || !S_ISREG(ip->i_d.di_mode)); 23181da177e4SLinus Torvalds ASSERT(ip->i_d.di_nblocks == 0); 23191da177e4SLinus Torvalds 23201da177e4SLinus Torvalds /* 23211da177e4SLinus Torvalds * Pull the on-disk inode from the AGI unlinked list. 
23221da177e4SLinus Torvalds */ 23231da177e4SLinus Torvalds error = xfs_iunlink_remove(tp, ip); 23241baaed8fSDave Chinner if (error) 23251da177e4SLinus Torvalds return error; 23261da177e4SLinus Torvalds 23271da177e4SLinus Torvalds error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino); 23281baaed8fSDave Chinner if (error) 23291da177e4SLinus Torvalds return error; 23301baaed8fSDave Chinner 23311da177e4SLinus Torvalds ip->i_d.di_mode = 0; /* mark incore inode as free */ 23321da177e4SLinus Torvalds ip->i_d.di_flags = 0; 23331da177e4SLinus Torvalds ip->i_d.di_dmevmask = 0; 23341da177e4SLinus Torvalds ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */ 23351da177e4SLinus Torvalds ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; 23361da177e4SLinus Torvalds ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; 23371da177e4SLinus Torvalds /* 23381da177e4SLinus Torvalds * Bump the generation count so no one will be confused 23391da177e4SLinus Torvalds * by reincarnations of this inode. 23401da177e4SLinus Torvalds */ 23411da177e4SLinus Torvalds ip->i_d.di_gen++; 23421da177e4SLinus Torvalds xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 23431da177e4SLinus Torvalds 23441baaed8fSDave Chinner if (delete) 23452a30f36dSChandra Seetharaman error = xfs_ifree_cluster(ip, tp, first_ino); 23461da177e4SLinus Torvalds 23472a30f36dSChandra Seetharaman return error; 23481da177e4SLinus Torvalds } 23491da177e4SLinus Torvalds 23501da177e4SLinus Torvalds /* 235160ec6783SChristoph Hellwig * This is called to unpin an inode. The caller must have the inode locked 235260ec6783SChristoph Hellwig * in at least shared mode so that the buffer cannot be subsequently pinned 235360ec6783SChristoph Hellwig * once someone is waiting for it to be unpinned. 
 */
static void
xfs_iunpin(
	struct xfs_inode	*ip)
{
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));

	trace_xfs_inode_unpin_nowait(ip, _RET_IP_);

	/* Give the log a push to start the unpinning I/O */
	xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0);

}

/*
 * Kick off the unpinning I/O and then sleep until the pin count drops to
 * zero.  Waiters queue on the __XFS_IPINNED_BIT wait queue and are woken
 * when log I/O completion releases the pin.
 */
static void
__xfs_iunpin_wait(
	struct xfs_inode	*ip)
{
	wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IPINNED_BIT);
	DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IPINNED_BIT);

	xfs_iunpin(ip);

	do {
		prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
		if (xfs_ipincount(ip))
			io_schedule();
	} while (xfs_ipincount(ip));
	finish_wait(wq, &wait.wait);
}

/* Wait for the inode to become unpinned; fast path when not pinned at all. */
void
xfs_iunpin_wait(
	struct xfs_inode	*ip)
{
	if (xfs_ipincount(ip))
		__xfs_iunpin_wait(ip);
}

/*
 * Remove the directory entry @name in @dp that points to @ip, dropping the
 * link count(s) in a single transaction.  Handles both files and (empty)
 * directories; returns 0 or a positive XFS error code.
 */
int
xfs_remove(
	xfs_inode_t             *dp,
	struct xfs_name		*name,
	xfs_inode_t		*ip)
{
	xfs_mount_t		*mp = dp->i_mount;
	xfs_trans_t             *tp = NULL;
	int			is_dir = S_ISDIR(ip->i_d.di_mode);
	int                     error = 0;
	xfs_bmap_free_t         free_list;
	xfs_fsblock_t           first_block;
	int			cancel_flags;
	int			committed;
	int			link_zero;
	uint			resblks;
	uint			log_count;

	trace_xfs_remove(dp, name);

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	/* attach dquots to both inodes before we allocate a transaction */
	error = xfs_qm_dqattach(dp, 0);
	if (error)
		goto std_return;

	error = xfs_qm_dqattach(ip, 0);
	if (error)
		goto std_return;

	if (is_dir) {
		tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
		log_count = XFS_DEFAULT_LOG_COUNT;
	} else {
		tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
		log_count = XFS_REMOVE_LOG_COUNT;
	}
	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;

	/*
	 * We try to get the real space reservation first,
	 * allowing for directory btree deletion(s) implying
	 * possible bmap insert(s).  If we can't get the space
	 * reservation then we use 0 instead, and avoid the bmap
	 * btree insert(s) in the directory code by, if the bmap
	 * insert tries to happen, instead trimming the LAST
	 * block from the directory.
	 */
	resblks = XFS_REMOVE_SPACE_RES(mp);
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, resblks, 0);
	if (error == ENOSPC) {
		resblks = 0;
		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, 0, 0);
	}
	if (error) {
		ASSERT(error != ENOSPC);
		cancel_flags = 0;
		goto out_trans_cancel;
	}

	/* lock both inodes in well-defined order to avoid deadlock */
	xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);

	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);

	/*
	 * If we're removing a directory perform some additional validation.
	 */
	if (is_dir) {
		/* nlink == 2 means only "." and ".." remain */
		ASSERT(ip->i_d.di_nlink >= 2);
		if (ip->i_d.di_nlink != 2) {
			error = XFS_ERROR(ENOTEMPTY);
			goto out_trans_cancel;
		}
		if (!xfs_dir_isempty(ip)) {
			error = XFS_ERROR(ENOTEMPTY);
			goto out_trans_cancel;
		}
	}

	xfs_bmap_init(&free_list, &first_block);
	error = xfs_dir_removename(tp, dp, name, ip->i_ino,
					&first_block, &free_list, resblks);
	if (error) {
		ASSERT(error != ENOENT);
		goto out_bmap_cancel;
	}
	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);

	if (is_dir) {
		/*
		 * Drop the link from ip's "..".
		 */
		error = xfs_droplink(tp, dp);
		if (error)
			goto out_bmap_cancel;

		/*
		 * Drop the "." link from ip to self.
		 */
		error = xfs_droplink(tp, ip);
		if (error)
			goto out_bmap_cancel;
	} else {
		/*
		 * When removing a non-directory we need to log the parent
		 * inode here.  For a directory this is done implicitly
		 * by the xfs_droplink call for the ".." entry.
		 */
		xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
	}

	/*
	 * Drop the link from dp to ip.
	 */
	error = xfs_droplink(tp, ip);
	if (error)
		goto out_bmap_cancel;

	/*
	 * Determine if this is the last link while
	 * we are in the transaction.
	 */
	link_zero = (ip->i_d.di_nlink == 0);

	/*
	 * If this is a synchronous mount, make sure that the
	 * remove transaction goes to disk before returning to
	 * the user.
	 */
	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
		xfs_trans_set_sync(tp);

	error = xfs_bmap_finish(&tp, &free_list, &committed);
	if (error)
		goto out_bmap_cancel;

	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	if (error)
		goto std_return;

	/*
	 * If we are using filestreams, kill the stream association.
	 * If the file is still open it may get a new one but that
	 * will get killed on last close in xfs_close() so we don't
	 * have to worry about that.
	 */
	if (!is_dir && link_zero && xfs_inode_is_filestream(ip))
		xfs_filestream_deassociate(ip);

	return 0;

 out_bmap_cancel:
	xfs_bmap_cancel(&free_list);
	cancel_flags |= XFS_TRANS_ABORT;
 out_trans_cancel:
	xfs_trans_cancel(tp, cancel_flags);
 std_return:
	return error;
}

/*
 * Enter all inodes for a rename transaction into a sorted array.
 */
STATIC void
xfs_sort_for_rename(
	xfs_inode_t	*dp1,	/* in: old (source) directory inode */
	xfs_inode_t	*dp2,	/* in: new (target) directory inode */
	xfs_inode_t	*ip1,	/* in: inode of old entry */
	xfs_inode_t	*ip2,	/* in: inode of new entry, if it
				   already exists, NULL otherwise. */
	xfs_inode_t	**i_tab,/* out: array of inode returned, sorted */
	int		*num_inodes)  /* out: number of inodes in array */
{
	xfs_inode_t		*temp;
	int			i, j;

	/*
	 * i_tab contains a list of pointers to inodes.  We initialize
	 * the table here & we'll sort it.  We will then use it to
	 * order the acquisition of the inode locks.
	 *
	 * Note that the table may contain duplicates.  e.g., dp1 == dp2.
	 */
	i_tab[0] = dp1;
	i_tab[1] = dp2;
	i_tab[2] = ip1;
	if (ip2) {
		*num_inodes = 4;
		i_tab[3] = ip2;
	} else {
		*num_inodes = 3;
		i_tab[3] = NULL;
	}

	/*
	 * Sort the elements via bubble sort.  (Remember, there are at
	 * most 4 elements to sort, so this is adequate.)
	 */
	for (i = 0; i < *num_inodes; i++) {
		for (j = 1; j < *num_inodes; j++) {
			if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) {
				temp = i_tab[j];
				i_tab[j] = i_tab[j-1];
				i_tab[j-1] = temp;
			}
		}
	}
}

/*
 * xfs_rename
 */
int
xfs_rename(
	xfs_inode_t	*src_dp,
	struct xfs_name	*src_name,
	xfs_inode_t	*src_ip,
	xfs_inode_t	*target_dp,
	struct xfs_name	*target_name,
	xfs_inode_t	*target_ip)
{
	xfs_trans_t	*tp = NULL;
	xfs_mount_t	*mp = src_dp->i_mount;
	int		new_parent;		/* moving to a new dir */
	int		src_is_directory;	/* src_name is a directory */
	int		error;
	xfs_bmap_free_t	free_list
2622f6bba201SDave Chinner xfs_fsblock_t first_block; 2623f6bba201SDave Chinner int cancel_flags; 2624f6bba201SDave Chinner int committed; 2625f6bba201SDave Chinner xfs_inode_t *inodes[4]; 2626f6bba201SDave Chinner int spaceres; 2627f6bba201SDave Chinner int num_inodes; 2628f6bba201SDave Chinner 2629f6bba201SDave Chinner trace_xfs_rename(src_dp, target_dp, src_name, target_name); 2630f6bba201SDave Chinner 2631f6bba201SDave Chinner new_parent = (src_dp != target_dp); 2632f6bba201SDave Chinner src_is_directory = S_ISDIR(src_ip->i_d.di_mode); 2633f6bba201SDave Chinner 2634f6bba201SDave Chinner xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, 2635f6bba201SDave Chinner inodes, &num_inodes); 2636f6bba201SDave Chinner 2637f6bba201SDave Chinner xfs_bmap_init(&free_list, &first_block); 2638f6bba201SDave Chinner tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME); 2639f6bba201SDave Chinner cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 2640f6bba201SDave Chinner spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len); 26413d3c8b52SJie Liu error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0); 2642f6bba201SDave Chinner if (error == ENOSPC) { 2643f6bba201SDave Chinner spaceres = 0; 26443d3c8b52SJie Liu error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0); 2645f6bba201SDave Chinner } 2646f6bba201SDave Chinner if (error) { 2647f6bba201SDave Chinner xfs_trans_cancel(tp, 0); 2648f6bba201SDave Chinner goto std_return; 2649f6bba201SDave Chinner } 2650f6bba201SDave Chinner 2651f6bba201SDave Chinner /* 2652f6bba201SDave Chinner * Attach the dquots to the inodes 2653f6bba201SDave Chinner */ 2654f6bba201SDave Chinner error = xfs_qm_vop_rename_dqattach(inodes); 2655f6bba201SDave Chinner if (error) { 2656f6bba201SDave Chinner xfs_trans_cancel(tp, cancel_flags); 2657f6bba201SDave Chinner goto std_return; 2658f6bba201SDave Chinner } 2659f6bba201SDave Chinner 2660f6bba201SDave Chinner /* 2661f6bba201SDave Chinner * Lock all the participating inodes. 
Depending upon whether 2662f6bba201SDave Chinner * the target_name exists in the target directory, and 2663f6bba201SDave Chinner * whether the target directory is the same as the source 2664f6bba201SDave Chinner * directory, we can lock from 2 to 4 inodes. 2665f6bba201SDave Chinner */ 2666f6bba201SDave Chinner xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL); 2667f6bba201SDave Chinner 2668f6bba201SDave Chinner /* 2669f6bba201SDave Chinner * Join all the inodes to the transaction. From this point on, 2670f6bba201SDave Chinner * we can rely on either trans_commit or trans_cancel to unlock 2671f6bba201SDave Chinner * them. 2672f6bba201SDave Chinner */ 2673f6bba201SDave Chinner xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); 2674f6bba201SDave Chinner if (new_parent) 2675f6bba201SDave Chinner xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); 2676f6bba201SDave Chinner xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); 2677f6bba201SDave Chinner if (target_ip) 2678f6bba201SDave Chinner xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); 2679f6bba201SDave Chinner 2680f6bba201SDave Chinner /* 2681f6bba201SDave Chinner * If we are using project inheritance, we only allow renames 2682f6bba201SDave Chinner * into our tree when the project IDs are the same; else the 2683f6bba201SDave Chinner * tree quota mechanism would be circumvented. 2684f6bba201SDave Chinner */ 2685f6bba201SDave Chinner if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && 2686f6bba201SDave Chinner (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) { 2687f6bba201SDave Chinner error = XFS_ERROR(EXDEV); 2688f6bba201SDave Chinner goto error_return; 2689f6bba201SDave Chinner } 2690f6bba201SDave Chinner 2691f6bba201SDave Chinner /* 2692f6bba201SDave Chinner * Set up the target. 
2693f6bba201SDave Chinner */ 2694f6bba201SDave Chinner if (target_ip == NULL) { 2695f6bba201SDave Chinner /* 2696f6bba201SDave Chinner * If there's no space reservation, check the entry will 2697f6bba201SDave Chinner * fit before actually inserting it. 2698f6bba201SDave Chinner */ 2699f6bba201SDave Chinner error = xfs_dir_canenter(tp, target_dp, target_name, spaceres); 2700f6bba201SDave Chinner if (error) 2701f6bba201SDave Chinner goto error_return; 2702f6bba201SDave Chinner /* 2703f6bba201SDave Chinner * If target does not exist and the rename crosses 2704f6bba201SDave Chinner * directories, adjust the target directory link count 2705f6bba201SDave Chinner * to account for the ".." reference from the new entry. 2706f6bba201SDave Chinner */ 2707f6bba201SDave Chinner error = xfs_dir_createname(tp, target_dp, target_name, 2708f6bba201SDave Chinner src_ip->i_ino, &first_block, 2709f6bba201SDave Chinner &free_list, spaceres); 2710f6bba201SDave Chinner if (error == ENOSPC) 2711f6bba201SDave Chinner goto error_return; 2712f6bba201SDave Chinner if (error) 2713f6bba201SDave Chinner goto abort_return; 2714f6bba201SDave Chinner 2715f6bba201SDave Chinner xfs_trans_ichgtime(tp, target_dp, 2716f6bba201SDave Chinner XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2717f6bba201SDave Chinner 2718f6bba201SDave Chinner if (new_parent && src_is_directory) { 2719f6bba201SDave Chinner error = xfs_bumplink(tp, target_dp); 2720f6bba201SDave Chinner if (error) 2721f6bba201SDave Chinner goto abort_return; 2722f6bba201SDave Chinner } 2723f6bba201SDave Chinner } else { /* target_ip != NULL */ 2724f6bba201SDave Chinner /* 2725f6bba201SDave Chinner * If target exists and it's a directory, check that both 2726f6bba201SDave Chinner * target and source are directories and that target can be 2727f6bba201SDave Chinner * destroyed, or that neither is a directory. 
2728f6bba201SDave Chinner */ 2729f6bba201SDave Chinner if (S_ISDIR(target_ip->i_d.di_mode)) { 2730f6bba201SDave Chinner /* 2731f6bba201SDave Chinner * Make sure target dir is empty. 2732f6bba201SDave Chinner */ 2733f6bba201SDave Chinner if (!(xfs_dir_isempty(target_ip)) || 2734f6bba201SDave Chinner (target_ip->i_d.di_nlink > 2)) { 2735f6bba201SDave Chinner error = XFS_ERROR(EEXIST); 2736f6bba201SDave Chinner goto error_return; 2737f6bba201SDave Chinner } 2738f6bba201SDave Chinner } 2739f6bba201SDave Chinner 2740f6bba201SDave Chinner /* 2741f6bba201SDave Chinner * Link the source inode under the target name. 2742f6bba201SDave Chinner * If the source inode is a directory and we are moving 2743f6bba201SDave Chinner * it across directories, its ".." entry will be 2744f6bba201SDave Chinner * inconsistent until we replace that down below. 2745f6bba201SDave Chinner * 2746f6bba201SDave Chinner * In case there is already an entry with the same 2747f6bba201SDave Chinner * name at the destination directory, remove it first. 2748f6bba201SDave Chinner */ 2749f6bba201SDave Chinner error = xfs_dir_replace(tp, target_dp, target_name, 2750f6bba201SDave Chinner src_ip->i_ino, 2751f6bba201SDave Chinner &first_block, &free_list, spaceres); 2752f6bba201SDave Chinner if (error) 2753f6bba201SDave Chinner goto abort_return; 2754f6bba201SDave Chinner 2755f6bba201SDave Chinner xfs_trans_ichgtime(tp, target_dp, 2756f6bba201SDave Chinner XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2757f6bba201SDave Chinner 2758f6bba201SDave Chinner /* 2759f6bba201SDave Chinner * Decrement the link count on the target since the target 2760f6bba201SDave Chinner * dir no longer points to it. 
2761f6bba201SDave Chinner */ 2762f6bba201SDave Chinner error = xfs_droplink(tp, target_ip); 2763f6bba201SDave Chinner if (error) 2764f6bba201SDave Chinner goto abort_return; 2765f6bba201SDave Chinner 2766f6bba201SDave Chinner if (src_is_directory) { 2767f6bba201SDave Chinner /* 2768f6bba201SDave Chinner * Drop the link from the old "." entry. 2769f6bba201SDave Chinner */ 2770f6bba201SDave Chinner error = xfs_droplink(tp, target_ip); 2771f6bba201SDave Chinner if (error) 2772f6bba201SDave Chinner goto abort_return; 2773f6bba201SDave Chinner } 2774f6bba201SDave Chinner } /* target_ip != NULL */ 2775f6bba201SDave Chinner 2776f6bba201SDave Chinner /* 2777f6bba201SDave Chinner * Remove the source. 2778f6bba201SDave Chinner */ 2779f6bba201SDave Chinner if (new_parent && src_is_directory) { 2780f6bba201SDave Chinner /* 2781f6bba201SDave Chinner * Rewrite the ".." entry to point to the new 2782f6bba201SDave Chinner * directory. 2783f6bba201SDave Chinner */ 2784f6bba201SDave Chinner error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot, 2785f6bba201SDave Chinner target_dp->i_ino, 2786f6bba201SDave Chinner &first_block, &free_list, spaceres); 2787f6bba201SDave Chinner ASSERT(error != EEXIST); 2788f6bba201SDave Chinner if (error) 2789f6bba201SDave Chinner goto abort_return; 2790f6bba201SDave Chinner } 2791f6bba201SDave Chinner 2792f6bba201SDave Chinner /* 2793f6bba201SDave Chinner * We always want to hit the ctime on the source inode. 2794f6bba201SDave Chinner * 2795f6bba201SDave Chinner * This isn't strictly required by the standards since the source 2796f6bba201SDave Chinner * inode isn't really being changed, but old unix file systems did 2797f6bba201SDave Chinner * it and some incremental backup programs won't work without it. 
2798f6bba201SDave Chinner */ 2799f6bba201SDave Chinner xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG); 2800f6bba201SDave Chinner xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE); 2801f6bba201SDave Chinner 2802f6bba201SDave Chinner /* 2803f6bba201SDave Chinner * Adjust the link count on src_dp. This is necessary when 2804f6bba201SDave Chinner * renaming a directory, either within one parent when 2805f6bba201SDave Chinner * the target existed, or across two parent directories. 2806f6bba201SDave Chinner */ 2807f6bba201SDave Chinner if (src_is_directory && (new_parent || target_ip != NULL)) { 2808f6bba201SDave Chinner 2809f6bba201SDave Chinner /* 2810f6bba201SDave Chinner * Decrement link count on src_directory since the 2811f6bba201SDave Chinner * entry that's moved no longer points to it. 2812f6bba201SDave Chinner */ 2813f6bba201SDave Chinner error = xfs_droplink(tp, src_dp); 2814f6bba201SDave Chinner if (error) 2815f6bba201SDave Chinner goto abort_return; 2816f6bba201SDave Chinner } 2817f6bba201SDave Chinner 2818f6bba201SDave Chinner error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, 2819f6bba201SDave Chinner &first_block, &free_list, spaceres); 2820f6bba201SDave Chinner if (error) 2821f6bba201SDave Chinner goto abort_return; 2822f6bba201SDave Chinner 2823f6bba201SDave Chinner xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2824f6bba201SDave Chinner xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); 2825f6bba201SDave Chinner if (new_parent) 2826f6bba201SDave Chinner xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); 2827f6bba201SDave Chinner 2828f6bba201SDave Chinner /* 2829f6bba201SDave Chinner * If this is a synchronous mount, make sure that the 2830f6bba201SDave Chinner * rename transaction goes to disk before returning to 2831f6bba201SDave Chinner * the user. 
2832f6bba201SDave Chinner */ 2833f6bba201SDave Chinner if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 2834f6bba201SDave Chinner xfs_trans_set_sync(tp); 2835f6bba201SDave Chinner } 2836f6bba201SDave Chinner 2837f6bba201SDave Chinner error = xfs_bmap_finish(&tp, &free_list, &committed); 2838f6bba201SDave Chinner if (error) { 2839f6bba201SDave Chinner xfs_bmap_cancel(&free_list); 2840f6bba201SDave Chinner xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | 2841f6bba201SDave Chinner XFS_TRANS_ABORT)); 2842f6bba201SDave Chinner goto std_return; 2843f6bba201SDave Chinner } 2844f6bba201SDave Chinner 2845f6bba201SDave Chinner /* 2846f6bba201SDave Chinner * trans_commit will unlock src_ip, target_ip & decrement 2847f6bba201SDave Chinner * the vnode references. 2848f6bba201SDave Chinner */ 2849f6bba201SDave Chinner return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 2850f6bba201SDave Chinner 2851f6bba201SDave Chinner abort_return: 2852f6bba201SDave Chinner cancel_flags |= XFS_TRANS_ABORT; 2853f6bba201SDave Chinner error_return: 2854f6bba201SDave Chinner xfs_bmap_cancel(&free_list); 2855f6bba201SDave Chinner xfs_trans_cancel(tp, cancel_flags); 2856f6bba201SDave Chinner std_return: 2857f6bba201SDave Chinner return error; 2858f6bba201SDave Chinner } 2859f6bba201SDave Chinner 2860bad55843SDavid Chinner STATIC int 2861bad55843SDavid Chinner xfs_iflush_cluster( 2862bad55843SDavid Chinner xfs_inode_t *ip, 2863bad55843SDavid Chinner xfs_buf_t *bp) 2864bad55843SDavid Chinner { 2865bad55843SDavid Chinner xfs_mount_t *mp = ip->i_mount; 28665017e97dSDave Chinner struct xfs_perag *pag; 2867bad55843SDavid Chinner unsigned long first_index, mask; 2868c8f5f12eSDavid Chinner unsigned long inodes_per_cluster; 2869bad55843SDavid Chinner int ilist_size; 2870bad55843SDavid Chinner xfs_inode_t **ilist; 2871bad55843SDavid Chinner xfs_inode_t *iq; 2872bad55843SDavid Chinner int nr_found; 2873bad55843SDavid Chinner int clcount = 0; 2874bad55843SDavid Chinner int bufwasdelwri; 
2875bad55843SDavid Chinner int i; 2876bad55843SDavid Chinner 28775017e97dSDave Chinner pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 2878bad55843SDavid Chinner 2879c8f5f12eSDavid Chinner inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog; 2880c8f5f12eSDavid Chinner ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); 288149383b0eSDavid Chinner ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS); 2882bad55843SDavid Chinner if (!ilist) 288344b56e0aSDave Chinner goto out_put; 2884bad55843SDavid Chinner 2885bad55843SDavid Chinner mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); 2886bad55843SDavid Chinner first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; 28871a3e8f3dSDave Chinner rcu_read_lock(); 2888bad55843SDavid Chinner /* really need a gang lookup range call here */ 2889bad55843SDavid Chinner nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, 2890c8f5f12eSDavid Chinner first_index, inodes_per_cluster); 2891bad55843SDavid Chinner if (nr_found == 0) 2892bad55843SDavid Chinner goto out_free; 2893bad55843SDavid Chinner 2894bad55843SDavid Chinner for (i = 0; i < nr_found; i++) { 2895bad55843SDavid Chinner iq = ilist[i]; 2896bad55843SDavid Chinner if (iq == ip) 2897bad55843SDavid Chinner continue; 28981a3e8f3dSDave Chinner 28991a3e8f3dSDave Chinner /* 29001a3e8f3dSDave Chinner * because this is an RCU protected lookup, we could find a 29011a3e8f3dSDave Chinner * recently freed or even reallocated inode during the lookup. 29021a3e8f3dSDave Chinner * We need to check under the i_flags_lock for a valid inode 29031a3e8f3dSDave Chinner * here. Skip it if it is not valid or the wrong inode. 
29041a3e8f3dSDave Chinner */ 29051a3e8f3dSDave Chinner spin_lock(&ip->i_flags_lock); 29061a3e8f3dSDave Chinner if (!ip->i_ino || 29071a3e8f3dSDave Chinner (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) { 29081a3e8f3dSDave Chinner spin_unlock(&ip->i_flags_lock); 29091a3e8f3dSDave Chinner continue; 29101a3e8f3dSDave Chinner } 29111a3e8f3dSDave Chinner spin_unlock(&ip->i_flags_lock); 29121a3e8f3dSDave Chinner 2913bad55843SDavid Chinner /* 2914bad55843SDavid Chinner * Do an un-protected check to see if the inode is dirty and 2915bad55843SDavid Chinner * is a candidate for flushing. These checks will be repeated 2916bad55843SDavid Chinner * later after the appropriate locks are acquired. 2917bad55843SDavid Chinner */ 291833540408SDavid Chinner if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0) 2919bad55843SDavid Chinner continue; 2920bad55843SDavid Chinner 2921bad55843SDavid Chinner /* 2922bad55843SDavid Chinner * Try to get locks. If any are unavailable or it is pinned, 2923bad55843SDavid Chinner * then this inode cannot be flushed and is skipped. 2924bad55843SDavid Chinner */ 2925bad55843SDavid Chinner 2926bad55843SDavid Chinner if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) 2927bad55843SDavid Chinner continue; 2928bad55843SDavid Chinner if (!xfs_iflock_nowait(iq)) { 2929bad55843SDavid Chinner xfs_iunlock(iq, XFS_ILOCK_SHARED); 2930bad55843SDavid Chinner continue; 2931bad55843SDavid Chinner } 2932bad55843SDavid Chinner if (xfs_ipincount(iq)) { 2933bad55843SDavid Chinner xfs_ifunlock(iq); 2934bad55843SDavid Chinner xfs_iunlock(iq, XFS_ILOCK_SHARED); 2935bad55843SDavid Chinner continue; 2936bad55843SDavid Chinner } 2937bad55843SDavid Chinner 2938bad55843SDavid Chinner /* 2939bad55843SDavid Chinner * arriving here means that this inode can be flushed. First 2940bad55843SDavid Chinner * re-check that it's dirty before flushing. 
2941bad55843SDavid Chinner */ 294233540408SDavid Chinner if (!xfs_inode_clean(iq)) { 2943bad55843SDavid Chinner int error; 2944bad55843SDavid Chinner error = xfs_iflush_int(iq, bp); 2945bad55843SDavid Chinner if (error) { 2946bad55843SDavid Chinner xfs_iunlock(iq, XFS_ILOCK_SHARED); 2947bad55843SDavid Chinner goto cluster_corrupt_out; 2948bad55843SDavid Chinner } 2949bad55843SDavid Chinner clcount++; 2950bad55843SDavid Chinner } else { 2951bad55843SDavid Chinner xfs_ifunlock(iq); 2952bad55843SDavid Chinner } 2953bad55843SDavid Chinner xfs_iunlock(iq, XFS_ILOCK_SHARED); 2954bad55843SDavid Chinner } 2955bad55843SDavid Chinner 2956bad55843SDavid Chinner if (clcount) { 2957bad55843SDavid Chinner XFS_STATS_INC(xs_icluster_flushcnt); 2958bad55843SDavid Chinner XFS_STATS_ADD(xs_icluster_flushinode, clcount); 2959bad55843SDavid Chinner } 2960bad55843SDavid Chinner 2961bad55843SDavid Chinner out_free: 29621a3e8f3dSDave Chinner rcu_read_unlock(); 2963f0e2d93cSDenys Vlasenko kmem_free(ilist); 296444b56e0aSDave Chinner out_put: 296544b56e0aSDave Chinner xfs_perag_put(pag); 2966bad55843SDavid Chinner return 0; 2967bad55843SDavid Chinner 2968bad55843SDavid Chinner 2969bad55843SDavid Chinner cluster_corrupt_out: 2970bad55843SDavid Chinner /* 2971bad55843SDavid Chinner * Corruption detected in the clustering loop. Invalidate the 2972bad55843SDavid Chinner * inode buffer and shut down the filesystem. 2973bad55843SDavid Chinner */ 29741a3e8f3dSDave Chinner rcu_read_unlock(); 2975bad55843SDavid Chinner /* 297643ff2122SChristoph Hellwig * Clean up the buffer. If it was delwri, just release it -- 2977bad55843SDavid Chinner * brelse can handle it with no problems. If not, shut down the 2978bad55843SDavid Chinner * filesystem before releasing the buffer. 
2979bad55843SDavid Chinner */ 298043ff2122SChristoph Hellwig bufwasdelwri = (bp->b_flags & _XBF_DELWRI_Q); 2981bad55843SDavid Chinner if (bufwasdelwri) 2982bad55843SDavid Chinner xfs_buf_relse(bp); 2983bad55843SDavid Chinner 2984bad55843SDavid Chinner xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 2985bad55843SDavid Chinner 2986bad55843SDavid Chinner if (!bufwasdelwri) { 2987bad55843SDavid Chinner /* 2988bad55843SDavid Chinner * Just like incore_relse: if we have b_iodone functions, 2989bad55843SDavid Chinner * mark the buffer as an error and call them. Otherwise 2990bad55843SDavid Chinner * mark it as stale and brelse. 2991bad55843SDavid Chinner */ 2992cb669ca5SChristoph Hellwig if (bp->b_iodone) { 2993bad55843SDavid Chinner XFS_BUF_UNDONE(bp); 2994c867cb61SChristoph Hellwig xfs_buf_stale(bp); 29955a52c2a5SChandra Seetharaman xfs_buf_ioerror(bp, EIO); 29961a1a3e97SChristoph Hellwig xfs_buf_ioend(bp, 0); 2997bad55843SDavid Chinner } else { 2998c867cb61SChristoph Hellwig xfs_buf_stale(bp); 2999bad55843SDavid Chinner xfs_buf_relse(bp); 3000bad55843SDavid Chinner } 3001bad55843SDavid Chinner } 3002bad55843SDavid Chinner 3003bad55843SDavid Chinner /* 3004bad55843SDavid Chinner * Unlocks the flush lock 3005bad55843SDavid Chinner */ 300604913fddSDave Chinner xfs_iflush_abort(iq, false); 3007f0e2d93cSDenys Vlasenko kmem_free(ilist); 300844b56e0aSDave Chinner xfs_perag_put(pag); 3009bad55843SDavid Chinner return XFS_ERROR(EFSCORRUPTED); 3010bad55843SDavid Chinner } 3011bad55843SDavid Chinner 30121da177e4SLinus Torvalds /* 30134c46819aSChristoph Hellwig * Flush dirty inode metadata into the backing buffer. 30144c46819aSChristoph Hellwig * 30154c46819aSChristoph Hellwig * The caller must have the inode lock and the inode flush lock held. The 30164c46819aSChristoph Hellwig * inode lock will still be held upon return to the caller, and the inode 30174c46819aSChristoph Hellwig * flush lock will be released after the inode has reached the disk. 
30184c46819aSChristoph Hellwig * 30194c46819aSChristoph Hellwig * The caller must write out the buffer returned in *bpp and release it. 30201da177e4SLinus Torvalds */ 30211da177e4SLinus Torvalds int 30221da177e4SLinus Torvalds xfs_iflush( 30234c46819aSChristoph Hellwig struct xfs_inode *ip, 30244c46819aSChristoph Hellwig struct xfs_buf **bpp) 30251da177e4SLinus Torvalds { 30264c46819aSChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 30274c46819aSChristoph Hellwig struct xfs_buf *bp; 30284c46819aSChristoph Hellwig struct xfs_dinode *dip; 30291da177e4SLinus Torvalds int error; 30301da177e4SLinus Torvalds 30311da177e4SLinus Torvalds XFS_STATS_INC(xs_iflush_count); 30321da177e4SLinus Torvalds 3033579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 3034474fce06SChristoph Hellwig ASSERT(xfs_isiflocked(ip)); 30351da177e4SLinus Torvalds ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 30368096b1ebSChristoph Hellwig ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); 30371da177e4SLinus Torvalds 30384c46819aSChristoph Hellwig *bpp = NULL; 30391da177e4SLinus Torvalds 30401da177e4SLinus Torvalds xfs_iunpin_wait(ip); 30411da177e4SLinus Torvalds 30421da177e4SLinus Torvalds /* 30434b6a4688SDave Chinner * For stale inodes we cannot rely on the backing buffer remaining 30444b6a4688SDave Chinner * stale in cache for the remaining life of the stale inode and so 3045475ee413SChristoph Hellwig * xfs_imap_to_bp() below may give us a buffer that no longer contains 30464b6a4688SDave Chinner * inodes below. We have to check this after ensuring the inode is 30474b6a4688SDave Chinner * unpinned so that it is safe to reclaim the stale inode after the 30484b6a4688SDave Chinner * flush call. 
30494b6a4688SDave Chinner */ 30504b6a4688SDave Chinner if (xfs_iflags_test(ip, XFS_ISTALE)) { 30514b6a4688SDave Chinner xfs_ifunlock(ip); 30524b6a4688SDave Chinner return 0; 30534b6a4688SDave Chinner } 30544b6a4688SDave Chinner 30554b6a4688SDave Chinner /* 30561da177e4SLinus Torvalds * This may have been unpinned because the filesystem is shutting 30571da177e4SLinus Torvalds * down forcibly. If that's the case we must not write this inode 305832ce90a4SChristoph Hellwig * to disk, because the log record didn't make it to disk. 305932ce90a4SChristoph Hellwig * 306032ce90a4SChristoph Hellwig * We also have to remove the log item from the AIL in this case, 306132ce90a4SChristoph Hellwig * as we wait for an empty AIL as part of the unmount process. 30621da177e4SLinus Torvalds */ 30631da177e4SLinus Torvalds if (XFS_FORCED_SHUTDOWN(mp)) { 306432ce90a4SChristoph Hellwig error = XFS_ERROR(EIO); 306532ce90a4SChristoph Hellwig goto abort_out; 30661da177e4SLinus Torvalds } 30671da177e4SLinus Torvalds 30681da177e4SLinus Torvalds /* 3069a3f74ffbSDavid Chinner * Get the buffer containing the on-disk inode. 3070a3f74ffbSDavid Chinner */ 3071475ee413SChristoph Hellwig error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK, 3072475ee413SChristoph Hellwig 0); 3073a3f74ffbSDavid Chinner if (error || !bp) { 3074a3f74ffbSDavid Chinner xfs_ifunlock(ip); 3075a3f74ffbSDavid Chinner return error; 3076a3f74ffbSDavid Chinner } 3077a3f74ffbSDavid Chinner 3078a3f74ffbSDavid Chinner /* 30791da177e4SLinus Torvalds * First flush out the inode that xfs_iflush was called with. 30801da177e4SLinus Torvalds */ 30811da177e4SLinus Torvalds error = xfs_iflush_int(ip, bp); 3082bad55843SDavid Chinner if (error) 30831da177e4SLinus Torvalds goto corrupt_out; 30841da177e4SLinus Torvalds 30851da177e4SLinus Torvalds /* 3086a3f74ffbSDavid Chinner * If the buffer is pinned then push on the log now so we won't 3087a3f74ffbSDavid Chinner * get stuck waiting in the write for too long. 
3088a3f74ffbSDavid Chinner */ 3089811e64c7SChandra Seetharaman if (xfs_buf_ispinned(bp)) 3090a14a348bSChristoph Hellwig xfs_log_force(mp, 0); 3091a3f74ffbSDavid Chinner 3092a3f74ffbSDavid Chinner /* 30931da177e4SLinus Torvalds * inode clustering: 30941da177e4SLinus Torvalds * see if other inodes can be gathered into this write 30951da177e4SLinus Torvalds */ 3096bad55843SDavid Chinner error = xfs_iflush_cluster(ip, bp); 3097bad55843SDavid Chinner if (error) 30981da177e4SLinus Torvalds goto cluster_corrupt_out; 30991da177e4SLinus Torvalds 31004c46819aSChristoph Hellwig *bpp = bp; 31014c46819aSChristoph Hellwig return 0; 31021da177e4SLinus Torvalds 31031da177e4SLinus Torvalds corrupt_out: 31041da177e4SLinus Torvalds xfs_buf_relse(bp); 31057d04a335SNathan Scott xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 31061da177e4SLinus Torvalds cluster_corrupt_out: 310732ce90a4SChristoph Hellwig error = XFS_ERROR(EFSCORRUPTED); 310832ce90a4SChristoph Hellwig abort_out: 31091da177e4SLinus Torvalds /* 31101da177e4SLinus Torvalds * Unlocks the flush lock 31111da177e4SLinus Torvalds */ 311204913fddSDave Chinner xfs_iflush_abort(ip, false); 311332ce90a4SChristoph Hellwig return error; 31141da177e4SLinus Torvalds } 31151da177e4SLinus Torvalds 31161da177e4SLinus Torvalds STATIC int 31171da177e4SLinus Torvalds xfs_iflush_int( 311893848a99SChristoph Hellwig struct xfs_inode *ip, 311993848a99SChristoph Hellwig struct xfs_buf *bp) 31201da177e4SLinus Torvalds { 312193848a99SChristoph Hellwig struct xfs_inode_log_item *iip = ip->i_itemp; 312293848a99SChristoph Hellwig struct xfs_dinode *dip; 312393848a99SChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 31241da177e4SLinus Torvalds 3125579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 3126474fce06SChristoph Hellwig ASSERT(xfs_isiflocked(ip)); 31271da177e4SLinus Torvalds ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 31288096b1ebSChristoph Hellwig ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, 
XFS_DATA_FORK)); 312993848a99SChristoph Hellwig ASSERT(iip != NULL && iip->ili_fields != 0); 31301da177e4SLinus Torvalds 31311da177e4SLinus Torvalds /* set *dip = inode's place in the buffer */ 313292bfc6e7SChristoph Hellwig dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); 31331da177e4SLinus Torvalds 313469ef921bSChristoph Hellwig if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), 31351da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { 31366a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 31376a19d939SDave Chinner "%s: Bad inode %Lu magic number 0x%x, ptr 0x%p", 31386a19d939SDave Chinner __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip); 31391da177e4SLinus Torvalds goto corrupt_out; 31401da177e4SLinus Torvalds } 31411da177e4SLinus Torvalds if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC, 31421da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) { 31436a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 31446a19d939SDave Chinner "%s: Bad inode %Lu, ptr 0x%p, magic number 0x%x", 31456a19d939SDave Chinner __func__, ip->i_ino, ip, ip->i_d.di_magic); 31461da177e4SLinus Torvalds goto corrupt_out; 31471da177e4SLinus Torvalds } 3148abbede1bSAl Viro if (S_ISREG(ip->i_d.di_mode)) { 31491da177e4SLinus Torvalds if (XFS_TEST_ERROR( 31501da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && 31511da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), 31521da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) { 31536a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 31546a19d939SDave Chinner "%s: Bad regular inode %Lu, ptr 0x%p", 31556a19d939SDave Chinner __func__, ip->i_ino, ip); 31561da177e4SLinus Torvalds goto corrupt_out; 31571da177e4SLinus Torvalds } 3158abbede1bSAl Viro } else if (S_ISDIR(ip->i_d.di_mode)) { 31591da177e4SLinus Torvalds if (XFS_TEST_ERROR( 31601da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && 
31611da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) && 31621da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL), 31631da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) { 31646a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 31656a19d939SDave Chinner "%s: Bad directory inode %Lu, ptr 0x%p", 31666a19d939SDave Chinner __func__, ip->i_ino, ip); 31671da177e4SLinus Torvalds goto corrupt_out; 31681da177e4SLinus Torvalds } 31691da177e4SLinus Torvalds } 31701da177e4SLinus Torvalds if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents > 31711da177e4SLinus Torvalds ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5, 31721da177e4SLinus Torvalds XFS_RANDOM_IFLUSH_5)) { 31736a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 31746a19d939SDave Chinner "%s: detected corrupt incore inode %Lu, " 31756a19d939SDave Chinner "total extents = %d, nblocks = %Ld, ptr 0x%p", 31766a19d939SDave Chinner __func__, ip->i_ino, 31771da177e4SLinus Torvalds ip->i_d.di_nextents + ip->i_d.di_anextents, 31786a19d939SDave Chinner ip->i_d.di_nblocks, ip); 31791da177e4SLinus Torvalds goto corrupt_out; 31801da177e4SLinus Torvalds } 31811da177e4SLinus Torvalds if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize, 31821da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) { 31836a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 31846a19d939SDave Chinner "%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p", 31856a19d939SDave Chinner __func__, ip->i_ino, ip->i_d.di_forkoff, ip); 31861da177e4SLinus Torvalds goto corrupt_out; 31871da177e4SLinus Torvalds } 3188e60896d8SDave Chinner 31891da177e4SLinus Torvalds /* 3190e60896d8SDave Chinner * Inode item log recovery for v1/v2 inodes are dependent on the 3191e60896d8SDave Chinner * di_flushiter count for correct sequencing. 
We bump the flush 3192e60896d8SDave Chinner * iteration count so we can detect flushes which postdate a log record 3193e60896d8SDave Chinner * during recovery. This is redundant as we now log every change and 3194e60896d8SDave Chinner * hence this can't happen but we need to still do it to ensure 3195e60896d8SDave Chinner * backwards compatibility with old kernels that predate logging all 3196e60896d8SDave Chinner * inode changes. 31971da177e4SLinus Torvalds */ 3198e60896d8SDave Chinner if (ip->i_d.di_version < 3) 31991da177e4SLinus Torvalds ip->i_d.di_flushiter++; 32001da177e4SLinus Torvalds 32011da177e4SLinus Torvalds /* 32021da177e4SLinus Torvalds * Copy the dirty parts of the inode into the on-disk 32031da177e4SLinus Torvalds * inode. We always copy out the core of the inode, 32041da177e4SLinus Torvalds * because if the inode is dirty at all the core must 32051da177e4SLinus Torvalds * be. 32061da177e4SLinus Torvalds */ 320781591fe2SChristoph Hellwig xfs_dinode_to_disk(dip, &ip->i_d); 32081da177e4SLinus Torvalds 32091da177e4SLinus Torvalds /* Wrap, we never let the log put out DI_MAX_FLUSH */ 32101da177e4SLinus Torvalds if (ip->i_d.di_flushiter == DI_MAX_FLUSH) 32111da177e4SLinus Torvalds ip->i_d.di_flushiter = 0; 32121da177e4SLinus Torvalds 32131da177e4SLinus Torvalds /* 32141da177e4SLinus Torvalds * If this is really an old format inode and the superblock version 32151da177e4SLinus Torvalds * has not been updated to support only new format inodes, then 32161da177e4SLinus Torvalds * convert back to the old inode format. If the superblock version 32171da177e4SLinus Torvalds * has been updated, then make the conversion permanent. 
32181da177e4SLinus Torvalds */ 321951ce16d5SChristoph Hellwig ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb)); 322051ce16d5SChristoph Hellwig if (ip->i_d.di_version == 1) { 322162118709SEric Sandeen if (!xfs_sb_version_hasnlink(&mp->m_sb)) { 32221da177e4SLinus Torvalds /* 32231da177e4SLinus Torvalds * Convert it back. 32241da177e4SLinus Torvalds */ 32251da177e4SLinus Torvalds ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); 322681591fe2SChristoph Hellwig dip->di_onlink = cpu_to_be16(ip->i_d.di_nlink); 32271da177e4SLinus Torvalds } else { 32281da177e4SLinus Torvalds /* 32291da177e4SLinus Torvalds * The superblock version has already been bumped, 32301da177e4SLinus Torvalds * so just make the conversion to the new inode 32311da177e4SLinus Torvalds * format permanent. 32321da177e4SLinus Torvalds */ 323351ce16d5SChristoph Hellwig ip->i_d.di_version = 2; 323451ce16d5SChristoph Hellwig dip->di_version = 2; 32351da177e4SLinus Torvalds ip->i_d.di_onlink = 0; 323681591fe2SChristoph Hellwig dip->di_onlink = 0; 32371da177e4SLinus Torvalds memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 323881591fe2SChristoph Hellwig memset(&(dip->di_pad[0]), 0, 323981591fe2SChristoph Hellwig sizeof(dip->di_pad)); 32406743099cSArkadiusz Mi?kiewicz ASSERT(xfs_get_projid(ip) == 0); 32411da177e4SLinus Torvalds } 32421da177e4SLinus Torvalds } 32431da177e4SLinus Torvalds 3244e4ac967bSDavid Chinner xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp); 3245e4ac967bSDavid Chinner if (XFS_IFORK_Q(ip)) 3246e4ac967bSDavid Chinner xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp); 32471da177e4SLinus Torvalds xfs_inobp_check(mp, bp); 32481da177e4SLinus Torvalds 32491da177e4SLinus Torvalds /* 3250f5d8d5c4SChristoph Hellwig * We've recorded everything logged in the inode, so we'd like to clear 3251f5d8d5c4SChristoph Hellwig * the ili_fields bits so we don't log and flush things unnecessarily. 
3252f5d8d5c4SChristoph Hellwig * However, we can't stop logging all this information until the data 3253f5d8d5c4SChristoph Hellwig * we've copied into the disk buffer is written to disk. If we did we 3254f5d8d5c4SChristoph Hellwig * might overwrite the copy of the inode in the log with all the data 3255f5d8d5c4SChristoph Hellwig * after re-logging only part of it, and in the face of a crash we 3256f5d8d5c4SChristoph Hellwig * wouldn't have all the data we need to recover. 32571da177e4SLinus Torvalds * 3258f5d8d5c4SChristoph Hellwig * What we do is move the bits to the ili_last_fields field. When 3259f5d8d5c4SChristoph Hellwig * logging the inode, these bits are moved back to the ili_fields field. 3260f5d8d5c4SChristoph Hellwig * In the xfs_iflush_done() routine we clear ili_last_fields, since we 3261f5d8d5c4SChristoph Hellwig * know that the information those bits represent is permanently on 3262f5d8d5c4SChristoph Hellwig * disk. As long as the flush completes before the inode is logged 3263f5d8d5c4SChristoph Hellwig * again, then both ili_fields and ili_last_fields will be cleared. 32641da177e4SLinus Torvalds * 3265f5d8d5c4SChristoph Hellwig * We can play with the ili_fields bits here, because the inode lock 3266f5d8d5c4SChristoph Hellwig * must be held exclusively in order to set bits there and the flush 3267f5d8d5c4SChristoph Hellwig * lock protects the ili_last_fields bits. Set ili_logged so the flush 3268f5d8d5c4SChristoph Hellwig * done routine can tell whether or not to look in the AIL. Also, store 3269f5d8d5c4SChristoph Hellwig * the current LSN of the inode so that we can tell whether the item has 3270f5d8d5c4SChristoph Hellwig * moved in the AIL from xfs_iflush_done(). In order to read the lsn we 3271f5d8d5c4SChristoph Hellwig * need the AIL lock, because it is a 64 bit value that cannot be read 3272f5d8d5c4SChristoph Hellwig * atomically. 
32731da177e4SLinus Torvalds */ 3274f5d8d5c4SChristoph Hellwig iip->ili_last_fields = iip->ili_fields; 3275f5d8d5c4SChristoph Hellwig iip->ili_fields = 0; 32761da177e4SLinus Torvalds iip->ili_logged = 1; 32771da177e4SLinus Torvalds 32787b2e2a31SDavid Chinner xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 32797b2e2a31SDavid Chinner &iip->ili_item.li_lsn); 32801da177e4SLinus Torvalds 32811da177e4SLinus Torvalds /* 32821da177e4SLinus Torvalds * Attach the function xfs_iflush_done to the inode's 32831da177e4SLinus Torvalds * buffer. This will remove the inode from the AIL 32841da177e4SLinus Torvalds * and unlock the inode's flush lock when the inode is 32851da177e4SLinus Torvalds * completely written to disk. 32861da177e4SLinus Torvalds */ 3287ca30b2a7SChristoph Hellwig xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item); 32881da177e4SLinus Torvalds 328993848a99SChristoph Hellwig /* update the lsn in the on disk inode if required */ 329093848a99SChristoph Hellwig if (ip->i_d.di_version == 3) 329193848a99SChristoph Hellwig dip->di_lsn = cpu_to_be64(iip->ili_item.li_lsn); 329293848a99SChristoph Hellwig 329393848a99SChristoph Hellwig /* generate the checksum. */ 329493848a99SChristoph Hellwig xfs_dinode_calc_crc(mp, dip); 329593848a99SChristoph Hellwig 3296adadbeefSChristoph Hellwig ASSERT(bp->b_fspriv != NULL); 3297cb669ca5SChristoph Hellwig ASSERT(bp->b_iodone != NULL); 32981da177e4SLinus Torvalds return 0; 32991da177e4SLinus Torvalds 33001da177e4SLinus Torvalds corrupt_out: 33011da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 33021da177e4SLinus Torvalds } 3303