11da177e4SLinus Torvalds /* 23e57ecf6SOlaf Weber * Copyright (c) 2000-2006 Silicon Graphics, Inc. 37b718769SNathan Scott * All Rights Reserved. 41da177e4SLinus Torvalds * 57b718769SNathan Scott * This program is free software; you can redistribute it and/or 67b718769SNathan Scott * modify it under the terms of the GNU General Public License as 71da177e4SLinus Torvalds * published by the Free Software Foundation. 81da177e4SLinus Torvalds * 97b718769SNathan Scott * This program is distributed in the hope that it would be useful, 107b718769SNathan Scott * but WITHOUT ANY WARRANTY; without even the implied warranty of 117b718769SNathan Scott * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 127b718769SNathan Scott * GNU General Public License for more details. 131da177e4SLinus Torvalds * 147b718769SNathan Scott * You should have received a copy of the GNU General Public License 157b718769SNathan Scott * along with this program; if not, write the Free Software Foundation, 167b718769SNathan Scott * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 171da177e4SLinus Torvalds */ 1840ebd81dSRobert P. J. Day #include <linux/log2.h> 1940ebd81dSRobert P. J. 
Day 201da177e4SLinus Torvalds #include "xfs.h" 21a844f451SNathan Scott #include "xfs_fs.h" 2270a9883cSDave Chinner #include "xfs_shared.h" 23239880efSDave Chinner #include "xfs_format.h" 24239880efSDave Chinner #include "xfs_log_format.h" 25239880efSDave Chinner #include "xfs_trans_resv.h" 26a844f451SNathan Scott #include "xfs_inum.h" 271da177e4SLinus Torvalds #include "xfs_sb.h" 281da177e4SLinus Torvalds #include "xfs_ag.h" 291da177e4SLinus Torvalds #include "xfs_mount.h" 30a4fbe6abSDave Chinner #include "xfs_inode.h" 3157062787SDave Chinner #include "xfs_da_format.h" 32c24b5dfaSDave Chinner #include "xfs_da_btree.h" 33c24b5dfaSDave Chinner #include "xfs_dir2.h" 34a844f451SNathan Scott #include "xfs_attr_sf.h" 35c24b5dfaSDave Chinner #include "xfs_attr.h" 36239880efSDave Chinner #include "xfs_trans_space.h" 37239880efSDave Chinner #include "xfs_trans.h" 381da177e4SLinus Torvalds #include "xfs_buf_item.h" 39a844f451SNathan Scott #include "xfs_inode_item.h" 40a844f451SNathan Scott #include "xfs_ialloc.h" 41a844f451SNathan Scott #include "xfs_bmap.h" 4268988114SDave Chinner #include "xfs_bmap_util.h" 431da177e4SLinus Torvalds #include "xfs_error.h" 441da177e4SLinus Torvalds #include "xfs_quota.h" 45a4fbe6abSDave Chinner #include "xfs_dinode.h" 462a82b8beSDavid Chinner #include "xfs_filestream.h" 4793848a99SChristoph Hellwig #include "xfs_cksum.h" 480b1b213fSChristoph Hellwig #include "xfs_trace.h" 4933479e05SDave Chinner #include "xfs_icache.h" 50c24b5dfaSDave Chinner #include "xfs_symlink.h" 51239880efSDave Chinner #include "xfs_trans_priv.h" 52239880efSDave Chinner #include "xfs_log.h" 53a4fbe6abSDave Chinner #include "xfs_bmap_btree.h" 541da177e4SLinus Torvalds 551da177e4SLinus Torvalds kmem_zone_t *xfs_inode_zone; 561da177e4SLinus Torvalds 571da177e4SLinus Torvalds /* 588f04c47aSChristoph Hellwig * Used in xfs_itruncate_extents(). This is the maximum number of extents 591da177e4SLinus Torvalds * freed from a file in a single transaction. 
601da177e4SLinus Torvalds */ 611da177e4SLinus Torvalds #define XFS_ITRUNC_MAX_EXTENTS 2 621da177e4SLinus Torvalds 631da177e4SLinus Torvalds STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *); 641da177e4SLinus Torvalds 652a0ec1d9SDave Chinner /* 662a0ec1d9SDave Chinner * helper function to extract extent size hint from inode 672a0ec1d9SDave Chinner */ 682a0ec1d9SDave Chinner xfs_extlen_t 692a0ec1d9SDave Chinner xfs_get_extsz_hint( 702a0ec1d9SDave Chinner struct xfs_inode *ip) 712a0ec1d9SDave Chinner { 722a0ec1d9SDave Chinner if ((ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) && ip->i_d.di_extsize) 732a0ec1d9SDave Chinner return ip->i_d.di_extsize; 742a0ec1d9SDave Chinner if (XFS_IS_REALTIME_INODE(ip)) 752a0ec1d9SDave Chinner return ip->i_mount->m_sb.sb_rextsize; 762a0ec1d9SDave Chinner return 0; 772a0ec1d9SDave Chinner } 782a0ec1d9SDave Chinner 79fa96acadSDave Chinner /* 80*efa70be1SChristoph Hellwig * These two are wrapper routines around the xfs_ilock() routine used to 81*efa70be1SChristoph Hellwig * centralize some grungy code. They are used in places that wish to lock the 82*efa70be1SChristoph Hellwig * inode solely for reading the extents. The reason these places can't just 83*efa70be1SChristoph Hellwig * call xfs_ilock(ip, XFS_ILOCK_SHARED) is that the inode lock also guards to 84*efa70be1SChristoph Hellwig * bringing in of the extents from disk for a file in b-tree format. If the 85*efa70be1SChristoph Hellwig * inode is in b-tree format, then we need to lock the inode exclusively until 86*efa70be1SChristoph Hellwig * the extents are read in. Locking it exclusively all the time would limit 87*efa70be1SChristoph Hellwig * our parallelism unnecessarily, though. What we do instead is check to see 88*efa70be1SChristoph Hellwig * if the extents have been read in yet, and only lock the inode exclusively 89*efa70be1SChristoph Hellwig * if they have not. 
90fa96acadSDave Chinner * 91*efa70be1SChristoph Hellwig * The functions return a value which should be given to the corresponding 9201f4f327SChristoph Hellwig * xfs_iunlock() call. 93fa96acadSDave Chinner */ 94fa96acadSDave Chinner uint 95309ecac8SChristoph Hellwig xfs_ilock_data_map_shared( 96309ecac8SChristoph Hellwig struct xfs_inode *ip) 97fa96acadSDave Chinner { 98309ecac8SChristoph Hellwig uint lock_mode = XFS_ILOCK_SHARED; 99fa96acadSDave Chinner 100309ecac8SChristoph Hellwig if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE && 101309ecac8SChristoph Hellwig (ip->i_df.if_flags & XFS_IFEXTENTS) == 0) 102fa96acadSDave Chinner lock_mode = XFS_ILOCK_EXCL; 103fa96acadSDave Chinner xfs_ilock(ip, lock_mode); 104fa96acadSDave Chinner return lock_mode; 105fa96acadSDave Chinner } 106fa96acadSDave Chinner 107*efa70be1SChristoph Hellwig uint 108*efa70be1SChristoph Hellwig xfs_ilock_attr_map_shared( 109*efa70be1SChristoph Hellwig struct xfs_inode *ip) 110*efa70be1SChristoph Hellwig { 111*efa70be1SChristoph Hellwig uint lock_mode = XFS_ILOCK_SHARED; 112*efa70be1SChristoph Hellwig 113*efa70be1SChristoph Hellwig if (ip->i_d.di_aformat == XFS_DINODE_FMT_BTREE && 114*efa70be1SChristoph Hellwig (ip->i_afp->if_flags & XFS_IFEXTENTS) == 0) 115*efa70be1SChristoph Hellwig lock_mode = XFS_ILOCK_EXCL; 116*efa70be1SChristoph Hellwig xfs_ilock(ip, lock_mode); 117*efa70be1SChristoph Hellwig return lock_mode; 118*efa70be1SChristoph Hellwig } 119*efa70be1SChristoph Hellwig 120fa96acadSDave Chinner /* 121fa96acadSDave Chinner * The xfs inode contains 2 locks: a multi-reader lock called the 122fa96acadSDave Chinner * i_iolock and a multi-reader lock called the i_lock. This routine 123fa96acadSDave Chinner * allows either or both of the locks to be obtained. 124fa96acadSDave Chinner * 125fa96acadSDave Chinner * The 2 locks should always be ordered so that the IO lock is 126fa96acadSDave Chinner * obtained first in order to prevent deadlock. 
127fa96acadSDave Chinner * 128fa96acadSDave Chinner * ip -- the inode being locked 129fa96acadSDave Chinner * lock_flags -- this parameter indicates the inode's locks 130fa96acadSDave Chinner * to be locked. It can be: 131fa96acadSDave Chinner * XFS_IOLOCK_SHARED, 132fa96acadSDave Chinner * XFS_IOLOCK_EXCL, 133fa96acadSDave Chinner * XFS_ILOCK_SHARED, 134fa96acadSDave Chinner * XFS_ILOCK_EXCL, 135fa96acadSDave Chinner * XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED, 136fa96acadSDave Chinner * XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL, 137fa96acadSDave Chinner * XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED, 138fa96acadSDave Chinner * XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL 139fa96acadSDave Chinner */ 140fa96acadSDave Chinner void 141fa96acadSDave Chinner xfs_ilock( 142fa96acadSDave Chinner xfs_inode_t *ip, 143fa96acadSDave Chinner uint lock_flags) 144fa96acadSDave Chinner { 145fa96acadSDave Chinner trace_xfs_ilock(ip, lock_flags, _RET_IP_); 146fa96acadSDave Chinner 147fa96acadSDave Chinner /* 148fa96acadSDave Chinner * You can't set both SHARED and EXCL for the same lock, 149fa96acadSDave Chinner * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED, 150fa96acadSDave Chinner * and XFS_ILOCK_EXCL are valid values to set in lock_flags. 
151fa96acadSDave Chinner */ 152fa96acadSDave Chinner ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != 153fa96acadSDave Chinner (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 154fa96acadSDave Chinner ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 155fa96acadSDave Chinner (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 156fa96acadSDave Chinner ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); 157fa96acadSDave Chinner 158fa96acadSDave Chinner if (lock_flags & XFS_IOLOCK_EXCL) 159fa96acadSDave Chinner mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); 160fa96acadSDave Chinner else if (lock_flags & XFS_IOLOCK_SHARED) 161fa96acadSDave Chinner mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); 162fa96acadSDave Chinner 163fa96acadSDave Chinner if (lock_flags & XFS_ILOCK_EXCL) 164fa96acadSDave Chinner mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); 165fa96acadSDave Chinner else if (lock_flags & XFS_ILOCK_SHARED) 166fa96acadSDave Chinner mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); 167fa96acadSDave Chinner } 168fa96acadSDave Chinner 169fa96acadSDave Chinner /* 170fa96acadSDave Chinner * This is just like xfs_ilock(), except that the caller 171fa96acadSDave Chinner * is guaranteed not to sleep. It returns 1 if it gets 172fa96acadSDave Chinner * the requested locks and 0 otherwise. If the IO lock is 173fa96acadSDave Chinner * obtained but the inode lock cannot be, then the IO lock 174fa96acadSDave Chinner * is dropped before returning. 175fa96acadSDave Chinner * 176fa96acadSDave Chinner * ip -- the inode being locked 177fa96acadSDave Chinner * lock_flags -- this parameter indicates the inode's locks to be 178fa96acadSDave Chinner * to be locked. See the comment for xfs_ilock() for a list 179fa96acadSDave Chinner * of valid values. 
180fa96acadSDave Chinner */ 181fa96acadSDave Chinner int 182fa96acadSDave Chinner xfs_ilock_nowait( 183fa96acadSDave Chinner xfs_inode_t *ip, 184fa96acadSDave Chinner uint lock_flags) 185fa96acadSDave Chinner { 186fa96acadSDave Chinner trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_); 187fa96acadSDave Chinner 188fa96acadSDave Chinner /* 189fa96acadSDave Chinner * You can't set both SHARED and EXCL for the same lock, 190fa96acadSDave Chinner * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED, 191fa96acadSDave Chinner * and XFS_ILOCK_EXCL are valid values to set in lock_flags. 192fa96acadSDave Chinner */ 193fa96acadSDave Chinner ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != 194fa96acadSDave Chinner (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 195fa96acadSDave Chinner ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 196fa96acadSDave Chinner (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 197fa96acadSDave Chinner ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); 198fa96acadSDave Chinner 199fa96acadSDave Chinner if (lock_flags & XFS_IOLOCK_EXCL) { 200fa96acadSDave Chinner if (!mrtryupdate(&ip->i_iolock)) 201fa96acadSDave Chinner goto out; 202fa96acadSDave Chinner } else if (lock_flags & XFS_IOLOCK_SHARED) { 203fa96acadSDave Chinner if (!mrtryaccess(&ip->i_iolock)) 204fa96acadSDave Chinner goto out; 205fa96acadSDave Chinner } 206fa96acadSDave Chinner if (lock_flags & XFS_ILOCK_EXCL) { 207fa96acadSDave Chinner if (!mrtryupdate(&ip->i_lock)) 208fa96acadSDave Chinner goto out_undo_iolock; 209fa96acadSDave Chinner } else if (lock_flags & XFS_ILOCK_SHARED) { 210fa96acadSDave Chinner if (!mrtryaccess(&ip->i_lock)) 211fa96acadSDave Chinner goto out_undo_iolock; 212fa96acadSDave Chinner } 213fa96acadSDave Chinner return 1; 214fa96acadSDave Chinner 215fa96acadSDave Chinner out_undo_iolock: 216fa96acadSDave Chinner if (lock_flags & XFS_IOLOCK_EXCL) 217fa96acadSDave Chinner mrunlock_excl(&ip->i_iolock); 218fa96acadSDave Chinner else if 
(lock_flags & XFS_IOLOCK_SHARED) 219fa96acadSDave Chinner mrunlock_shared(&ip->i_iolock); 220fa96acadSDave Chinner out: 221fa96acadSDave Chinner return 0; 222fa96acadSDave Chinner } 223fa96acadSDave Chinner 224fa96acadSDave Chinner /* 225fa96acadSDave Chinner * xfs_iunlock() is used to drop the inode locks acquired with 226fa96acadSDave Chinner * xfs_ilock() and xfs_ilock_nowait(). The caller must pass 227fa96acadSDave Chinner * in the flags given to xfs_ilock() or xfs_ilock_nowait() so 228fa96acadSDave Chinner * that we know which locks to drop. 229fa96acadSDave Chinner * 230fa96acadSDave Chinner * ip -- the inode being unlocked 231fa96acadSDave Chinner * lock_flags -- this parameter indicates the inode's locks to be 232fa96acadSDave Chinner * to be unlocked. See the comment for xfs_ilock() for a list 233fa96acadSDave Chinner * of valid values for this parameter. 234fa96acadSDave Chinner * 235fa96acadSDave Chinner */ 236fa96acadSDave Chinner void 237fa96acadSDave Chinner xfs_iunlock( 238fa96acadSDave Chinner xfs_inode_t *ip, 239fa96acadSDave Chinner uint lock_flags) 240fa96acadSDave Chinner { 241fa96acadSDave Chinner /* 242fa96acadSDave Chinner * You can't set both SHARED and EXCL for the same lock, 243fa96acadSDave Chinner * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED, 244fa96acadSDave Chinner * and XFS_ILOCK_EXCL are valid values to set in lock_flags. 
245fa96acadSDave Chinner */ 246fa96acadSDave Chinner ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != 247fa96acadSDave Chinner (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 248fa96acadSDave Chinner ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 249fa96acadSDave Chinner (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 250fa96acadSDave Chinner ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); 251fa96acadSDave Chinner ASSERT(lock_flags != 0); 252fa96acadSDave Chinner 253fa96acadSDave Chinner if (lock_flags & XFS_IOLOCK_EXCL) 254fa96acadSDave Chinner mrunlock_excl(&ip->i_iolock); 255fa96acadSDave Chinner else if (lock_flags & XFS_IOLOCK_SHARED) 256fa96acadSDave Chinner mrunlock_shared(&ip->i_iolock); 257fa96acadSDave Chinner 258fa96acadSDave Chinner if (lock_flags & XFS_ILOCK_EXCL) 259fa96acadSDave Chinner mrunlock_excl(&ip->i_lock); 260fa96acadSDave Chinner else if (lock_flags & XFS_ILOCK_SHARED) 261fa96acadSDave Chinner mrunlock_shared(&ip->i_lock); 262fa96acadSDave Chinner 263fa96acadSDave Chinner trace_xfs_iunlock(ip, lock_flags, _RET_IP_); 264fa96acadSDave Chinner } 265fa96acadSDave Chinner 266fa96acadSDave Chinner /* 267fa96acadSDave Chinner * give up write locks. the i/o lock cannot be held nested 268fa96acadSDave Chinner * if it is being demoted. 
269fa96acadSDave Chinner */ 270fa96acadSDave Chinner void 271fa96acadSDave Chinner xfs_ilock_demote( 272fa96acadSDave Chinner xfs_inode_t *ip, 273fa96acadSDave Chinner uint lock_flags) 274fa96acadSDave Chinner { 275fa96acadSDave Chinner ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)); 276fa96acadSDave Chinner ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0); 277fa96acadSDave Chinner 278fa96acadSDave Chinner if (lock_flags & XFS_ILOCK_EXCL) 279fa96acadSDave Chinner mrdemote(&ip->i_lock); 280fa96acadSDave Chinner if (lock_flags & XFS_IOLOCK_EXCL) 281fa96acadSDave Chinner mrdemote(&ip->i_iolock); 282fa96acadSDave Chinner 283fa96acadSDave Chinner trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_); 284fa96acadSDave Chinner } 285fa96acadSDave Chinner 286742ae1e3SDave Chinner #if defined(DEBUG) || defined(XFS_WARN) 287fa96acadSDave Chinner int 288fa96acadSDave Chinner xfs_isilocked( 289fa96acadSDave Chinner xfs_inode_t *ip, 290fa96acadSDave Chinner uint lock_flags) 291fa96acadSDave Chinner { 292fa96acadSDave Chinner if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) { 293fa96acadSDave Chinner if (!(lock_flags & XFS_ILOCK_SHARED)) 294fa96acadSDave Chinner return !!ip->i_lock.mr_writer; 295fa96acadSDave Chinner return rwsem_is_locked(&ip->i_lock.mr_lock); 296fa96acadSDave Chinner } 297fa96acadSDave Chinner 298fa96acadSDave Chinner if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) { 299fa96acadSDave Chinner if (!(lock_flags & XFS_IOLOCK_SHARED)) 300fa96acadSDave Chinner return !!ip->i_iolock.mr_writer; 301fa96acadSDave Chinner return rwsem_is_locked(&ip->i_iolock.mr_lock); 302fa96acadSDave Chinner } 303fa96acadSDave Chinner 304fa96acadSDave Chinner ASSERT(0); 305fa96acadSDave Chinner return 0; 306fa96acadSDave Chinner } 307fa96acadSDave Chinner #endif 308fa96acadSDave Chinner 309c24b5dfaSDave Chinner #ifdef DEBUG 310c24b5dfaSDave Chinner int xfs_locked_n; 311c24b5dfaSDave Chinner int xfs_small_retries; 312c24b5dfaSDave Chinner int xfs_middle_retries; 
313c24b5dfaSDave Chinner int xfs_lots_retries; 314c24b5dfaSDave Chinner int xfs_lock_delays; 315c24b5dfaSDave Chinner #endif 316c24b5dfaSDave Chinner 317c24b5dfaSDave Chinner /* 318c24b5dfaSDave Chinner * Bump the subclass so xfs_lock_inodes() acquires each lock with 319c24b5dfaSDave Chinner * a different value 320c24b5dfaSDave Chinner */ 321c24b5dfaSDave Chinner static inline int 322c24b5dfaSDave Chinner xfs_lock_inumorder(int lock_mode, int subclass) 323c24b5dfaSDave Chinner { 324c24b5dfaSDave Chinner if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) 325c24b5dfaSDave Chinner lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT; 326c24b5dfaSDave Chinner if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) 327c24b5dfaSDave Chinner lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT; 328c24b5dfaSDave Chinner 329c24b5dfaSDave Chinner return lock_mode; 330c24b5dfaSDave Chinner } 331c24b5dfaSDave Chinner 332c24b5dfaSDave Chinner /* 333c24b5dfaSDave Chinner * The following routine will lock n inodes in exclusive mode. 334c24b5dfaSDave Chinner * We assume the caller calls us with the inodes in i_ino order. 335c24b5dfaSDave Chinner * 336c24b5dfaSDave Chinner * We need to detect deadlock where an inode that we lock 337c24b5dfaSDave Chinner * is in the AIL and we start waiting for another inode that is locked 338c24b5dfaSDave Chinner * by a thread in a long running transaction (such as truncate). This can 339c24b5dfaSDave Chinner * result in deadlock since the long running trans might need to wait 340c24b5dfaSDave Chinner * for the inode we just locked in order to push the tail and free space 341c24b5dfaSDave Chinner * in the log. 
342c24b5dfaSDave Chinner */ 343c24b5dfaSDave Chinner void 344c24b5dfaSDave Chinner xfs_lock_inodes( 345c24b5dfaSDave Chinner xfs_inode_t **ips, 346c24b5dfaSDave Chinner int inodes, 347c24b5dfaSDave Chinner uint lock_mode) 348c24b5dfaSDave Chinner { 349c24b5dfaSDave Chinner int attempts = 0, i, j, try_lock; 350c24b5dfaSDave Chinner xfs_log_item_t *lp; 351c24b5dfaSDave Chinner 352c24b5dfaSDave Chinner ASSERT(ips && (inodes >= 2)); /* we need at least two */ 353c24b5dfaSDave Chinner 354c24b5dfaSDave Chinner try_lock = 0; 355c24b5dfaSDave Chinner i = 0; 356c24b5dfaSDave Chinner 357c24b5dfaSDave Chinner again: 358c24b5dfaSDave Chinner for (; i < inodes; i++) { 359c24b5dfaSDave Chinner ASSERT(ips[i]); 360c24b5dfaSDave Chinner 361c24b5dfaSDave Chinner if (i && (ips[i] == ips[i-1])) /* Already locked */ 362c24b5dfaSDave Chinner continue; 363c24b5dfaSDave Chinner 364c24b5dfaSDave Chinner /* 365c24b5dfaSDave Chinner * If try_lock is not set yet, make sure all locked inodes 366c24b5dfaSDave Chinner * are not in the AIL. 367c24b5dfaSDave Chinner * If any are, set try_lock to be used later. 368c24b5dfaSDave Chinner */ 369c24b5dfaSDave Chinner 370c24b5dfaSDave Chinner if (!try_lock) { 371c24b5dfaSDave Chinner for (j = (i - 1); j >= 0 && !try_lock; j--) { 372c24b5dfaSDave Chinner lp = (xfs_log_item_t *)ips[j]->i_itemp; 373c24b5dfaSDave Chinner if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { 374c24b5dfaSDave Chinner try_lock++; 375c24b5dfaSDave Chinner } 376c24b5dfaSDave Chinner } 377c24b5dfaSDave Chinner } 378c24b5dfaSDave Chinner 379c24b5dfaSDave Chinner /* 380c24b5dfaSDave Chinner * If any of the previous locks we have locked is in the AIL, 381c24b5dfaSDave Chinner * we must TRY to get the second and subsequent locks. If 382c24b5dfaSDave Chinner * we can't get any, we must release all we have 383c24b5dfaSDave Chinner * and try again. 
384c24b5dfaSDave Chinner */ 385c24b5dfaSDave Chinner 386c24b5dfaSDave Chinner if (try_lock) { 387c24b5dfaSDave Chinner /* try_lock must be 0 if i is 0. */ 388c24b5dfaSDave Chinner /* 389c24b5dfaSDave Chinner * try_lock means we have an inode locked 390c24b5dfaSDave Chinner * that is in the AIL. 391c24b5dfaSDave Chinner */ 392c24b5dfaSDave Chinner ASSERT(i != 0); 393c24b5dfaSDave Chinner if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) { 394c24b5dfaSDave Chinner attempts++; 395c24b5dfaSDave Chinner 396c24b5dfaSDave Chinner /* 397c24b5dfaSDave Chinner * Unlock all previous guys and try again. 398c24b5dfaSDave Chinner * xfs_iunlock will try to push the tail 399c24b5dfaSDave Chinner * if the inode is in the AIL. 400c24b5dfaSDave Chinner */ 401c24b5dfaSDave Chinner 402c24b5dfaSDave Chinner for(j = i - 1; j >= 0; j--) { 403c24b5dfaSDave Chinner 404c24b5dfaSDave Chinner /* 405c24b5dfaSDave Chinner * Check to see if we've already 406c24b5dfaSDave Chinner * unlocked this one. 407c24b5dfaSDave Chinner * Not the first one going back, 408c24b5dfaSDave Chinner * and the inode ptr is the same. 
409c24b5dfaSDave Chinner */ 410c24b5dfaSDave Chinner if ((j != (i - 1)) && ips[j] == 411c24b5dfaSDave Chinner ips[j+1]) 412c24b5dfaSDave Chinner continue; 413c24b5dfaSDave Chinner 414c24b5dfaSDave Chinner xfs_iunlock(ips[j], lock_mode); 415c24b5dfaSDave Chinner } 416c24b5dfaSDave Chinner 417c24b5dfaSDave Chinner if ((attempts % 5) == 0) { 418c24b5dfaSDave Chinner delay(1); /* Don't just spin the CPU */ 419c24b5dfaSDave Chinner #ifdef DEBUG 420c24b5dfaSDave Chinner xfs_lock_delays++; 421c24b5dfaSDave Chinner #endif 422c24b5dfaSDave Chinner } 423c24b5dfaSDave Chinner i = 0; 424c24b5dfaSDave Chinner try_lock = 0; 425c24b5dfaSDave Chinner goto again; 426c24b5dfaSDave Chinner } 427c24b5dfaSDave Chinner } else { 428c24b5dfaSDave Chinner xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i)); 429c24b5dfaSDave Chinner } 430c24b5dfaSDave Chinner } 431c24b5dfaSDave Chinner 432c24b5dfaSDave Chinner #ifdef DEBUG 433c24b5dfaSDave Chinner if (attempts) { 434c24b5dfaSDave Chinner if (attempts < 5) xfs_small_retries++; 435c24b5dfaSDave Chinner else if (attempts < 100) xfs_middle_retries++; 436c24b5dfaSDave Chinner else xfs_lots_retries++; 437c24b5dfaSDave Chinner } else { 438c24b5dfaSDave Chinner xfs_locked_n++; 439c24b5dfaSDave Chinner } 440c24b5dfaSDave Chinner #endif 441c24b5dfaSDave Chinner } 442c24b5dfaSDave Chinner 443c24b5dfaSDave Chinner /* 444c24b5dfaSDave Chinner * xfs_lock_two_inodes() can only be used to lock one type of lock 445c24b5dfaSDave Chinner * at a time - the iolock or the ilock, but not both at once. If 446c24b5dfaSDave Chinner * we lock both at once, lockdep will report false positives saying 447c24b5dfaSDave Chinner * we have violated locking orders. 
448c24b5dfaSDave Chinner */ 449c24b5dfaSDave Chinner void 450c24b5dfaSDave Chinner xfs_lock_two_inodes( 451c24b5dfaSDave Chinner xfs_inode_t *ip0, 452c24b5dfaSDave Chinner xfs_inode_t *ip1, 453c24b5dfaSDave Chinner uint lock_mode) 454c24b5dfaSDave Chinner { 455c24b5dfaSDave Chinner xfs_inode_t *temp; 456c24b5dfaSDave Chinner int attempts = 0; 457c24b5dfaSDave Chinner xfs_log_item_t *lp; 458c24b5dfaSDave Chinner 459c24b5dfaSDave Chinner if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) 460c24b5dfaSDave Chinner ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0); 461c24b5dfaSDave Chinner ASSERT(ip0->i_ino != ip1->i_ino); 462c24b5dfaSDave Chinner 463c24b5dfaSDave Chinner if (ip0->i_ino > ip1->i_ino) { 464c24b5dfaSDave Chinner temp = ip0; 465c24b5dfaSDave Chinner ip0 = ip1; 466c24b5dfaSDave Chinner ip1 = temp; 467c24b5dfaSDave Chinner } 468c24b5dfaSDave Chinner 469c24b5dfaSDave Chinner again: 470c24b5dfaSDave Chinner xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0)); 471c24b5dfaSDave Chinner 472c24b5dfaSDave Chinner /* 473c24b5dfaSDave Chinner * If the first lock we have locked is in the AIL, we must TRY to get 474c24b5dfaSDave Chinner * the second lock. If we can't get it, we must release the first one 475c24b5dfaSDave Chinner * and try again. 
476c24b5dfaSDave Chinner */ 477c24b5dfaSDave Chinner lp = (xfs_log_item_t *)ip0->i_itemp; 478c24b5dfaSDave Chinner if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { 479c24b5dfaSDave Chinner if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) { 480c24b5dfaSDave Chinner xfs_iunlock(ip0, lock_mode); 481c24b5dfaSDave Chinner if ((++attempts % 5) == 0) 482c24b5dfaSDave Chinner delay(1); /* Don't just spin the CPU */ 483c24b5dfaSDave Chinner goto again; 484c24b5dfaSDave Chinner } 485c24b5dfaSDave Chinner } else { 486c24b5dfaSDave Chinner xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1)); 487c24b5dfaSDave Chinner } 488c24b5dfaSDave Chinner } 489c24b5dfaSDave Chinner 490c24b5dfaSDave Chinner 491fa96acadSDave Chinner void 492fa96acadSDave Chinner __xfs_iflock( 493fa96acadSDave Chinner struct xfs_inode *ip) 494fa96acadSDave Chinner { 495fa96acadSDave Chinner wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT); 496fa96acadSDave Chinner DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT); 497fa96acadSDave Chinner 498fa96acadSDave Chinner do { 499fa96acadSDave Chinner prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE); 500fa96acadSDave Chinner if (xfs_isiflocked(ip)) 501fa96acadSDave Chinner io_schedule(); 502fa96acadSDave Chinner } while (!xfs_iflock_nowait(ip)); 503fa96acadSDave Chinner 504fa96acadSDave Chinner finish_wait(wq, &wait.wait); 505fa96acadSDave Chinner } 506fa96acadSDave Chinner 5071da177e4SLinus Torvalds STATIC uint 5081da177e4SLinus Torvalds _xfs_dic2xflags( 5091da177e4SLinus Torvalds __uint16_t di_flags) 5101da177e4SLinus Torvalds { 5111da177e4SLinus Torvalds uint flags = 0; 5121da177e4SLinus Torvalds 5131da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_ANY) { 5141da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_REALTIME) 5151da177e4SLinus Torvalds flags |= XFS_XFLAG_REALTIME; 5161da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_PREALLOC) 5171da177e4SLinus Torvalds flags |= XFS_XFLAG_PREALLOC; 5181da177e4SLinus Torvalds if 
(di_flags & XFS_DIFLAG_IMMUTABLE) 5191da177e4SLinus Torvalds flags |= XFS_XFLAG_IMMUTABLE; 5201da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_APPEND) 5211da177e4SLinus Torvalds flags |= XFS_XFLAG_APPEND; 5221da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_SYNC) 5231da177e4SLinus Torvalds flags |= XFS_XFLAG_SYNC; 5241da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_NOATIME) 5251da177e4SLinus Torvalds flags |= XFS_XFLAG_NOATIME; 5261da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_NODUMP) 5271da177e4SLinus Torvalds flags |= XFS_XFLAG_NODUMP; 5281da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_RTINHERIT) 5291da177e4SLinus Torvalds flags |= XFS_XFLAG_RTINHERIT; 5301da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_PROJINHERIT) 5311da177e4SLinus Torvalds flags |= XFS_XFLAG_PROJINHERIT; 5321da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_NOSYMLINKS) 5331da177e4SLinus Torvalds flags |= XFS_XFLAG_NOSYMLINKS; 534dd9f438eSNathan Scott if (di_flags & XFS_DIFLAG_EXTSIZE) 535dd9f438eSNathan Scott flags |= XFS_XFLAG_EXTSIZE; 536dd9f438eSNathan Scott if (di_flags & XFS_DIFLAG_EXTSZINHERIT) 537dd9f438eSNathan Scott flags |= XFS_XFLAG_EXTSZINHERIT; 538d3446eacSBarry Naujok if (di_flags & XFS_DIFLAG_NODEFRAG) 539d3446eacSBarry Naujok flags |= XFS_XFLAG_NODEFRAG; 5402a82b8beSDavid Chinner if (di_flags & XFS_DIFLAG_FILESTREAM) 5412a82b8beSDavid Chinner flags |= XFS_XFLAG_FILESTREAM; 5421da177e4SLinus Torvalds } 5431da177e4SLinus Torvalds 5441da177e4SLinus Torvalds return flags; 5451da177e4SLinus Torvalds } 5461da177e4SLinus Torvalds 5471da177e4SLinus Torvalds uint 5481da177e4SLinus Torvalds xfs_ip2xflags( 5491da177e4SLinus Torvalds xfs_inode_t *ip) 5501da177e4SLinus Torvalds { 551347d1c01SChristoph Hellwig xfs_icdinode_t *dic = &ip->i_d; 5521da177e4SLinus Torvalds 553a916e2bdSNathan Scott return _xfs_dic2xflags(dic->di_flags) | 55445ba598eSChristoph Hellwig (XFS_IFORK_Q(ip) ? 
XFS_XFLAG_HASATTR : 0); 5551da177e4SLinus Torvalds } 5561da177e4SLinus Torvalds 5571da177e4SLinus Torvalds uint 5581da177e4SLinus Torvalds xfs_dic2xflags( 55945ba598eSChristoph Hellwig xfs_dinode_t *dip) 5601da177e4SLinus Torvalds { 56181591fe2SChristoph Hellwig return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) | 56245ba598eSChristoph Hellwig (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0); 5631da177e4SLinus Torvalds } 5641da177e4SLinus Torvalds 5651da177e4SLinus Torvalds /* 566c24b5dfaSDave Chinner * Lookups up an inode from "name". If ci_name is not NULL, then a CI match 567c24b5dfaSDave Chinner * is allowed, otherwise it has to be an exact match. If a CI match is found, 568c24b5dfaSDave Chinner * ci_name->name will point to a the actual name (caller must free) or 569c24b5dfaSDave Chinner * will be set to NULL if an exact match is found. 570c24b5dfaSDave Chinner */ 571c24b5dfaSDave Chinner int 572c24b5dfaSDave Chinner xfs_lookup( 573c24b5dfaSDave Chinner xfs_inode_t *dp, 574c24b5dfaSDave Chinner struct xfs_name *name, 575c24b5dfaSDave Chinner xfs_inode_t **ipp, 576c24b5dfaSDave Chinner struct xfs_name *ci_name) 577c24b5dfaSDave Chinner { 578c24b5dfaSDave Chinner xfs_ino_t inum; 579c24b5dfaSDave Chinner int error; 580c24b5dfaSDave Chinner uint lock_mode; 581c24b5dfaSDave Chinner 582c24b5dfaSDave Chinner trace_xfs_lookup(dp, name); 583c24b5dfaSDave Chinner 584c24b5dfaSDave Chinner if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 585c24b5dfaSDave Chinner return XFS_ERROR(EIO); 586c24b5dfaSDave Chinner 587309ecac8SChristoph Hellwig lock_mode = xfs_ilock_data_map_shared(dp); 588c24b5dfaSDave Chinner error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name); 58901f4f327SChristoph Hellwig xfs_iunlock(dp, lock_mode); 590c24b5dfaSDave Chinner 591c24b5dfaSDave Chinner if (error) 592c24b5dfaSDave Chinner goto out; 593c24b5dfaSDave Chinner 594c24b5dfaSDave Chinner error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp); 595c24b5dfaSDave Chinner if (error) 596c24b5dfaSDave Chinner goto 
out_free_name; 597c24b5dfaSDave Chinner 598c24b5dfaSDave Chinner return 0; 599c24b5dfaSDave Chinner 600c24b5dfaSDave Chinner out_free_name: 601c24b5dfaSDave Chinner if (ci_name) 602c24b5dfaSDave Chinner kmem_free(ci_name->name); 603c24b5dfaSDave Chinner out: 604c24b5dfaSDave Chinner *ipp = NULL; 605c24b5dfaSDave Chinner return error; 606c24b5dfaSDave Chinner } 607c24b5dfaSDave Chinner 608c24b5dfaSDave Chinner /* 6091da177e4SLinus Torvalds * Allocate an inode on disk and return a copy of its in-core version. 6101da177e4SLinus Torvalds * The in-core inode is locked exclusively. Set mode, nlink, and rdev 6111da177e4SLinus Torvalds * appropriately within the inode. The uid and gid for the inode are 6121da177e4SLinus Torvalds * set according to the contents of the given cred structure. 6131da177e4SLinus Torvalds * 6141da177e4SLinus Torvalds * Use xfs_dialloc() to allocate the on-disk inode. If xfs_dialloc() 615cd856db6SCarlos Maiolino * has a free inode available, call xfs_iget() to obtain the in-core 616cd856db6SCarlos Maiolino * version of the allocated inode. Finally, fill in the inode and 617cd856db6SCarlos Maiolino * log its initial contents. In this case, ialloc_context would be 618cd856db6SCarlos Maiolino * set to NULL. 6191da177e4SLinus Torvalds * 620cd856db6SCarlos Maiolino * If xfs_dialloc() does not have an available inode, it will replenish 621cd856db6SCarlos Maiolino * its supply by doing an allocation. Since we can only do one 622cd856db6SCarlos Maiolino * allocation within a transaction without deadlocks, we must commit 623cd856db6SCarlos Maiolino * the current transaction before returning the inode itself. 624cd856db6SCarlos Maiolino * In this case, therefore, we will set ialloc_context and return. 6251da177e4SLinus Torvalds * The caller should then commit the current transaction, start a new 6261da177e4SLinus Torvalds * transaction, and call xfs_ialloc() again to actually get the inode. 
6271da177e4SLinus Torvalds * 6281da177e4SLinus Torvalds * To ensure that some other process does not grab the inode that 6291da177e4SLinus Torvalds * was allocated during the first call to xfs_ialloc(), this routine 6301da177e4SLinus Torvalds * also returns the [locked] bp pointing to the head of the freelist 6311da177e4SLinus Torvalds * as ialloc_context. The caller should hold this buffer across 6321da177e4SLinus Torvalds * the commit and pass it back into this routine on the second call. 633b11f94d5SDavid Chinner * 634b11f94d5SDavid Chinner * If we are allocating quota inodes, we do not have a parent inode 635b11f94d5SDavid Chinner * to attach to or associate with (i.e. pip == NULL) because they 636b11f94d5SDavid Chinner * are not linked into the directory structure - they are attached 637b11f94d5SDavid Chinner * directly to the superblock - and so have no parent. 6381da177e4SLinus Torvalds */ 6391da177e4SLinus Torvalds int 6401da177e4SLinus Torvalds xfs_ialloc( 6411da177e4SLinus Torvalds xfs_trans_t *tp, 6421da177e4SLinus Torvalds xfs_inode_t *pip, 643576b1d67SAl Viro umode_t mode, 64431b084aeSNathan Scott xfs_nlink_t nlink, 6451da177e4SLinus Torvalds xfs_dev_t rdev, 6466743099cSArkadiusz Mi?kiewicz prid_t prid, 6471da177e4SLinus Torvalds int okalloc, 6481da177e4SLinus Torvalds xfs_buf_t **ialloc_context, 6491da177e4SLinus Torvalds xfs_inode_t **ipp) 6501da177e4SLinus Torvalds { 65193848a99SChristoph Hellwig struct xfs_mount *mp = tp->t_mountp; 6521da177e4SLinus Torvalds xfs_ino_t ino; 6531da177e4SLinus Torvalds xfs_inode_t *ip; 6541da177e4SLinus Torvalds uint flags; 6551da177e4SLinus Torvalds int error; 656dff35fd4SChristoph Hellwig timespec_t tv; 657bf904248SDavid Chinner int filestreams = 0; 6581da177e4SLinus Torvalds 6591da177e4SLinus Torvalds /* 6601da177e4SLinus Torvalds * Call the space management code to pick 6611da177e4SLinus Torvalds * the on-disk inode to be allocated. 
6621da177e4SLinus Torvalds */ 663b11f94d5SDavid Chinner error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc, 66408358906SChristoph Hellwig ialloc_context, &ino); 665bf904248SDavid Chinner if (error) 6661da177e4SLinus Torvalds return error; 66708358906SChristoph Hellwig if (*ialloc_context || ino == NULLFSINO) { 6681da177e4SLinus Torvalds *ipp = NULL; 6691da177e4SLinus Torvalds return 0; 6701da177e4SLinus Torvalds } 6711da177e4SLinus Torvalds ASSERT(*ialloc_context == NULL); 6721da177e4SLinus Torvalds 6731da177e4SLinus Torvalds /* 6741da177e4SLinus Torvalds * Get the in-core inode with the lock held exclusively. 6751da177e4SLinus Torvalds * This is because we're setting fields here we need 6761da177e4SLinus Torvalds * to prevent others from looking at until we're done. 6771da177e4SLinus Torvalds */ 67893848a99SChristoph Hellwig error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE, 679ec3ba85fSChristoph Hellwig XFS_ILOCK_EXCL, &ip); 680bf904248SDavid Chinner if (error) 6811da177e4SLinus Torvalds return error; 6821da177e4SLinus Torvalds ASSERT(ip != NULL); 6831da177e4SLinus Torvalds 684576b1d67SAl Viro ip->i_d.di_mode = mode; 6851da177e4SLinus Torvalds ip->i_d.di_onlink = 0; 6861da177e4SLinus Torvalds ip->i_d.di_nlink = nlink; 6871da177e4SLinus Torvalds ASSERT(ip->i_d.di_nlink == nlink); 6887aab1b28SDwight Engen ip->i_d.di_uid = xfs_kuid_to_uid(current_fsuid()); 6897aab1b28SDwight Engen ip->i_d.di_gid = xfs_kgid_to_gid(current_fsgid()); 6906743099cSArkadiusz Mi?kiewicz xfs_set_projid(ip, prid); 6911da177e4SLinus Torvalds memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 6921da177e4SLinus Torvalds 6931da177e4SLinus Torvalds /* 6941da177e4SLinus Torvalds * If the superblock version is up to where we support new format 6951da177e4SLinus Torvalds * inodes and this is currently an old format inode, then change 6961da177e4SLinus Torvalds * the inode version number now. 
This way we only do the conversion 6971da177e4SLinus Torvalds * here rather than here and in the flush/logging code. 6981da177e4SLinus Torvalds */ 69993848a99SChristoph Hellwig if (xfs_sb_version_hasnlink(&mp->m_sb) && 70051ce16d5SChristoph Hellwig ip->i_d.di_version == 1) { 70151ce16d5SChristoph Hellwig ip->i_d.di_version = 2; 7021da177e4SLinus Torvalds /* 7031da177e4SLinus Torvalds * We've already zeroed the old link count, the projid field, 7041da177e4SLinus Torvalds * and the pad field. 7051da177e4SLinus Torvalds */ 7061da177e4SLinus Torvalds } 7071da177e4SLinus Torvalds 7081da177e4SLinus Torvalds /* 7091da177e4SLinus Torvalds * Project ids won't be stored on disk if we are using a version 1 inode. 7101da177e4SLinus Torvalds */ 71151ce16d5SChristoph Hellwig if ((prid != 0) && (ip->i_d.di_version == 1)) 7121da177e4SLinus Torvalds xfs_bump_ino_vers2(tp, ip); 7131da177e4SLinus Torvalds 714bd186aa9SChristoph Hellwig if (pip && XFS_INHERIT_GID(pip)) { 7151da177e4SLinus Torvalds ip->i_d.di_gid = pip->i_d.di_gid; 716abbede1bSAl Viro if ((pip->i_d.di_mode & S_ISGID) && S_ISDIR(mode)) { 7171da177e4SLinus Torvalds ip->i_d.di_mode |= S_ISGID; 7181da177e4SLinus Torvalds } 7191da177e4SLinus Torvalds } 7201da177e4SLinus Torvalds 7211da177e4SLinus Torvalds /* 7221da177e4SLinus Torvalds * If the group ID of the new file does not match the effective group 7231da177e4SLinus Torvalds * ID or one of the supplementary group IDs, the S_ISGID bit is cleared 7241da177e4SLinus Torvalds * (and only if the irix_sgid_inherit compatibility variable is set). 
7251da177e4SLinus Torvalds */ 7261da177e4SLinus Torvalds if ((irix_sgid_inherit) && 7271da177e4SLinus Torvalds (ip->i_d.di_mode & S_ISGID) && 7287aab1b28SDwight Engen (!in_group_p(xfs_gid_to_kgid(ip->i_d.di_gid)))) { 7291da177e4SLinus Torvalds ip->i_d.di_mode &= ~S_ISGID; 7301da177e4SLinus Torvalds } 7311da177e4SLinus Torvalds 7321da177e4SLinus Torvalds ip->i_d.di_size = 0; 7331da177e4SLinus Torvalds ip->i_d.di_nextents = 0; 7341da177e4SLinus Torvalds ASSERT(ip->i_d.di_nblocks == 0); 735dff35fd4SChristoph Hellwig 736dff35fd4SChristoph Hellwig nanotime(&tv); 737dff35fd4SChristoph Hellwig ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec; 738dff35fd4SChristoph Hellwig ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec; 739dff35fd4SChristoph Hellwig ip->i_d.di_atime = ip->i_d.di_mtime; 740dff35fd4SChristoph Hellwig ip->i_d.di_ctime = ip->i_d.di_mtime; 741dff35fd4SChristoph Hellwig 7421da177e4SLinus Torvalds /* 7431da177e4SLinus Torvalds * di_gen will have been taken care of in xfs_iread. 7441da177e4SLinus Torvalds */ 7451da177e4SLinus Torvalds ip->i_d.di_extsize = 0; 7461da177e4SLinus Torvalds ip->i_d.di_dmevmask = 0; 7471da177e4SLinus Torvalds ip->i_d.di_dmstate = 0; 7481da177e4SLinus Torvalds ip->i_d.di_flags = 0; 74993848a99SChristoph Hellwig 75093848a99SChristoph Hellwig if (ip->i_d.di_version == 3) { 75193848a99SChristoph Hellwig ASSERT(ip->i_d.di_ino == ino); 75293848a99SChristoph Hellwig ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid)); 75393848a99SChristoph Hellwig ip->i_d.di_crc = 0; 75493848a99SChristoph Hellwig ip->i_d.di_changecount = 1; 75593848a99SChristoph Hellwig ip->i_d.di_lsn = 0; 75693848a99SChristoph Hellwig ip->i_d.di_flags2 = 0; 75793848a99SChristoph Hellwig memset(&(ip->i_d.di_pad2[0]), 0, sizeof(ip->i_d.di_pad2)); 75893848a99SChristoph Hellwig ip->i_d.di_crtime = ip->i_d.di_mtime; 75993848a99SChristoph Hellwig } 76093848a99SChristoph Hellwig 76193848a99SChristoph Hellwig 7621da177e4SLinus Torvalds flags = XFS_ILOG_CORE; 7631da177e4SLinus 
Torvalds switch (mode & S_IFMT) { 7641da177e4SLinus Torvalds case S_IFIFO: 7651da177e4SLinus Torvalds case S_IFCHR: 7661da177e4SLinus Torvalds case S_IFBLK: 7671da177e4SLinus Torvalds case S_IFSOCK: 7681da177e4SLinus Torvalds ip->i_d.di_format = XFS_DINODE_FMT_DEV; 7691da177e4SLinus Torvalds ip->i_df.if_u2.if_rdev = rdev; 7701da177e4SLinus Torvalds ip->i_df.if_flags = 0; 7711da177e4SLinus Torvalds flags |= XFS_ILOG_DEV; 7721da177e4SLinus Torvalds break; 7731da177e4SLinus Torvalds case S_IFREG: 774bf904248SDavid Chinner /* 775bf904248SDavid Chinner * we can't set up filestreams until after the VFS inode 776bf904248SDavid Chinner * is set up properly. 777bf904248SDavid Chinner */ 778bf904248SDavid Chinner if (pip && xfs_inode_is_filestream(pip)) 779bf904248SDavid Chinner filestreams = 1; 7802a82b8beSDavid Chinner /* fall through */ 7811da177e4SLinus Torvalds case S_IFDIR: 782b11f94d5SDavid Chinner if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) { 783365ca83dSNathan Scott uint di_flags = 0; 784365ca83dSNathan Scott 785abbede1bSAl Viro if (S_ISDIR(mode)) { 786365ca83dSNathan Scott if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) 787365ca83dSNathan Scott di_flags |= XFS_DIFLAG_RTINHERIT; 788dd9f438eSNathan Scott if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { 789dd9f438eSNathan Scott di_flags |= XFS_DIFLAG_EXTSZINHERIT; 790dd9f438eSNathan Scott ip->i_d.di_extsize = pip->i_d.di_extsize; 791dd9f438eSNathan Scott } 792abbede1bSAl Viro } else if (S_ISREG(mode)) { 793613d7043SChristoph Hellwig if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) 794365ca83dSNathan Scott di_flags |= XFS_DIFLAG_REALTIME; 795dd9f438eSNathan Scott if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { 796dd9f438eSNathan Scott di_flags |= XFS_DIFLAG_EXTSIZE; 797dd9f438eSNathan Scott ip->i_d.di_extsize = pip->i_d.di_extsize; 798dd9f438eSNathan Scott } 7991da177e4SLinus Torvalds } 8001da177e4SLinus Torvalds if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) && 8011da177e4SLinus Torvalds xfs_inherit_noatime) 
802365ca83dSNathan Scott di_flags |= XFS_DIFLAG_NOATIME; 8031da177e4SLinus Torvalds if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) && 8041da177e4SLinus Torvalds xfs_inherit_nodump) 805365ca83dSNathan Scott di_flags |= XFS_DIFLAG_NODUMP; 8061da177e4SLinus Torvalds if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) && 8071da177e4SLinus Torvalds xfs_inherit_sync) 808365ca83dSNathan Scott di_flags |= XFS_DIFLAG_SYNC; 8091da177e4SLinus Torvalds if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) && 8101da177e4SLinus Torvalds xfs_inherit_nosymlinks) 811365ca83dSNathan Scott di_flags |= XFS_DIFLAG_NOSYMLINKS; 812365ca83dSNathan Scott if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 813365ca83dSNathan Scott di_flags |= XFS_DIFLAG_PROJINHERIT; 814d3446eacSBarry Naujok if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) && 815d3446eacSBarry Naujok xfs_inherit_nodefrag) 816d3446eacSBarry Naujok di_flags |= XFS_DIFLAG_NODEFRAG; 8172a82b8beSDavid Chinner if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM) 8182a82b8beSDavid Chinner di_flags |= XFS_DIFLAG_FILESTREAM; 819365ca83dSNathan Scott ip->i_d.di_flags |= di_flags; 8201da177e4SLinus Torvalds } 8211da177e4SLinus Torvalds /* FALLTHROUGH */ 8221da177e4SLinus Torvalds case S_IFLNK: 8231da177e4SLinus Torvalds ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; 8241da177e4SLinus Torvalds ip->i_df.if_flags = XFS_IFEXTENTS; 8251da177e4SLinus Torvalds ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0; 8261da177e4SLinus Torvalds ip->i_df.if_u1.if_extents = NULL; 8271da177e4SLinus Torvalds break; 8281da177e4SLinus Torvalds default: 8291da177e4SLinus Torvalds ASSERT(0); 8301da177e4SLinus Torvalds } 8311da177e4SLinus Torvalds /* 8321da177e4SLinus Torvalds * Attribute fork settings for new inode. 8331da177e4SLinus Torvalds */ 8341da177e4SLinus Torvalds ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; 8351da177e4SLinus Torvalds ip->i_d.di_anextents = 0; 8361da177e4SLinus Torvalds 8371da177e4SLinus Torvalds /* 8381da177e4SLinus Torvalds * Log the new values stuffed into the inode. 
8391da177e4SLinus Torvalds */ 840ddc3415aSChristoph Hellwig xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 8411da177e4SLinus Torvalds xfs_trans_log_inode(tp, ip, flags); 8421da177e4SLinus Torvalds 843b83bd138SNathan Scott /* now that we have an i_mode we can setup inode ops and unlock */ 84441be8bedSChristoph Hellwig xfs_setup_inode(ip); 8451da177e4SLinus Torvalds 846bf904248SDavid Chinner /* now we have set up the vfs inode we can associate the filestream */ 847bf904248SDavid Chinner if (filestreams) { 848bf904248SDavid Chinner error = xfs_filestream_associate(pip, ip); 849bf904248SDavid Chinner if (error < 0) 850bf904248SDavid Chinner return -error; 851bf904248SDavid Chinner if (!error) 852bf904248SDavid Chinner xfs_iflags_set(ip, XFS_IFILESTREAM); 853bf904248SDavid Chinner } 854bf904248SDavid Chinner 8551da177e4SLinus Torvalds *ipp = ip; 8561da177e4SLinus Torvalds return 0; 8571da177e4SLinus Torvalds } 8581da177e4SLinus Torvalds 859e546cb79SDave Chinner /* 860e546cb79SDave Chinner * Allocates a new inode from disk and return a pointer to the 861e546cb79SDave Chinner * incore copy. This routine will internally commit the current 862e546cb79SDave Chinner * transaction and allocate a new one if the Space Manager needed 863e546cb79SDave Chinner * to do an allocation to replenish the inode free-list. 864e546cb79SDave Chinner * 865e546cb79SDave Chinner * This routine is designed to be called from xfs_create and 866e546cb79SDave Chinner * xfs_create_dir. 867e546cb79SDave Chinner * 868e546cb79SDave Chinner */ 869e546cb79SDave Chinner int 870e546cb79SDave Chinner xfs_dir_ialloc( 871e546cb79SDave Chinner xfs_trans_t **tpp, /* input: current transaction; 872e546cb79SDave Chinner output: may be a new transaction. */ 873e546cb79SDave Chinner xfs_inode_t *dp, /* directory within whose allocate 874e546cb79SDave Chinner the inode. 
*/ 875e546cb79SDave Chinner umode_t mode, 876e546cb79SDave Chinner xfs_nlink_t nlink, 877e546cb79SDave Chinner xfs_dev_t rdev, 878e546cb79SDave Chinner prid_t prid, /* project id */ 879e546cb79SDave Chinner int okalloc, /* ok to allocate new space */ 880e546cb79SDave Chinner xfs_inode_t **ipp, /* pointer to inode; it will be 881e546cb79SDave Chinner locked. */ 882e546cb79SDave Chinner int *committed) 883e546cb79SDave Chinner 884e546cb79SDave Chinner { 885e546cb79SDave Chinner xfs_trans_t *tp; 886e546cb79SDave Chinner xfs_trans_t *ntp; 887e546cb79SDave Chinner xfs_inode_t *ip; 888e546cb79SDave Chinner xfs_buf_t *ialloc_context = NULL; 889e546cb79SDave Chinner int code; 890e546cb79SDave Chinner void *dqinfo; 891e546cb79SDave Chinner uint tflags; 892e546cb79SDave Chinner 893e546cb79SDave Chinner tp = *tpp; 894e546cb79SDave Chinner ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); 895e546cb79SDave Chinner 896e546cb79SDave Chinner /* 897e546cb79SDave Chinner * xfs_ialloc will return a pointer to an incore inode if 898e546cb79SDave Chinner * the Space Manager has an available inode on the free 899e546cb79SDave Chinner * list. Otherwise, it will do an allocation and replenish 900e546cb79SDave Chinner * the freelist. Since we can only do one allocation per 901e546cb79SDave Chinner * transaction without deadlocks, we will need to commit the 902e546cb79SDave Chinner * current transaction and start a new one. We will then 903e546cb79SDave Chinner * need to call xfs_ialloc again to get the inode. 904e546cb79SDave Chinner * 905e546cb79SDave Chinner * If xfs_ialloc did an allocation to replenish the freelist, 906e546cb79SDave Chinner * it returns the bp containing the head of the freelist as 907e546cb79SDave Chinner * ialloc_context. We will hold a lock on it across the 908e546cb79SDave Chinner * transaction commit so that no other process can steal 909e546cb79SDave Chinner * the inode(s) that we've just allocated. 
910e546cb79SDave Chinner */ 911e546cb79SDave Chinner code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc, 912e546cb79SDave Chinner &ialloc_context, &ip); 913e546cb79SDave Chinner 914e546cb79SDave Chinner /* 915e546cb79SDave Chinner * Return an error if we were unable to allocate a new inode. 916e546cb79SDave Chinner * This should only happen if we run out of space on disk or 917e546cb79SDave Chinner * encounter a disk error. 918e546cb79SDave Chinner */ 919e546cb79SDave Chinner if (code) { 920e546cb79SDave Chinner *ipp = NULL; 921e546cb79SDave Chinner return code; 922e546cb79SDave Chinner } 923e546cb79SDave Chinner if (!ialloc_context && !ip) { 924e546cb79SDave Chinner *ipp = NULL; 925e546cb79SDave Chinner return XFS_ERROR(ENOSPC); 926e546cb79SDave Chinner } 927e546cb79SDave Chinner 928e546cb79SDave Chinner /* 929e546cb79SDave Chinner * If the AGI buffer is non-NULL, then we were unable to get an 930e546cb79SDave Chinner * inode in one operation. We need to commit the current 931e546cb79SDave Chinner * transaction and call xfs_ialloc() again. It is guaranteed 932e546cb79SDave Chinner * to succeed the second time. 933e546cb79SDave Chinner */ 934e546cb79SDave Chinner if (ialloc_context) { 9353d3c8b52SJie Liu struct xfs_trans_res tres; 9363d3c8b52SJie Liu 937e546cb79SDave Chinner /* 938e546cb79SDave Chinner * Normally, xfs_trans_commit releases all the locks. 939e546cb79SDave Chinner * We call bhold to hang on to the ialloc_context across 940e546cb79SDave Chinner * the commit. Holding this buffer prevents any other 941e546cb79SDave Chinner * processes from doing any allocations in this 942e546cb79SDave Chinner * allocation group. 943e546cb79SDave Chinner */ 944e546cb79SDave Chinner xfs_trans_bhold(tp, ialloc_context); 945e546cb79SDave Chinner /* 946e546cb79SDave Chinner * Save the log reservation so we can use 947e546cb79SDave Chinner * them in the next transaction. 
948e546cb79SDave Chinner */ 9493d3c8b52SJie Liu tres.tr_logres = xfs_trans_get_log_res(tp); 9503d3c8b52SJie Liu tres.tr_logcount = xfs_trans_get_log_count(tp); 951e546cb79SDave Chinner 952e546cb79SDave Chinner /* 953e546cb79SDave Chinner * We want the quota changes to be associated with the next 954e546cb79SDave Chinner * transaction, NOT this one. So, detach the dqinfo from this 955e546cb79SDave Chinner * and attach it to the next transaction. 956e546cb79SDave Chinner */ 957e546cb79SDave Chinner dqinfo = NULL; 958e546cb79SDave Chinner tflags = 0; 959e546cb79SDave Chinner if (tp->t_dqinfo) { 960e546cb79SDave Chinner dqinfo = (void *)tp->t_dqinfo; 961e546cb79SDave Chinner tp->t_dqinfo = NULL; 962e546cb79SDave Chinner tflags = tp->t_flags & XFS_TRANS_DQ_DIRTY; 963e546cb79SDave Chinner tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY); 964e546cb79SDave Chinner } 965e546cb79SDave Chinner 966e546cb79SDave Chinner ntp = xfs_trans_dup(tp); 967e546cb79SDave Chinner code = xfs_trans_commit(tp, 0); 968e546cb79SDave Chinner tp = ntp; 969e546cb79SDave Chinner if (committed != NULL) { 970e546cb79SDave Chinner *committed = 1; 971e546cb79SDave Chinner } 972e546cb79SDave Chinner /* 973e546cb79SDave Chinner * If we get an error during the commit processing, 974e546cb79SDave Chinner * release the buffer that is still held and return 975e546cb79SDave Chinner * to the caller. 
976e546cb79SDave Chinner */ 977e546cb79SDave Chinner if (code) { 978e546cb79SDave Chinner xfs_buf_relse(ialloc_context); 979e546cb79SDave Chinner if (dqinfo) { 980e546cb79SDave Chinner tp->t_dqinfo = dqinfo; 981e546cb79SDave Chinner xfs_trans_free_dqinfo(tp); 982e546cb79SDave Chinner } 983e546cb79SDave Chinner *tpp = ntp; 984e546cb79SDave Chinner *ipp = NULL; 985e546cb79SDave Chinner return code; 986e546cb79SDave Chinner } 987e546cb79SDave Chinner 988e546cb79SDave Chinner /* 989e546cb79SDave Chinner * transaction commit worked ok so we can drop the extra ticket 990e546cb79SDave Chinner * reference that we gained in xfs_trans_dup() 991e546cb79SDave Chinner */ 992e546cb79SDave Chinner xfs_log_ticket_put(tp->t_ticket); 9933d3c8b52SJie Liu tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; 9943d3c8b52SJie Liu code = xfs_trans_reserve(tp, &tres, 0, 0); 9953d3c8b52SJie Liu 996e546cb79SDave Chinner /* 997e546cb79SDave Chinner * Re-attach the quota info that we detached from prev trx. 998e546cb79SDave Chinner */ 999e546cb79SDave Chinner if (dqinfo) { 1000e546cb79SDave Chinner tp->t_dqinfo = dqinfo; 1001e546cb79SDave Chinner tp->t_flags |= tflags; 1002e546cb79SDave Chinner } 1003e546cb79SDave Chinner 1004e546cb79SDave Chinner if (code) { 1005e546cb79SDave Chinner xfs_buf_relse(ialloc_context); 1006e546cb79SDave Chinner *tpp = ntp; 1007e546cb79SDave Chinner *ipp = NULL; 1008e546cb79SDave Chinner return code; 1009e546cb79SDave Chinner } 1010e546cb79SDave Chinner xfs_trans_bjoin(tp, ialloc_context); 1011e546cb79SDave Chinner 1012e546cb79SDave Chinner /* 1013e546cb79SDave Chinner * Call ialloc again. Since we've locked out all 1014e546cb79SDave Chinner * other allocations in this allocation group, 1015e546cb79SDave Chinner * this call should always succeed. 
1016e546cb79SDave Chinner */ 1017e546cb79SDave Chinner code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, 1018e546cb79SDave Chinner okalloc, &ialloc_context, &ip); 1019e546cb79SDave Chinner 1020e546cb79SDave Chinner /* 1021e546cb79SDave Chinner * If we get an error at this point, return to the caller 1022e546cb79SDave Chinner * so that the current transaction can be aborted. 1023e546cb79SDave Chinner */ 1024e546cb79SDave Chinner if (code) { 1025e546cb79SDave Chinner *tpp = tp; 1026e546cb79SDave Chinner *ipp = NULL; 1027e546cb79SDave Chinner return code; 1028e546cb79SDave Chinner } 1029e546cb79SDave Chinner ASSERT(!ialloc_context && ip); 1030e546cb79SDave Chinner 1031e546cb79SDave Chinner } else { 1032e546cb79SDave Chinner if (committed != NULL) 1033e546cb79SDave Chinner *committed = 0; 1034e546cb79SDave Chinner } 1035e546cb79SDave Chinner 1036e546cb79SDave Chinner *ipp = ip; 1037e546cb79SDave Chinner *tpp = tp; 1038e546cb79SDave Chinner 1039e546cb79SDave Chinner return 0; 1040e546cb79SDave Chinner } 1041e546cb79SDave Chinner 1042e546cb79SDave Chinner /* 1043e546cb79SDave Chinner * Decrement the link count on an inode & log the change. 1044e546cb79SDave Chinner * If this causes the link count to go to zero, initiate the 1045e546cb79SDave Chinner * logging activity required to truncate a file. 
1046e546cb79SDave Chinner */ 1047e546cb79SDave Chinner int /* error */ 1048e546cb79SDave Chinner xfs_droplink( 1049e546cb79SDave Chinner xfs_trans_t *tp, 1050e546cb79SDave Chinner xfs_inode_t *ip) 1051e546cb79SDave Chinner { 1052e546cb79SDave Chinner int error; 1053e546cb79SDave Chinner 1054e546cb79SDave Chinner xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); 1055e546cb79SDave Chinner 1056e546cb79SDave Chinner ASSERT (ip->i_d.di_nlink > 0); 1057e546cb79SDave Chinner ip->i_d.di_nlink--; 1058e546cb79SDave Chinner drop_nlink(VFS_I(ip)); 1059e546cb79SDave Chinner xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1060e546cb79SDave Chinner 1061e546cb79SDave Chinner error = 0; 1062e546cb79SDave Chinner if (ip->i_d.di_nlink == 0) { 1063e546cb79SDave Chinner /* 1064e546cb79SDave Chinner * We're dropping the last link to this file. 1065e546cb79SDave Chinner * Move the on-disk inode to the AGI unlinked list. 1066e546cb79SDave Chinner * From xfs_inactive() we will pull the inode from 1067e546cb79SDave Chinner * the list and free it. 1068e546cb79SDave Chinner */ 1069e546cb79SDave Chinner error = xfs_iunlink(tp, ip); 1070e546cb79SDave Chinner } 1071e546cb79SDave Chinner return error; 1072e546cb79SDave Chinner } 1073e546cb79SDave Chinner 1074e546cb79SDave Chinner /* 1075e546cb79SDave Chinner * This gets called when the inode's version needs to be changed from 1 to 2. 1076e546cb79SDave Chinner * Currently this happens when the nlink field overflows the old 16-bit value 1077e546cb79SDave Chinner * or when chproj is called to change the project for the first time. 1078e546cb79SDave Chinner * As a side effect the superblock version will also get rev'd 1079e546cb79SDave Chinner * to contain the NLINK bit. 
1080e546cb79SDave Chinner */ 1081e546cb79SDave Chinner void 1082e546cb79SDave Chinner xfs_bump_ino_vers2( 1083e546cb79SDave Chinner xfs_trans_t *tp, 1084e546cb79SDave Chinner xfs_inode_t *ip) 1085e546cb79SDave Chinner { 1086e546cb79SDave Chinner xfs_mount_t *mp; 1087e546cb79SDave Chinner 1088e546cb79SDave Chinner ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 1089e546cb79SDave Chinner ASSERT(ip->i_d.di_version == 1); 1090e546cb79SDave Chinner 1091e546cb79SDave Chinner ip->i_d.di_version = 2; 1092e546cb79SDave Chinner ip->i_d.di_onlink = 0; 1093e546cb79SDave Chinner memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 1094e546cb79SDave Chinner mp = tp->t_mountp; 1095e546cb79SDave Chinner if (!xfs_sb_version_hasnlink(&mp->m_sb)) { 1096e546cb79SDave Chinner spin_lock(&mp->m_sb_lock); 1097e546cb79SDave Chinner if (!xfs_sb_version_hasnlink(&mp->m_sb)) { 1098e546cb79SDave Chinner xfs_sb_version_addnlink(&mp->m_sb); 1099e546cb79SDave Chinner spin_unlock(&mp->m_sb_lock); 1100e546cb79SDave Chinner xfs_mod_sb(tp, XFS_SB_VERSIONNUM); 1101e546cb79SDave Chinner } else { 1102e546cb79SDave Chinner spin_unlock(&mp->m_sb_lock); 1103e546cb79SDave Chinner } 1104e546cb79SDave Chinner } 1105e546cb79SDave Chinner /* Caller must log the inode */ 1106e546cb79SDave Chinner } 1107e546cb79SDave Chinner 1108e546cb79SDave Chinner /* 1109e546cb79SDave Chinner * Increment the link count on an inode & log the change. 
1110e546cb79SDave Chinner */ 1111e546cb79SDave Chinner int 1112e546cb79SDave Chinner xfs_bumplink( 1113e546cb79SDave Chinner xfs_trans_t *tp, 1114e546cb79SDave Chinner xfs_inode_t *ip) 1115e546cb79SDave Chinner { 1116e546cb79SDave Chinner xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); 1117e546cb79SDave Chinner 1118e546cb79SDave Chinner ASSERT(ip->i_d.di_nlink > 0); 1119e546cb79SDave Chinner ip->i_d.di_nlink++; 1120e546cb79SDave Chinner inc_nlink(VFS_I(ip)); 1121e546cb79SDave Chinner if ((ip->i_d.di_version == 1) && 1122e546cb79SDave Chinner (ip->i_d.di_nlink > XFS_MAXLINK_1)) { 1123e546cb79SDave Chinner /* 1124e546cb79SDave Chinner * The inode has increased its number of links beyond 1125e546cb79SDave Chinner * what can fit in an old format inode. It now needs 1126e546cb79SDave Chinner * to be converted to a version 2 inode with a 32 bit 1127e546cb79SDave Chinner * link count. If this is the first inode in the file 1128e546cb79SDave Chinner * system to do this, then we need to bump the superblock 1129e546cb79SDave Chinner * version number as well. 
1130e546cb79SDave Chinner */ 1131e546cb79SDave Chinner xfs_bump_ino_vers2(tp, ip); 1132e546cb79SDave Chinner } 1133e546cb79SDave Chinner 1134e546cb79SDave Chinner xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1135e546cb79SDave Chinner return 0; 1136e546cb79SDave Chinner } 1137e546cb79SDave Chinner 1138c24b5dfaSDave Chinner int 1139c24b5dfaSDave Chinner xfs_create( 1140c24b5dfaSDave Chinner xfs_inode_t *dp, 1141c24b5dfaSDave Chinner struct xfs_name *name, 1142c24b5dfaSDave Chinner umode_t mode, 1143c24b5dfaSDave Chinner xfs_dev_t rdev, 1144c24b5dfaSDave Chinner xfs_inode_t **ipp) 1145c24b5dfaSDave Chinner { 1146c24b5dfaSDave Chinner int is_dir = S_ISDIR(mode); 1147c24b5dfaSDave Chinner struct xfs_mount *mp = dp->i_mount; 1148c24b5dfaSDave Chinner struct xfs_inode *ip = NULL; 1149c24b5dfaSDave Chinner struct xfs_trans *tp = NULL; 1150c24b5dfaSDave Chinner int error; 1151c24b5dfaSDave Chinner xfs_bmap_free_t free_list; 1152c24b5dfaSDave Chinner xfs_fsblock_t first_block; 1153c24b5dfaSDave Chinner bool unlock_dp_on_error = false; 1154c24b5dfaSDave Chinner uint cancel_flags; 1155c24b5dfaSDave Chinner int committed; 1156c24b5dfaSDave Chinner prid_t prid; 1157c24b5dfaSDave Chinner struct xfs_dquot *udqp = NULL; 1158c24b5dfaSDave Chinner struct xfs_dquot *gdqp = NULL; 1159c24b5dfaSDave Chinner struct xfs_dquot *pdqp = NULL; 11603d3c8b52SJie Liu struct xfs_trans_res tres; 1161c24b5dfaSDave Chinner uint resblks; 1162c24b5dfaSDave Chinner 1163c24b5dfaSDave Chinner trace_xfs_create(dp, name); 1164c24b5dfaSDave Chinner 1165c24b5dfaSDave Chinner if (XFS_FORCED_SHUTDOWN(mp)) 1166c24b5dfaSDave Chinner return XFS_ERROR(EIO); 1167c24b5dfaSDave Chinner 1168c24b5dfaSDave Chinner if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 1169c24b5dfaSDave Chinner prid = xfs_get_projid(dp); 1170c24b5dfaSDave Chinner else 1171c24b5dfaSDave Chinner prid = XFS_PROJID_DEFAULT; 1172c24b5dfaSDave Chinner 1173c24b5dfaSDave Chinner /* 1174c24b5dfaSDave Chinner * Make sure that we have allocated dquot(s) on 
disk. 1175c24b5dfaSDave Chinner */ 11767aab1b28SDwight Engen error = xfs_qm_vop_dqalloc(dp, xfs_kuid_to_uid(current_fsuid()), 11777aab1b28SDwight Engen xfs_kgid_to_gid(current_fsgid()), prid, 1178c24b5dfaSDave Chinner XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, 1179c24b5dfaSDave Chinner &udqp, &gdqp, &pdqp); 1180c24b5dfaSDave Chinner if (error) 1181c24b5dfaSDave Chinner return error; 1182c24b5dfaSDave Chinner 1183c24b5dfaSDave Chinner if (is_dir) { 1184c24b5dfaSDave Chinner rdev = 0; 1185c24b5dfaSDave Chinner resblks = XFS_MKDIR_SPACE_RES(mp, name->len); 11863d3c8b52SJie Liu tres.tr_logres = M_RES(mp)->tr_mkdir.tr_logres; 11873d3c8b52SJie Liu tres.tr_logcount = XFS_MKDIR_LOG_COUNT; 1188c24b5dfaSDave Chinner tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR); 1189c24b5dfaSDave Chinner } else { 1190c24b5dfaSDave Chinner resblks = XFS_CREATE_SPACE_RES(mp, name->len); 11913d3c8b52SJie Liu tres.tr_logres = M_RES(mp)->tr_create.tr_logres; 11923d3c8b52SJie Liu tres.tr_logcount = XFS_CREATE_LOG_COUNT; 1193c24b5dfaSDave Chinner tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE); 1194c24b5dfaSDave Chinner } 1195c24b5dfaSDave Chinner 1196c24b5dfaSDave Chinner cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 1197c24b5dfaSDave Chinner 1198c24b5dfaSDave Chinner /* 1199c24b5dfaSDave Chinner * Initially assume that the file does not exist and 1200c24b5dfaSDave Chinner * reserve the resources for that case. If that is not 1201c24b5dfaSDave Chinner * the case we'll drop the one we have and get a more 1202c24b5dfaSDave Chinner * appropriate transaction later. 
1203c24b5dfaSDave Chinner */ 12043d3c8b52SJie Liu tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; 12053d3c8b52SJie Liu error = xfs_trans_reserve(tp, &tres, resblks, 0); 1206c24b5dfaSDave Chinner if (error == ENOSPC) { 1207c24b5dfaSDave Chinner /* flush outstanding delalloc blocks and retry */ 1208c24b5dfaSDave Chinner xfs_flush_inodes(mp); 12093d3c8b52SJie Liu error = xfs_trans_reserve(tp, &tres, resblks, 0); 1210c24b5dfaSDave Chinner } 1211c24b5dfaSDave Chinner if (error == ENOSPC) { 1212c24b5dfaSDave Chinner /* No space at all so try a "no-allocation" reservation */ 1213c24b5dfaSDave Chinner resblks = 0; 12143d3c8b52SJie Liu error = xfs_trans_reserve(tp, &tres, 0, 0); 1215c24b5dfaSDave Chinner } 1216c24b5dfaSDave Chinner if (error) { 1217c24b5dfaSDave Chinner cancel_flags = 0; 1218c24b5dfaSDave Chinner goto out_trans_cancel; 1219c24b5dfaSDave Chinner } 1220c24b5dfaSDave Chinner 1221c24b5dfaSDave Chinner xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 1222c24b5dfaSDave Chinner unlock_dp_on_error = true; 1223c24b5dfaSDave Chinner 1224c24b5dfaSDave Chinner xfs_bmap_init(&free_list, &first_block); 1225c24b5dfaSDave Chinner 1226c24b5dfaSDave Chinner /* 1227c24b5dfaSDave Chinner * Reserve disk quota and the inode. 1228c24b5dfaSDave Chinner */ 1229c24b5dfaSDave Chinner error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, 1230c24b5dfaSDave Chinner pdqp, resblks, 1, 0); 1231c24b5dfaSDave Chinner if (error) 1232c24b5dfaSDave Chinner goto out_trans_cancel; 1233c24b5dfaSDave Chinner 1234c24b5dfaSDave Chinner error = xfs_dir_canenter(tp, dp, name, resblks); 1235c24b5dfaSDave Chinner if (error) 1236c24b5dfaSDave Chinner goto out_trans_cancel; 1237c24b5dfaSDave Chinner 1238c24b5dfaSDave Chinner /* 1239c24b5dfaSDave Chinner * A newly created regular or special file just has one directory 1240c24b5dfaSDave Chinner * entry pointing to them, but a directory also the "." entry 1241c24b5dfaSDave Chinner * pointing to itself. 
1242c24b5dfaSDave Chinner */ 1243c24b5dfaSDave Chinner error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, 1244c24b5dfaSDave Chinner prid, resblks > 0, &ip, &committed); 1245c24b5dfaSDave Chinner if (error) { 1246c24b5dfaSDave Chinner if (error == ENOSPC) 1247c24b5dfaSDave Chinner goto out_trans_cancel; 1248c24b5dfaSDave Chinner goto out_trans_abort; 1249c24b5dfaSDave Chinner } 1250c24b5dfaSDave Chinner 1251c24b5dfaSDave Chinner /* 1252c24b5dfaSDave Chinner * Now we join the directory inode to the transaction. We do not do it 1253c24b5dfaSDave Chinner * earlier because xfs_dir_ialloc might commit the previous transaction 1254c24b5dfaSDave Chinner * (and release all the locks). An error from here on will result in 1255c24b5dfaSDave Chinner * the transaction cancel unlocking dp so don't do it explicitly in the 1256c24b5dfaSDave Chinner * error path. 1257c24b5dfaSDave Chinner */ 1258c24b5dfaSDave Chinner xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 1259c24b5dfaSDave Chinner unlock_dp_on_error = false; 1260c24b5dfaSDave Chinner 1261c24b5dfaSDave Chinner error = xfs_dir_createname(tp, dp, name, ip->i_ino, 1262c24b5dfaSDave Chinner &first_block, &free_list, resblks ? 
1263c24b5dfaSDave Chinner resblks - XFS_IALLOC_SPACE_RES(mp) : 0); 1264c24b5dfaSDave Chinner if (error) { 1265c24b5dfaSDave Chinner ASSERT(error != ENOSPC); 1266c24b5dfaSDave Chinner goto out_trans_abort; 1267c24b5dfaSDave Chinner } 1268c24b5dfaSDave Chinner xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1269c24b5dfaSDave Chinner xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 1270c24b5dfaSDave Chinner 1271c24b5dfaSDave Chinner if (is_dir) { 1272c24b5dfaSDave Chinner error = xfs_dir_init(tp, ip, dp); 1273c24b5dfaSDave Chinner if (error) 1274c24b5dfaSDave Chinner goto out_bmap_cancel; 1275c24b5dfaSDave Chinner 1276c24b5dfaSDave Chinner error = xfs_bumplink(tp, dp); 1277c24b5dfaSDave Chinner if (error) 1278c24b5dfaSDave Chinner goto out_bmap_cancel; 1279c24b5dfaSDave Chinner } 1280c24b5dfaSDave Chinner 1281c24b5dfaSDave Chinner /* 1282c24b5dfaSDave Chinner * If this is a synchronous mount, make sure that the 1283c24b5dfaSDave Chinner * create transaction goes to disk before returning to 1284c24b5dfaSDave Chinner * the user. 1285c24b5dfaSDave Chinner */ 1286c24b5dfaSDave Chinner if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) 1287c24b5dfaSDave Chinner xfs_trans_set_sync(tp); 1288c24b5dfaSDave Chinner 1289c24b5dfaSDave Chinner /* 1290c24b5dfaSDave Chinner * Attach the dquot(s) to the inodes and modify them incore. 1291c24b5dfaSDave Chinner * These ids of the inode couldn't have changed since the new 1292c24b5dfaSDave Chinner * inode has been locked ever since it was created. 
1293c24b5dfaSDave Chinner */ 1294c24b5dfaSDave Chinner xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp); 1295c24b5dfaSDave Chinner 1296c24b5dfaSDave Chinner error = xfs_bmap_finish(&tp, &free_list, &committed); 1297c24b5dfaSDave Chinner if (error) 1298c24b5dfaSDave Chinner goto out_bmap_cancel; 1299c24b5dfaSDave Chinner 1300c24b5dfaSDave Chinner error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1301c24b5dfaSDave Chinner if (error) 1302c24b5dfaSDave Chinner goto out_release_inode; 1303c24b5dfaSDave Chinner 1304c24b5dfaSDave Chinner xfs_qm_dqrele(udqp); 1305c24b5dfaSDave Chinner xfs_qm_dqrele(gdqp); 1306c24b5dfaSDave Chinner xfs_qm_dqrele(pdqp); 1307c24b5dfaSDave Chinner 1308c24b5dfaSDave Chinner *ipp = ip; 1309c24b5dfaSDave Chinner return 0; 1310c24b5dfaSDave Chinner 1311c24b5dfaSDave Chinner out_bmap_cancel: 1312c24b5dfaSDave Chinner xfs_bmap_cancel(&free_list); 1313c24b5dfaSDave Chinner out_trans_abort: 1314c24b5dfaSDave Chinner cancel_flags |= XFS_TRANS_ABORT; 1315c24b5dfaSDave Chinner out_trans_cancel: 1316c24b5dfaSDave Chinner xfs_trans_cancel(tp, cancel_flags); 1317c24b5dfaSDave Chinner out_release_inode: 1318c24b5dfaSDave Chinner /* 1319c24b5dfaSDave Chinner * Wait until after the current transaction is aborted to 1320c24b5dfaSDave Chinner * release the inode. This prevents recursive transactions 1321c24b5dfaSDave Chinner * and deadlocks from xfs_inactive. 
*/
	if (ip)
		IRELE(ip);

	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);
	xfs_qm_dqrele(pdqp);

	if (unlock_dp_on_error)
		xfs_iunlock(dp, XFS_ILOCK_EXCL);
	return error;
}

/*
 * xfs_link
 *
 * Add a directory entry named target_name to directory tdp that refers to
 * the existing inode sip, and bump sip's link count, in one XFS_TRANS_LINK
 * transaction.  sip must not be a directory (asserted).
 *
 * Returns 0 on success or an error code.  Errors produced directly here are
 * EIO when the filesystem has been forcibly shut down and EXDEV when tdp has
 * XFS_DIFLAG_PROJINHERIT set and the two project IDs differ; everything else
 * is passed through from the dquot attach, reservation, directory and commit
 * calls.
 */
int
xfs_link(
	xfs_inode_t		*tdp,
	xfs_inode_t		*sip,
	struct xfs_name		*target_name)
{
	xfs_mount_t		*mp = tdp->i_mount;
	xfs_trans_t		*tp;
	int			error;
	xfs_bmap_free_t		free_list;
	xfs_fsblock_t		first_block;
	int			cancel_flags;
	int			committed;
	int			resblks;

	trace_xfs_link(tdp, target_name);

	ASSERT(!S_ISDIR(sip->i_d.di_mode));

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	/* No transaction exists yet, so these failures just return. */
	error = xfs_qm_dqattach(sip, 0);
	if (error)
		goto std_return;

	error = xfs_qm_dqattach(tdp, 0);
	if (error)
		goto std_return;

	tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);
	/* Assume the reservation succeeds; reset below if it does not. */
	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
	resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, resblks, 0);
	if (error == ENOSPC) {
		/*
		 * No room for the block reservation: retry without one.
		 * resblks == 0 is then what xfs_dir_canenter() and
		 * xfs_dir_createname() are given below.
		 */
		resblks = 0;
		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, 0, 0);
	}
	if (error) {
		/* Reservation failed: cancel without RELEASE_LOG_RES. */
		cancel_flags = 0;
		goto error_return;
	}

	xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);

	/*
	 * Neither inode is unlocked explicitly anywhere below.
	 * NOTE(review): presumably joining with XFS_ILOCK_EXCL hands the
	 * unlock to transaction commit/cancel - confirm in xfs_trans_ijoin().
	 */
	xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);

	/*
	 * If we are using project inheritance, we only allow hard link
	 * creation in our tree when the project IDs are the same; else
	 * the tree quota mechanism could be circumvented.
	 */
	if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
		     (xfs_get_projid(tdp) != xfs_get_projid(sip)))) {
		error = XFS_ERROR(EXDEV);
		goto error_return;
	}

	error = xfs_dir_canenter(tp, tdp, target_name, resblks);
	if (error)
		goto error_return;

	xfs_bmap_init(&free_list, &first_block);

	/* From here on failures cancel with XFS_TRANS_ABORT added. */
	error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
					&first_block, &free_list, resblks);
	if (error)
		goto abort_return;
	xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);

	error = xfs_bumplink(tp, sip);
	if (error)
		goto abort_return;

	/*
	 * If this is a synchronous mount, make sure that the
	 * link transaction goes to disk before returning to
	 * the user.
	 */
	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
		xfs_trans_set_sync(tp);
	}

	error = xfs_bmap_finish (&tp, &free_list, &committed);
	if (error) {
		xfs_bmap_cancel(&free_list);
		goto abort_return;
	}

	/* The commit result is returned as-is; no cancel follows it. */
	return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);

 abort_return:
	cancel_flags |= XFS_TRANS_ABORT;
 error_return:
	xfs_trans_cancel(tp, cancel_flags);
 std_return:
	return error;
}

/*
 * Free up the underlying blocks past new_size.  The new size must be smaller
 * than the current size.  This routine can be used both for the attribute and
 * data fork, and does not modify the inode size, which is left to the caller.
 *
 * The transaction passed to this routine must have made a permanent log
 * reservation of at least XFS_ITRUNCATE_LOG_RES.
 * This routine may commit the
 * given transaction and start new ones, so make sure everything involved in
 * the transaction is tidy before calling here.  Some transaction will be
 * returned to the caller to be committed.  The incoming transaction must
 * already include the inode, and both inode locks must be held exclusively.
 * The inode must also be "held" within the transaction.  On return the inode
 * will be "held" within the returned transaction.  This routine does NOT
 * require any disk space to be reserved for it within the transaction.
 *
 * If we get an error, we must return with the inode locked and linked into the
 * current transaction.  This keeps things simple for the higher level code,
 * because it always knows that the inode is locked and held in the transaction
 * that returns to it whether errors occur or not.  We don't mark the inode
 * dirty on error so that transactions can be easily aborted if possible.
*/
int
xfs_itruncate_extents(
	struct xfs_trans	**tpp,
	struct xfs_inode	*ip,
	int			whichfork,
	xfs_fsize_t		new_size)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp = *tpp;
	struct xfs_trans	*ntp;
	xfs_bmap_free_t		free_list;
	xfs_fsblock_t		first_block;
	xfs_fileoff_t		first_unmap_block;
	xfs_fileoff_t		last_block;
	xfs_filblks_t		unmap_len;
	int			committed;
	int			error = 0;
	int			done = 0;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	ASSERT(!atomic_read(&VFS_I(ip)->i_count) ||
	       xfs_isilocked(ip, XFS_IOLOCK_EXCL));
	ASSERT(new_size <= XFS_ISIZE(ip));
	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
	ASSERT(ip->i_itemp != NULL);
	ASSERT(ip->i_itemp->ili_lock_flags == 0);
	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));

	trace_xfs_itruncate_extents_start(ip, new_size);

	/*
	 * Since it is possible for space to become allocated beyond
	 * the end of the file (in a crash where the space is allocated
	 * but the inode size is not yet updated), simply remove any
	 * blocks which show up between the new EOF and the maximum
	 * possible file size.  If the first block to be removed is
	 * beyond the maximum file size (ie it is the same as last_block),
	 * then there is nothing to do.
	 */
	first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
	last_block = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
	/* Early exit: *tpp is not written, the caller keeps its transaction. */
	if (first_unmap_block == last_block)
		return 0;

	ASSERT(first_unmap_block < last_block);
	unmap_len = last_block - first_unmap_block + 1;

	/*
	 * Each pass unmaps at most XFS_ITRUNC_MAX_EXTENTS extents, then rolls
	 * the transaction; xfs_bunmapi() sets 'done' when nothing is left.
	 */
	while (!done) {
		xfs_bmap_init(&free_list, &first_block);
		error = xfs_bunmapi(tp, ip,
				    first_unmap_block, unmap_len,
				    xfs_bmapi_aflag(whichfork),
				    XFS_ITRUNC_MAX_EXTENTS,
				    &first_block, &free_list,
				    &done);
		if (error)
			goto out_bmap_cancel;

		/*
		 * Duplicate the transaction that has the permanent
		 * reservation and commit the old transaction.
		 *
		 * The inode is rejoined when 'committed' is set, and this is
		 * done before 'error' is examined so that it also happens on
		 * the failure path.  NOTE(review): presumably tp points at a
		 * different transaction when committed != 0 - confirm in
		 * xfs_bmap_finish().
		 */
		error = xfs_bmap_finish(&tp, &free_list, &committed);
		if (committed)
			xfs_trans_ijoin(tp, ip, 0);
		if (error)
			goto out_bmap_cancel;

		if (committed) {
			/*
			 * Mark the inode dirty so it will be logged and
			 * moved forward in the log as part of every commit.
			 */
			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
		}

		/*
		 * Roll: tp is switched to the duplicate and the inode joined
		 * to it before the commit result is checked, so the
		 * transaction handed back through *tpp has the inode joined
		 * even when the commit failed.
		 */
		ntp = xfs_trans_dup(tp);
		error = xfs_trans_commit(tp, 0);
		tp = ntp;

		xfs_trans_ijoin(tp, ip, 0);

		if (error)
			goto out;

		/*
		 * Transaction commit worked ok so we can drop the extra ticket
		 * reference that we gained in xfs_trans_dup()
		 */
		xfs_log_ticket_put(tp->t_ticket);
		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
		if (error)
			goto out;
	}

	/*
	 * Always re-log the inode so that our permanent transaction can keep
	 * on rolling it forward in the log.
	 */
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	trace_xfs_itruncate_extents_end(ip, new_size);

out:
	/* Success or failure, report the transaction that is current now. */
	*tpp = tp;
	return error;
out_bmap_cancel:
	/*
	 * If the bunmapi call encounters an error, return to the caller where
	 * the transaction can be properly aborted.  We just need to make sure
	 * we're not holding any resources that we were not when we came in.
	 */
	xfs_bmap_cancel(&free_list);
	goto out;
}

/*
 * xfs_release
 *
 * Per-close housekeeping for a regular file.  NOTE(review): the caller is not
 * visible here; presumably this runs from the file release path - confirm.
 *
 * Returns 0 without doing anything for non-regular inodes and read-only
 * mounts.  Otherwise it may start writeback for a file flagged
 * XFS_ITRUNCATED, and for inodes that still have links it may free blocks
 * past EOF.
 *
 * Returns 0, the negated filemap_flush() result, or an error from
 * xfs_free_eofblocks() other than EAGAIN.
 */
int
xfs_release(
	xfs_inode_t	*ip)
{
	xfs_mount_t	*mp = ip->i_mount;
	int		error;

	if (!S_ISREG(ip->i_d.di_mode) || (ip->i_d.di_mode == 0))
		return 0;

	/* If this is a read-only mount, don't do this (would generate I/O) */
	if (mp->m_flags & XFS_MOUNT_RDONLY)
		return 0;

	if (!XFS_FORCED_SHUTDOWN(mp)) {
		int truncated;

		/*
		 * If we are using filestreams, and we have an unlinked
		 * file that we are processing the last close on, then nothing
		 * will be able to reopen and write to this file.  Purge this
		 * inode from the filestreams cache so that it doesn't delay
		 * teardown of the inode.
		 */
		if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip))
			xfs_filestream_deassociate(ip);

		/*
		 * If we previously truncated this file and removed old data
		 * in the process, we want to initiate "early" writeout on
		 * the last close.  This is an attempt to combat the notorious
		 * NULL files problem which is particularly noticeable from a
		 * truncate down, buffered (re-)write (delalloc), followed by
		 * a crash.  What we are effectively doing here is
		 * significantly reducing the time window where we'd otherwise
		 * be exposed to that problem.
		 */
		truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
		if (truncated) {
			xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE);
			if (VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) {
				/*
				 * NOTE(review): the sign flip presumably
				 * converts a negative VFS errno to the
				 * positive codes this file compares against
				 * (ENOSPC, EAGAIN) - confirm.
				 */
				error = -filemap_flush(VFS_I(ip)->i_mapping);
				if (error)
					return error;
			}
		}
	}

	/* Unlinked inodes get no EOF-block trimming here. */
	if (ip->i_d.di_nlink == 0)
		return 0;

	if (xfs_can_free_eofblocks(ip, false)) {

		/*
		 * If we can't get the iolock just skip truncating the blocks
		 * past EOF because we could deadlock with the mmap_sem
		 * otherwise.  We'll get another chance to drop them once the
		 * last reference to the inode is dropped, so we'll never leak
		 * blocks permanently.
		 *
		 * Further, check if the inode is being opened, written and
		 * closed frequently and we have delayed allocation blocks
		 * outstanding (e.g. streaming writes from the NFS server),
		 * truncating the blocks past EOF will cause fragmentation to
		 * occur.
		 *
		 * In this case don't do the truncation, either, but we have to
		 * be careful how we detect this case. Blocks beyond EOF show
		 * up as i_delayed_blks even when the inode is clean, so we
		 * need to truncate them away first before checking for a dirty
		 * release. Hence on the first dirty close we will still remove
		 * the speculative allocation, but after that we will leave it
		 * in place.
		 */
		if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
			return 0;

		/*
		 * EAGAIN is tolerated rather than returned.  NOTE(review):
		 * presumably it is the "could not get the iolock" result
		 * described above, selected by the 'true' argument - confirm
		 * in xfs_free_eofblocks().
		 */
		error = xfs_free_eofblocks(mp, ip, true);
		if (error && error != EAGAIN)
			return error;

		/* delalloc blocks after truncation means it really is dirty */
		if (ip->i_delayed_blks)
			xfs_iflags_set(ip, XFS_IDIRTY_RELEASE);
	}
	return 0;
}

/*
 * xfs_inactive_truncate
 *
 * Called to perform a truncate when an inode becomes unlinked.
1664f7be2d7fSBrian Foster */ 1665f7be2d7fSBrian Foster STATIC int 1666f7be2d7fSBrian Foster xfs_inactive_truncate( 1667f7be2d7fSBrian Foster struct xfs_inode *ip) 1668f7be2d7fSBrian Foster { 1669f7be2d7fSBrian Foster struct xfs_mount *mp = ip->i_mount; 1670f7be2d7fSBrian Foster struct xfs_trans *tp; 1671f7be2d7fSBrian Foster int error; 1672f7be2d7fSBrian Foster 1673f7be2d7fSBrian Foster tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 1674f7be2d7fSBrian Foster error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); 1675f7be2d7fSBrian Foster if (error) { 1676f7be2d7fSBrian Foster ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1677f7be2d7fSBrian Foster xfs_trans_cancel(tp, 0); 1678f7be2d7fSBrian Foster return error; 1679f7be2d7fSBrian Foster } 1680f7be2d7fSBrian Foster 1681f7be2d7fSBrian Foster xfs_ilock(ip, XFS_ILOCK_EXCL); 1682f7be2d7fSBrian Foster xfs_trans_ijoin(tp, ip, 0); 1683f7be2d7fSBrian Foster 1684f7be2d7fSBrian Foster /* 1685f7be2d7fSBrian Foster * Log the inode size first to prevent stale data exposure in the event 1686f7be2d7fSBrian Foster * of a system crash before the truncate completes. See the related 1687f7be2d7fSBrian Foster * comment in xfs_setattr_size() for details. 
1688f7be2d7fSBrian Foster */ 1689f7be2d7fSBrian Foster ip->i_d.di_size = 0; 1690f7be2d7fSBrian Foster xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1691f7be2d7fSBrian Foster 1692f7be2d7fSBrian Foster error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0); 1693f7be2d7fSBrian Foster if (error) 1694f7be2d7fSBrian Foster goto error_trans_cancel; 1695f7be2d7fSBrian Foster 1696f7be2d7fSBrian Foster ASSERT(ip->i_d.di_nextents == 0); 1697f7be2d7fSBrian Foster 1698f7be2d7fSBrian Foster error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1699f7be2d7fSBrian Foster if (error) 1700f7be2d7fSBrian Foster goto error_unlock; 1701f7be2d7fSBrian Foster 1702f7be2d7fSBrian Foster xfs_iunlock(ip, XFS_ILOCK_EXCL); 1703f7be2d7fSBrian Foster return 0; 1704f7be2d7fSBrian Foster 1705f7be2d7fSBrian Foster error_trans_cancel: 1706f7be2d7fSBrian Foster xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 1707f7be2d7fSBrian Foster error_unlock: 1708f7be2d7fSBrian Foster xfs_iunlock(ip, XFS_ILOCK_EXCL); 1709f7be2d7fSBrian Foster return error; 1710f7be2d7fSBrian Foster } 1711f7be2d7fSBrian Foster 1712f7be2d7fSBrian Foster /* 171388877d2bSBrian Foster * xfs_inactive_ifree() 171488877d2bSBrian Foster * 171588877d2bSBrian Foster * Perform the inode free when an inode is unlinked. 
171688877d2bSBrian Foster */ 171788877d2bSBrian Foster STATIC int 171888877d2bSBrian Foster xfs_inactive_ifree( 171988877d2bSBrian Foster struct xfs_inode *ip) 172088877d2bSBrian Foster { 172188877d2bSBrian Foster xfs_bmap_free_t free_list; 172288877d2bSBrian Foster xfs_fsblock_t first_block; 172388877d2bSBrian Foster int committed; 172488877d2bSBrian Foster struct xfs_mount *mp = ip->i_mount; 172588877d2bSBrian Foster struct xfs_trans *tp; 172688877d2bSBrian Foster int error; 172788877d2bSBrian Foster 172888877d2bSBrian Foster tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 172988877d2bSBrian Foster error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, 0, 0); 173088877d2bSBrian Foster if (error) { 173188877d2bSBrian Foster ASSERT(XFS_FORCED_SHUTDOWN(mp)); 173288877d2bSBrian Foster xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES); 173388877d2bSBrian Foster return error; 173488877d2bSBrian Foster } 173588877d2bSBrian Foster 173688877d2bSBrian Foster xfs_ilock(ip, XFS_ILOCK_EXCL); 173788877d2bSBrian Foster xfs_trans_ijoin(tp, ip, 0); 173888877d2bSBrian Foster 173988877d2bSBrian Foster xfs_bmap_init(&free_list, &first_block); 174088877d2bSBrian Foster error = xfs_ifree(tp, ip, &free_list); 174188877d2bSBrian Foster if (error) { 174288877d2bSBrian Foster /* 174388877d2bSBrian Foster * If we fail to free the inode, shut down. The cancel 174488877d2bSBrian Foster * might do that, we need to make sure. Otherwise the 174588877d2bSBrian Foster * inode might be lost for a long time or forever. 
174688877d2bSBrian Foster */ 174788877d2bSBrian Foster if (!XFS_FORCED_SHUTDOWN(mp)) { 174888877d2bSBrian Foster xfs_notice(mp, "%s: xfs_ifree returned error %d", 174988877d2bSBrian Foster __func__, error); 175088877d2bSBrian Foster xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); 175188877d2bSBrian Foster } 175288877d2bSBrian Foster xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); 175388877d2bSBrian Foster xfs_iunlock(ip, XFS_ILOCK_EXCL); 175488877d2bSBrian Foster return error; 175588877d2bSBrian Foster } 175688877d2bSBrian Foster 175788877d2bSBrian Foster /* 175888877d2bSBrian Foster * Credit the quota account(s). The inode is gone. 175988877d2bSBrian Foster */ 176088877d2bSBrian Foster xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1); 176188877d2bSBrian Foster 176288877d2bSBrian Foster /* 176388877d2bSBrian Foster * Just ignore errors at this point. There is nothing we can 176488877d2bSBrian Foster * do except to try to keep going. Make sure it's not a silent 176588877d2bSBrian Foster * error. 176688877d2bSBrian Foster */ 176788877d2bSBrian Foster error = xfs_bmap_finish(&tp, &free_list, &committed); 176888877d2bSBrian Foster if (error) 176988877d2bSBrian Foster xfs_notice(mp, "%s: xfs_bmap_finish returned error %d", 177088877d2bSBrian Foster __func__, error); 177188877d2bSBrian Foster error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 177288877d2bSBrian Foster if (error) 177388877d2bSBrian Foster xfs_notice(mp, "%s: xfs_trans_commit returned error %d", 177488877d2bSBrian Foster __func__, error); 177588877d2bSBrian Foster 177688877d2bSBrian Foster xfs_iunlock(ip, XFS_ILOCK_EXCL); 177788877d2bSBrian Foster return 0; 177888877d2bSBrian Foster } 177988877d2bSBrian Foster 178088877d2bSBrian Foster /* 1781c24b5dfaSDave Chinner * xfs_inactive 1782c24b5dfaSDave Chinner * 1783c24b5dfaSDave Chinner * This is called when the vnode reference count for the vnode 1784c24b5dfaSDave Chinner * goes to zero. 
If the file has been unlinked, then it must 1785c24b5dfaSDave Chinner * now be truncated. Also, we clear all of the read-ahead state 1786c24b5dfaSDave Chinner * kept for the inode here since the file is now closed. 1787c24b5dfaSDave Chinner */ 178874564fb4SBrian Foster void 1789c24b5dfaSDave Chinner xfs_inactive( 1790c24b5dfaSDave Chinner xfs_inode_t *ip) 1791c24b5dfaSDave Chinner { 17923d3c8b52SJie Liu struct xfs_mount *mp; 1793c24b5dfaSDave Chinner int error; 1794c24b5dfaSDave Chinner int truncate = 0; 1795c24b5dfaSDave Chinner 1796c24b5dfaSDave Chinner /* 1797c24b5dfaSDave Chinner * If the inode is already free, then there can be nothing 1798c24b5dfaSDave Chinner * to clean up here. 1799c24b5dfaSDave Chinner */ 1800d948709bSBen Myers if (ip->i_d.di_mode == 0) { 1801c24b5dfaSDave Chinner ASSERT(ip->i_df.if_real_bytes == 0); 1802c24b5dfaSDave Chinner ASSERT(ip->i_df.if_broot_bytes == 0); 180374564fb4SBrian Foster return; 1804c24b5dfaSDave Chinner } 1805c24b5dfaSDave Chinner 1806c24b5dfaSDave Chinner mp = ip->i_mount; 1807c24b5dfaSDave Chinner 1808c24b5dfaSDave Chinner /* If this is a read-only mount, don't do this (would generate I/O) */ 1809c24b5dfaSDave Chinner if (mp->m_flags & XFS_MOUNT_RDONLY) 181074564fb4SBrian Foster return; 1811c24b5dfaSDave Chinner 1812c24b5dfaSDave Chinner if (ip->i_d.di_nlink != 0) { 1813c24b5dfaSDave Chinner /* 1814c24b5dfaSDave Chinner * force is true because we are evicting an inode from the 1815c24b5dfaSDave Chinner * cache. Post-eof blocks must be freed, lest we end up with 1816c24b5dfaSDave Chinner * broken free space accounting. 
1817c24b5dfaSDave Chinner */ 181874564fb4SBrian Foster if (xfs_can_free_eofblocks(ip, true)) 181974564fb4SBrian Foster xfs_free_eofblocks(mp, ip, false); 182074564fb4SBrian Foster 182174564fb4SBrian Foster return; 1822c24b5dfaSDave Chinner } 1823c24b5dfaSDave Chinner 1824c24b5dfaSDave Chinner if (S_ISREG(ip->i_d.di_mode) && 1825c24b5dfaSDave Chinner (ip->i_d.di_size != 0 || XFS_ISIZE(ip) != 0 || 1826c24b5dfaSDave Chinner ip->i_d.di_nextents > 0 || ip->i_delayed_blks > 0)) 1827c24b5dfaSDave Chinner truncate = 1; 1828c24b5dfaSDave Chinner 1829c24b5dfaSDave Chinner error = xfs_qm_dqattach(ip, 0); 1830c24b5dfaSDave Chinner if (error) 183174564fb4SBrian Foster return; 1832c24b5dfaSDave Chinner 1833f7be2d7fSBrian Foster if (S_ISLNK(ip->i_d.di_mode)) 183436b21ddeSBrian Foster error = xfs_inactive_symlink(ip); 1835f7be2d7fSBrian Foster else if (truncate) 1836f7be2d7fSBrian Foster error = xfs_inactive_truncate(ip); 183736b21ddeSBrian Foster if (error) 183874564fb4SBrian Foster return; 1839c24b5dfaSDave Chinner 1840c24b5dfaSDave Chinner /* 1841c24b5dfaSDave Chinner * If there are attributes associated with the file then blow them away 1842c24b5dfaSDave Chinner * now. The code calls a routine that recursively deconstructs the 1843c24b5dfaSDave Chinner * attribute fork. We need to just commit the current transaction 1844c24b5dfaSDave Chinner * because we can't use it for xfs_attr_inactive(). 
1845c24b5dfaSDave Chinner */ 1846c24b5dfaSDave Chinner if (ip->i_d.di_anextents > 0) { 1847c24b5dfaSDave Chinner ASSERT(ip->i_d.di_forkoff != 0); 1848c24b5dfaSDave Chinner 1849c24b5dfaSDave Chinner error = xfs_attr_inactive(ip); 1850c24b5dfaSDave Chinner if (error) 185174564fb4SBrian Foster return; 1852c24b5dfaSDave Chinner } 1853c24b5dfaSDave Chinner 1854c24b5dfaSDave Chinner if (ip->i_afp) 1855c24b5dfaSDave Chinner xfs_idestroy_fork(ip, XFS_ATTR_FORK); 1856c24b5dfaSDave Chinner 1857c24b5dfaSDave Chinner ASSERT(ip->i_d.di_anextents == 0); 1858c24b5dfaSDave Chinner 1859c24b5dfaSDave Chinner /* 1860c24b5dfaSDave Chinner * Free the inode. 1861c24b5dfaSDave Chinner */ 186288877d2bSBrian Foster error = xfs_inactive_ifree(ip); 1863c24b5dfaSDave Chinner if (error) 186474564fb4SBrian Foster return; 1865c24b5dfaSDave Chinner 1866c24b5dfaSDave Chinner /* 1867c24b5dfaSDave Chinner * Release the dquots held by inode, if any. 1868c24b5dfaSDave Chinner */ 1869c24b5dfaSDave Chinner xfs_qm_dqdetach(ip); 1870c24b5dfaSDave Chinner } 1871c24b5dfaSDave Chinner 18721da177e4SLinus Torvalds /* 18731da177e4SLinus Torvalds * This is called when the inode's link count goes to 0. 18741da177e4SLinus Torvalds * We place the on-disk inode on a list in the AGI. It 18751da177e4SLinus Torvalds * will be pulled from this list when the inode is freed. 
*/
int
xfs_iunlink(
	xfs_trans_t	*tp,
	xfs_inode_t	*ip)
{
	xfs_mount_t	*mp;
	xfs_agi_t	*agi;
	xfs_dinode_t	*dip;
	xfs_buf_t	*agibp;
	xfs_buf_t	*ibp;
	xfs_agino_t	agino;
	short		bucket_index;
	int		offset;
	int		error;

	/* Only an unlinked but still allocated inode belongs on the list. */
	ASSERT(ip->i_d.di_nlink == 0);
	ASSERT(ip->i_d.di_mode != 0);

	mp = tp->t_mountp;

	/*
	 * Get the agi buffer first.  It ensures lock ordering
	 * on the list.
	 */
	error = xfs_read_agi(mp, tp, XFS_INO_TO_AGNO(mp, ip->i_ino), &agibp);
	if (error)
		return error;
	agi = XFS_BUF_TO_AGI(agibp);

	/*
	 * Get the index into the agi hash table for the
	 * list this inode will go on.
	 */
	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
	ASSERT(agino != 0);
	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
	/*
	 * The bucket must hold a non-zero value (an empty bucket is tested
	 * against NULLAGINO below) and must not already point at this inode.
	 */
	ASSERT(agi->agi_unlinked[bucket_index]);
	ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino);

	if (agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)) {
		/*
		 * There is already another inode in the bucket we need
		 * to add ourselves to.  Add us at the front of the list.
		 * Here we put the head pointer into our next pointer,
		 * and then we fall through to point the head at us.
		 */
		error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
				       0, 0);
		/*
		 * NOTE(review): agibp is not released on this return;
		 * presumably it stays with tp for the caller's cancel to
		 * release - confirm.
		 */
		if (error)
			return error;

		ASSERT(dip->di_next_unlinked == cpu_to_be32(NULLAGINO));
		/* Both fields are big-endian on disk, so copy without a swap. */
		dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
		offset = ip->i_imap.im_boffset +
			offsetof(xfs_dinode_t, di_next_unlinked);

		/* need to recalc the inode CRC if appropriate */
		xfs_dinode_calc_crc(mp, dip);

		/* Log only the bytes of the next-unlinked pointer. */
		xfs_trans_inode_buf(tp, ibp);
		xfs_trans_log_buf(tp, ibp, offset,
				  (offset + sizeof(xfs_agino_t) - 1));
		xfs_inobp_check(mp, ibp);
	}

	/*
	 * Point the bucket head pointer at the inode being inserted.
	 */
	ASSERT(agino != 0);
	agi->agi_unlinked[bucket_index] = cpu_to_be32(agino);
	/* Log only the one bucket slot that changed in the AGI buffer. */
	offset = offsetof(xfs_agi_t, agi_unlinked) +
		(sizeof(xfs_agino_t) * bucket_index);
	xfs_trans_log_buf(tp, agibp, offset,
			  (offset + sizeof(xfs_agino_t) - 1));
	return 0;
}

/*
 * Pull the on-disk inode from the AGI unlinked list.
 */
STATIC int
xfs_iunlink_remove(
	xfs_trans_t	*tp,
	xfs_inode_t	*ip)
{
	xfs_ino_t	next_ino;
	xfs_mount_t	*mp;
	xfs_agi_t	*agi;
	xfs_dinode_t	*dip;
	xfs_buf_t	*agibp;
	xfs_buf_t	*ibp;
	xfs_agnumber_t	agno;
	xfs_agino_t	agino;
	xfs_agino_t	next_agino;
	xfs_buf_t	*last_ibp;
	xfs_dinode_t	*last_dip = NULL;
	short		bucket_index;
	int		offset, last_offset = 0;
	int		error;

	mp = tp->t_mountp;
	agno = XFS_INO_TO_AGNO(mp, ip->i_ino);

	/*
	 * Get the agi buffer first.  It ensures lock ordering
	 * on the list.
19831da177e4SLinus Torvalds */ 19845e1be0fbSChristoph Hellwig error = xfs_read_agi(mp, tp, agno, &agibp); 19855e1be0fbSChristoph Hellwig if (error) 19861da177e4SLinus Torvalds return error; 19875e1be0fbSChristoph Hellwig 19881da177e4SLinus Torvalds agi = XFS_BUF_TO_AGI(agibp); 19895e1be0fbSChristoph Hellwig 19901da177e4SLinus Torvalds /* 19911da177e4SLinus Torvalds * Get the index into the agi hash table for the 19921da177e4SLinus Torvalds * list this inode will go on. 19931da177e4SLinus Torvalds */ 19941da177e4SLinus Torvalds agino = XFS_INO_TO_AGINO(mp, ip->i_ino); 19951da177e4SLinus Torvalds ASSERT(agino != 0); 19961da177e4SLinus Torvalds bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; 199769ef921bSChristoph Hellwig ASSERT(agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)); 19981da177e4SLinus Torvalds ASSERT(agi->agi_unlinked[bucket_index]); 19991da177e4SLinus Torvalds 200016259e7dSChristoph Hellwig if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) { 20011da177e4SLinus Torvalds /* 2002475ee413SChristoph Hellwig * We're at the head of the list. Get the inode's on-disk 2003475ee413SChristoph Hellwig * buffer to see if there is anyone after us on the list. 2004475ee413SChristoph Hellwig * Only modify our next pointer if it is not already NULLAGINO. 2005475ee413SChristoph Hellwig * This saves us the overhead of dealing with the buffer when 2006475ee413SChristoph Hellwig * there is no need to change it. 
20071da177e4SLinus Torvalds */ 2008475ee413SChristoph Hellwig error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp, 2009475ee413SChristoph Hellwig 0, 0); 20101da177e4SLinus Torvalds if (error) { 2011475ee413SChristoph Hellwig xfs_warn(mp, "%s: xfs_imap_to_bp returned error %d.", 20120b932cccSDave Chinner __func__, error); 20131da177e4SLinus Torvalds return error; 20141da177e4SLinus Torvalds } 2015347d1c01SChristoph Hellwig next_agino = be32_to_cpu(dip->di_next_unlinked); 20161da177e4SLinus Torvalds ASSERT(next_agino != 0); 20171da177e4SLinus Torvalds if (next_agino != NULLAGINO) { 2018347d1c01SChristoph Hellwig dip->di_next_unlinked = cpu_to_be32(NULLAGINO); 201992bfc6e7SChristoph Hellwig offset = ip->i_imap.im_boffset + 20201da177e4SLinus Torvalds offsetof(xfs_dinode_t, di_next_unlinked); 20210a32c26eSDave Chinner 20220a32c26eSDave Chinner /* need to recalc the inode CRC if appropriate */ 20230a32c26eSDave Chinner xfs_dinode_calc_crc(mp, dip); 20240a32c26eSDave Chinner 20251da177e4SLinus Torvalds xfs_trans_inode_buf(tp, ibp); 20261da177e4SLinus Torvalds xfs_trans_log_buf(tp, ibp, offset, 20271da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 20281da177e4SLinus Torvalds xfs_inobp_check(mp, ibp); 20291da177e4SLinus Torvalds } else { 20301da177e4SLinus Torvalds xfs_trans_brelse(tp, ibp); 20311da177e4SLinus Torvalds } 20321da177e4SLinus Torvalds /* 20331da177e4SLinus Torvalds * Point the bucket head pointer at the next inode. 
20341da177e4SLinus Torvalds */ 20351da177e4SLinus Torvalds ASSERT(next_agino != 0); 20361da177e4SLinus Torvalds ASSERT(next_agino != agino); 203716259e7dSChristoph Hellwig agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino); 20381da177e4SLinus Torvalds offset = offsetof(xfs_agi_t, agi_unlinked) + 20391da177e4SLinus Torvalds (sizeof(xfs_agino_t) * bucket_index); 20401da177e4SLinus Torvalds xfs_trans_log_buf(tp, agibp, offset, 20411da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 20421da177e4SLinus Torvalds } else { 20431da177e4SLinus Torvalds /* 20441da177e4SLinus Torvalds * We need to search the list for the inode being freed. 20451da177e4SLinus Torvalds */ 204616259e7dSChristoph Hellwig next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); 20471da177e4SLinus Torvalds last_ibp = NULL; 20481da177e4SLinus Torvalds while (next_agino != agino) { 2049129dbc9aSChristoph Hellwig struct xfs_imap imap; 2050129dbc9aSChristoph Hellwig 2051129dbc9aSChristoph Hellwig if (last_ibp) 20521da177e4SLinus Torvalds xfs_trans_brelse(tp, last_ibp); 2053129dbc9aSChristoph Hellwig 2054129dbc9aSChristoph Hellwig imap.im_blkno = 0; 20551da177e4SLinus Torvalds next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino); 2056129dbc9aSChristoph Hellwig 2057129dbc9aSChristoph Hellwig error = xfs_imap(mp, tp, next_ino, &imap, 0); 20581da177e4SLinus Torvalds if (error) { 20590b932cccSDave Chinner xfs_warn(mp, 2060129dbc9aSChristoph Hellwig "%s: xfs_imap returned error %d.", 20610b932cccSDave Chinner __func__, error); 20621da177e4SLinus Torvalds return error; 20631da177e4SLinus Torvalds } 2064129dbc9aSChristoph Hellwig 2065129dbc9aSChristoph Hellwig error = xfs_imap_to_bp(mp, tp, &imap, &last_dip, 2066129dbc9aSChristoph Hellwig &last_ibp, 0, 0); 2067129dbc9aSChristoph Hellwig if (error) { 2068129dbc9aSChristoph Hellwig xfs_warn(mp, 2069129dbc9aSChristoph Hellwig "%s: xfs_imap_to_bp returned error %d.", 2070129dbc9aSChristoph Hellwig __func__, error); 2071129dbc9aSChristoph Hellwig 
return error; 2072129dbc9aSChristoph Hellwig } 2073129dbc9aSChristoph Hellwig 2074129dbc9aSChristoph Hellwig last_offset = imap.im_boffset; 2075347d1c01SChristoph Hellwig next_agino = be32_to_cpu(last_dip->di_next_unlinked); 20761da177e4SLinus Torvalds ASSERT(next_agino != NULLAGINO); 20771da177e4SLinus Torvalds ASSERT(next_agino != 0); 20781da177e4SLinus Torvalds } 2079475ee413SChristoph Hellwig 20801da177e4SLinus Torvalds /* 2081475ee413SChristoph Hellwig * Now last_ibp points to the buffer previous to us on the 2082475ee413SChristoph Hellwig * unlinked list. Pull us from the list. 20831da177e4SLinus Torvalds */ 2084475ee413SChristoph Hellwig error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp, 2085475ee413SChristoph Hellwig 0, 0); 20861da177e4SLinus Torvalds if (error) { 2087475ee413SChristoph Hellwig xfs_warn(mp, "%s: xfs_imap_to_bp(2) returned error %d.", 20880b932cccSDave Chinner __func__, error); 20891da177e4SLinus Torvalds return error; 20901da177e4SLinus Torvalds } 2091347d1c01SChristoph Hellwig next_agino = be32_to_cpu(dip->di_next_unlinked); 20921da177e4SLinus Torvalds ASSERT(next_agino != 0); 20931da177e4SLinus Torvalds ASSERT(next_agino != agino); 20941da177e4SLinus Torvalds if (next_agino != NULLAGINO) { 2095347d1c01SChristoph Hellwig dip->di_next_unlinked = cpu_to_be32(NULLAGINO); 209692bfc6e7SChristoph Hellwig offset = ip->i_imap.im_boffset + 20971da177e4SLinus Torvalds offsetof(xfs_dinode_t, di_next_unlinked); 20980a32c26eSDave Chinner 20990a32c26eSDave Chinner /* need to recalc the inode CRC if appropriate */ 21000a32c26eSDave Chinner xfs_dinode_calc_crc(mp, dip); 21010a32c26eSDave Chinner 21021da177e4SLinus Torvalds xfs_trans_inode_buf(tp, ibp); 21031da177e4SLinus Torvalds xfs_trans_log_buf(tp, ibp, offset, 21041da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 21051da177e4SLinus Torvalds xfs_inobp_check(mp, ibp); 21061da177e4SLinus Torvalds } else { 21071da177e4SLinus Torvalds xfs_trans_brelse(tp, ibp); 21081da177e4SLinus Torvalds } 
21091da177e4SLinus Torvalds /* 21101da177e4SLinus Torvalds * Point the previous inode on the list to the next inode. 21111da177e4SLinus Torvalds */ 2112347d1c01SChristoph Hellwig last_dip->di_next_unlinked = cpu_to_be32(next_agino); 21131da177e4SLinus Torvalds ASSERT(next_agino != 0); 21141da177e4SLinus Torvalds offset = last_offset + offsetof(xfs_dinode_t, di_next_unlinked); 21150a32c26eSDave Chinner 21160a32c26eSDave Chinner /* need to recalc the inode CRC if appropriate */ 21170a32c26eSDave Chinner xfs_dinode_calc_crc(mp, last_dip); 21180a32c26eSDave Chinner 21191da177e4SLinus Torvalds xfs_trans_inode_buf(tp, last_ibp); 21201da177e4SLinus Torvalds xfs_trans_log_buf(tp, last_ibp, offset, 21211da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 21221da177e4SLinus Torvalds xfs_inobp_check(mp, last_ibp); 21231da177e4SLinus Torvalds } 21241da177e4SLinus Torvalds return 0; 21251da177e4SLinus Torvalds } 21261da177e4SLinus Torvalds 21275b3eed75SDave Chinner /* 21280b8182dbSZhi Yong Wu * A big issue when freeing the inode cluster is that we _cannot_ skip any 21295b3eed75SDave Chinner * inodes that are in memory - they all must be marked stale and attached to 21305b3eed75SDave Chinner * the cluster buffer. 
21315b3eed75SDave Chinner */ 21322a30f36dSChandra Seetharaman STATIC int 21331da177e4SLinus Torvalds xfs_ifree_cluster( 21341da177e4SLinus Torvalds xfs_inode_t *free_ip, 21351da177e4SLinus Torvalds xfs_trans_t *tp, 21361da177e4SLinus Torvalds xfs_ino_t inum) 21371da177e4SLinus Torvalds { 21381da177e4SLinus Torvalds xfs_mount_t *mp = free_ip->i_mount; 21391da177e4SLinus Torvalds int blks_per_cluster; 21401da177e4SLinus Torvalds int nbufs; 21411da177e4SLinus Torvalds int ninodes; 21425b257b4aSDave Chinner int i, j; 21431da177e4SLinus Torvalds xfs_daddr_t blkno; 21441da177e4SLinus Torvalds xfs_buf_t *bp; 21455b257b4aSDave Chinner xfs_inode_t *ip; 21461da177e4SLinus Torvalds xfs_inode_log_item_t *iip; 21471da177e4SLinus Torvalds xfs_log_item_t *lip; 21485017e97dSDave Chinner struct xfs_perag *pag; 21491da177e4SLinus Torvalds 21505017e97dSDave Chinner pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum)); 21511da177e4SLinus Torvalds if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { 21521da177e4SLinus Torvalds blks_per_cluster = 1; 21531da177e4SLinus Torvalds ninodes = mp->m_sb.sb_inopblock; 21541da177e4SLinus Torvalds nbufs = XFS_IALLOC_BLOCKS(mp); 21551da177e4SLinus Torvalds } else { 21561da177e4SLinus Torvalds blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) / 21571da177e4SLinus Torvalds mp->m_sb.sb_blocksize; 21581da177e4SLinus Torvalds ninodes = blks_per_cluster * mp->m_sb.sb_inopblock; 21591da177e4SLinus Torvalds nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster; 21601da177e4SLinus Torvalds } 21611da177e4SLinus Torvalds 21621da177e4SLinus Torvalds for (j = 0; j < nbufs; j++, inum += ninodes) { 21631da177e4SLinus Torvalds blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), 21641da177e4SLinus Torvalds XFS_INO_TO_AGBNO(mp, inum)); 21651da177e4SLinus Torvalds 21661da177e4SLinus Torvalds /* 21675b257b4aSDave Chinner * We obtain and lock the backing buffer first in the process 21685b257b4aSDave Chinner * here, as we have to ensure that any dirty inode that we 
21695b257b4aSDave Chinner * can't get the flush lock on is attached to the buffer. 21705b257b4aSDave Chinner * If we scan the in-memory inodes first, then buffer IO can 21715b257b4aSDave Chinner * complete before we get a lock on it, and hence we may fail 21725b257b4aSDave Chinner * to mark all the active inodes on the buffer stale. 21731da177e4SLinus Torvalds */ 21741da177e4SLinus Torvalds bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 2175b6aff29fSDave Chinner mp->m_bsize * blks_per_cluster, 2176b6aff29fSDave Chinner XBF_UNMAPPED); 21771da177e4SLinus Torvalds 21782a30f36dSChandra Seetharaman if (!bp) 21792a30f36dSChandra Seetharaman return ENOMEM; 2180b0f539deSDave Chinner 2181b0f539deSDave Chinner /* 2182b0f539deSDave Chinner * This buffer may not have been correctly initialised as we 2183b0f539deSDave Chinner * didn't read it from disk. That's not important because we are 2184b0f539deSDave Chinner * only using to mark the buffer as stale in the log, and to 2185b0f539deSDave Chinner * attach stale cached inodes on it. That means it will never be 2186b0f539deSDave Chinner * dispatched for IO. If it is, we want to know about it, and we 2187b0f539deSDave Chinner * want it to fail. We can acheive this by adding a write 2188b0f539deSDave Chinner * verifier to the buffer. 2189b0f539deSDave Chinner */ 21901813dd64SDave Chinner bp->b_ops = &xfs_inode_buf_ops; 2191b0f539deSDave Chinner 21925b257b4aSDave Chinner /* 21935b257b4aSDave Chinner * Walk the inodes already attached to the buffer and mark them 21945b257b4aSDave Chinner * stale. These will all have the flush locks held, so an 21955b3eed75SDave Chinner * in-memory inode walk can't lock them. By marking them all 21965b3eed75SDave Chinner * stale first, we will not attempt to lock them in the loop 21975b3eed75SDave Chinner * below as the XFS_ISTALE flag will be set. 
21985b257b4aSDave Chinner */ 2199adadbeefSChristoph Hellwig lip = bp->b_fspriv; 22001da177e4SLinus Torvalds while (lip) { 22011da177e4SLinus Torvalds if (lip->li_type == XFS_LI_INODE) { 22021da177e4SLinus Torvalds iip = (xfs_inode_log_item_t *)lip; 22031da177e4SLinus Torvalds ASSERT(iip->ili_logged == 1); 2204ca30b2a7SChristoph Hellwig lip->li_cb = xfs_istale_done; 22057b2e2a31SDavid Chinner xfs_trans_ail_copy_lsn(mp->m_ail, 22067b2e2a31SDavid Chinner &iip->ili_flush_lsn, 22077b2e2a31SDavid Chinner &iip->ili_item.li_lsn); 2208e5ffd2bbSDavid Chinner xfs_iflags_set(iip->ili_inode, XFS_ISTALE); 22091da177e4SLinus Torvalds } 22101da177e4SLinus Torvalds lip = lip->li_bio_list; 22111da177e4SLinus Torvalds } 22121da177e4SLinus Torvalds 22135b3eed75SDave Chinner 22145b257b4aSDave Chinner /* 22155b257b4aSDave Chinner * For each inode in memory attempt to add it to the inode 22165b257b4aSDave Chinner * buffer and set it up for being staled on buffer IO 22175b257b4aSDave Chinner * completion. This is safe as we've locked out tail pushing 22185b257b4aSDave Chinner * and flushing by locking the buffer. 22195b257b4aSDave Chinner * 22205b257b4aSDave Chinner * We have already marked every inode that was part of a 22215b257b4aSDave Chinner * transaction stale above, which means there is no point in 22225b257b4aSDave Chinner * even trying to lock them. 
22235b257b4aSDave Chinner */ 22245b257b4aSDave Chinner for (i = 0; i < ninodes; i++) { 22255b3eed75SDave Chinner retry: 22261a3e8f3dSDave Chinner rcu_read_lock(); 22275b257b4aSDave Chinner ip = radix_tree_lookup(&pag->pag_ici_root, 22285b257b4aSDave Chinner XFS_INO_TO_AGINO(mp, (inum + i))); 22291da177e4SLinus Torvalds 22301a3e8f3dSDave Chinner /* Inode not in memory, nothing to do */ 22311a3e8f3dSDave Chinner if (!ip) { 22321a3e8f3dSDave Chinner rcu_read_unlock(); 22335b257b4aSDave Chinner continue; 22345b257b4aSDave Chinner } 22355b257b4aSDave Chinner 22365b3eed75SDave Chinner /* 22371a3e8f3dSDave Chinner * because this is an RCU protected lookup, we could 22381a3e8f3dSDave Chinner * find a recently freed or even reallocated inode 22391a3e8f3dSDave Chinner * during the lookup. We need to check under the 22401a3e8f3dSDave Chinner * i_flags_lock for a valid inode here. Skip it if it 22411a3e8f3dSDave Chinner * is not valid, the wrong inode or stale. 22421a3e8f3dSDave Chinner */ 22431a3e8f3dSDave Chinner spin_lock(&ip->i_flags_lock); 22441a3e8f3dSDave Chinner if (ip->i_ino != inum + i || 22451a3e8f3dSDave Chinner __xfs_iflags_test(ip, XFS_ISTALE)) { 22461a3e8f3dSDave Chinner spin_unlock(&ip->i_flags_lock); 22471a3e8f3dSDave Chinner rcu_read_unlock(); 22481a3e8f3dSDave Chinner continue; 22491a3e8f3dSDave Chinner } 22501a3e8f3dSDave Chinner spin_unlock(&ip->i_flags_lock); 22511a3e8f3dSDave Chinner 22521a3e8f3dSDave Chinner /* 22535b3eed75SDave Chinner * Don't try to lock/unlock the current inode, but we 22545b3eed75SDave Chinner * _cannot_ skip the other inodes that we did not find 22555b3eed75SDave Chinner * in the list attached to the buffer and are not 22565b3eed75SDave Chinner * already marked stale. If we can't lock it, back off 22575b3eed75SDave Chinner * and retry. 
22585b3eed75SDave Chinner */ 22595b257b4aSDave Chinner if (ip != free_ip && 22605b257b4aSDave Chinner !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { 22611a3e8f3dSDave Chinner rcu_read_unlock(); 22625b3eed75SDave Chinner delay(1); 22635b3eed75SDave Chinner goto retry; 22645b257b4aSDave Chinner } 22651a3e8f3dSDave Chinner rcu_read_unlock(); 22665b257b4aSDave Chinner 22675b3eed75SDave Chinner xfs_iflock(ip); 22685b257b4aSDave Chinner xfs_iflags_set(ip, XFS_ISTALE); 22695b257b4aSDave Chinner 22705b3eed75SDave Chinner /* 22715b3eed75SDave Chinner * we don't need to attach clean inodes or those only 22725b3eed75SDave Chinner * with unlogged changes (which we throw away, anyway). 22735b3eed75SDave Chinner */ 22745b257b4aSDave Chinner iip = ip->i_itemp; 22755b3eed75SDave Chinner if (!iip || xfs_inode_clean(ip)) { 22765b257b4aSDave Chinner ASSERT(ip != free_ip); 22771da177e4SLinus Torvalds xfs_ifunlock(ip); 22781da177e4SLinus Torvalds xfs_iunlock(ip, XFS_ILOCK_EXCL); 22791da177e4SLinus Torvalds continue; 22801da177e4SLinus Torvalds } 22811da177e4SLinus Torvalds 2282f5d8d5c4SChristoph Hellwig iip->ili_last_fields = iip->ili_fields; 2283f5d8d5c4SChristoph Hellwig iip->ili_fields = 0; 22841da177e4SLinus Torvalds iip->ili_logged = 1; 22857b2e2a31SDavid Chinner xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 22867b2e2a31SDavid Chinner &iip->ili_item.li_lsn); 22871da177e4SLinus Torvalds 2288ca30b2a7SChristoph Hellwig xfs_buf_attach_iodone(bp, xfs_istale_done, 2289ca30b2a7SChristoph Hellwig &iip->ili_item); 22905b257b4aSDave Chinner 22915b257b4aSDave Chinner if (ip != free_ip) 22921da177e4SLinus Torvalds xfs_iunlock(ip, XFS_ILOCK_EXCL); 22931da177e4SLinus Torvalds } 22941da177e4SLinus Torvalds 22951da177e4SLinus Torvalds xfs_trans_stale_inode_buf(tp, bp); 22961da177e4SLinus Torvalds xfs_trans_binval(tp, bp); 22971da177e4SLinus Torvalds } 22981da177e4SLinus Torvalds 22995017e97dSDave Chinner xfs_perag_put(pag); 23002a30f36dSChandra Seetharaman return 0; 23011da177e4SLinus Torvalds 
} 23021da177e4SLinus Torvalds 23031da177e4SLinus Torvalds /* 23041da177e4SLinus Torvalds * This is called to return an inode to the inode free list. 23051da177e4SLinus Torvalds * The inode should already be truncated to 0 length and have 23061da177e4SLinus Torvalds * no pages associated with it. This routine also assumes that 23071da177e4SLinus Torvalds * the inode is already a part of the transaction. 23081da177e4SLinus Torvalds * 23091da177e4SLinus Torvalds * The on-disk copy of the inode will have been added to the list 23101da177e4SLinus Torvalds * of unlinked inodes in the AGI. We need to remove the inode from 23111da177e4SLinus Torvalds * that list atomically with respect to freeing it here. 23121da177e4SLinus Torvalds */ 23131da177e4SLinus Torvalds int 23141da177e4SLinus Torvalds xfs_ifree( 23151da177e4SLinus Torvalds xfs_trans_t *tp, 23161da177e4SLinus Torvalds xfs_inode_t *ip, 23171da177e4SLinus Torvalds xfs_bmap_free_t *flist) 23181da177e4SLinus Torvalds { 23191da177e4SLinus Torvalds int error; 23201da177e4SLinus Torvalds int delete; 23211da177e4SLinus Torvalds xfs_ino_t first_ino; 23221da177e4SLinus Torvalds 2323579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 23241da177e4SLinus Torvalds ASSERT(ip->i_d.di_nlink == 0); 23251da177e4SLinus Torvalds ASSERT(ip->i_d.di_nextents == 0); 23261da177e4SLinus Torvalds ASSERT(ip->i_d.di_anextents == 0); 2327ce7ae151SChristoph Hellwig ASSERT(ip->i_d.di_size == 0 || !S_ISREG(ip->i_d.di_mode)); 23281da177e4SLinus Torvalds ASSERT(ip->i_d.di_nblocks == 0); 23291da177e4SLinus Torvalds 23301da177e4SLinus Torvalds /* 23311da177e4SLinus Torvalds * Pull the on-disk inode from the AGI unlinked list. 
23321da177e4SLinus Torvalds */ 23331da177e4SLinus Torvalds error = xfs_iunlink_remove(tp, ip); 23341baaed8fSDave Chinner if (error) 23351da177e4SLinus Torvalds return error; 23361da177e4SLinus Torvalds 23371da177e4SLinus Torvalds error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino); 23381baaed8fSDave Chinner if (error) 23391da177e4SLinus Torvalds return error; 23401baaed8fSDave Chinner 23411da177e4SLinus Torvalds ip->i_d.di_mode = 0; /* mark incore inode as free */ 23421da177e4SLinus Torvalds ip->i_d.di_flags = 0; 23431da177e4SLinus Torvalds ip->i_d.di_dmevmask = 0; 23441da177e4SLinus Torvalds ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */ 23451da177e4SLinus Torvalds ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; 23461da177e4SLinus Torvalds ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; 23471da177e4SLinus Torvalds /* 23481da177e4SLinus Torvalds * Bump the generation count so no one will be confused 23491da177e4SLinus Torvalds * by reincarnations of this inode. 23501da177e4SLinus Torvalds */ 23511da177e4SLinus Torvalds ip->i_d.di_gen++; 23521da177e4SLinus Torvalds xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 23531da177e4SLinus Torvalds 23541baaed8fSDave Chinner if (delete) 23552a30f36dSChandra Seetharaman error = xfs_ifree_cluster(ip, tp, first_ino); 23561da177e4SLinus Torvalds 23572a30f36dSChandra Seetharaman return error; 23581da177e4SLinus Torvalds } 23591da177e4SLinus Torvalds 23601da177e4SLinus Torvalds /* 236160ec6783SChristoph Hellwig * This is called to unpin an inode. The caller must have the inode locked 236260ec6783SChristoph Hellwig * in at least shared mode so that the buffer cannot be subsequently pinned 236360ec6783SChristoph Hellwig * once someone is waiting for it to be unpinned. 
23641da177e4SLinus Torvalds */ 236560ec6783SChristoph Hellwig static void 2366f392e631SChristoph Hellwig xfs_iunpin( 236760ec6783SChristoph Hellwig struct xfs_inode *ip) 2368a3f74ffbSDavid Chinner { 2369579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2370a3f74ffbSDavid Chinner 23714aaf15d1SDave Chinner trace_xfs_inode_unpin_nowait(ip, _RET_IP_); 23724aaf15d1SDave Chinner 2373a3f74ffbSDavid Chinner /* Give the log a push to start the unpinning I/O */ 237460ec6783SChristoph Hellwig xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0); 2375a14a348bSChristoph Hellwig 2376a3f74ffbSDavid Chinner } 2377a3f74ffbSDavid Chinner 2378f392e631SChristoph Hellwig static void 2379f392e631SChristoph Hellwig __xfs_iunpin_wait( 2380f392e631SChristoph Hellwig struct xfs_inode *ip) 2381f392e631SChristoph Hellwig { 2382f392e631SChristoph Hellwig wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IPINNED_BIT); 2383f392e631SChristoph Hellwig DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IPINNED_BIT); 2384f392e631SChristoph Hellwig 2385f392e631SChristoph Hellwig xfs_iunpin(ip); 2386f392e631SChristoph Hellwig 2387f392e631SChristoph Hellwig do { 2388f392e631SChristoph Hellwig prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); 2389f392e631SChristoph Hellwig if (xfs_ipincount(ip)) 2390f392e631SChristoph Hellwig io_schedule(); 2391f392e631SChristoph Hellwig } while (xfs_ipincount(ip)); 2392f392e631SChristoph Hellwig finish_wait(wq, &wait.wait); 2393f392e631SChristoph Hellwig } 2394f392e631SChristoph Hellwig 2395777df5afSDave Chinner void 23961da177e4SLinus Torvalds xfs_iunpin_wait( 239760ec6783SChristoph Hellwig struct xfs_inode *ip) 23981da177e4SLinus Torvalds { 2399f392e631SChristoph Hellwig if (xfs_ipincount(ip)) 2400f392e631SChristoph Hellwig __xfs_iunpin_wait(ip); 24011da177e4SLinus Torvalds } 24021da177e4SLinus Torvalds 240327320369SDave Chinner /* 240427320369SDave Chinner * Removing an inode from the namespace involves removing the 
directory entry 240527320369SDave Chinner * and dropping the link count on the inode. Removing the directory entry can 240627320369SDave Chinner * result in locking an AGF (directory blocks were freed) and removing a link 240727320369SDave Chinner * count can result in placing the inode on an unlinked list which results in 240827320369SDave Chinner * locking an AGI. 240927320369SDave Chinner * 241027320369SDave Chinner * The big problem here is that we have an ordering constraint on AGF and AGI 241127320369SDave Chinner * locking - inode allocation locks the AGI, then can allocate a new extent for 241227320369SDave Chinner * new inodes, locking the AGF after the AGI. Similarly, freeing the inode 241327320369SDave Chinner * removes the inode from the unlinked list, requiring that we lock the AGI 241427320369SDave Chinner * first, and then freeing the inode can result in an inode chunk being freed 241527320369SDave Chinner * and hence freeing disk space requiring that we lock an AGF. 241627320369SDave Chinner * 241727320369SDave Chinner * Hence the ordering that is imposed by other parts of the code is AGI before 241827320369SDave Chinner * AGF. This means we cannot remove the directory entry before we drop the inode 241927320369SDave Chinner * reference count and put it on the unlinked list as this results in a lock 242027320369SDave Chinner * order of AGF then AGI, and this can deadlock against inode allocation and 242127320369SDave Chinner * freeing. Therefore we must drop the link counts before we remove the 242227320369SDave Chinner * directory entry. 242327320369SDave Chinner * 242427320369SDave Chinner * This is still safe from a transactional point of view - it is not until we 242527320369SDave Chinner * get to xfs_bmap_finish() that we have the possibility of multiple 242627320369SDave Chinner * transactions in this operation. 
Hence as long as we remove the directory 242727320369SDave Chinner * entry and drop the link count in the first transaction of the remove 242827320369SDave Chinner * operation, there are no transactional constraints on the ordering here. 242927320369SDave Chinner */ 2430c24b5dfaSDave Chinner int 2431c24b5dfaSDave Chinner xfs_remove( 2432c24b5dfaSDave Chinner xfs_inode_t *dp, 2433c24b5dfaSDave Chinner struct xfs_name *name, 2434c24b5dfaSDave Chinner xfs_inode_t *ip) 2435c24b5dfaSDave Chinner { 2436c24b5dfaSDave Chinner xfs_mount_t *mp = dp->i_mount; 2437c24b5dfaSDave Chinner xfs_trans_t *tp = NULL; 2438c24b5dfaSDave Chinner int is_dir = S_ISDIR(ip->i_d.di_mode); 2439c24b5dfaSDave Chinner int error = 0; 2440c24b5dfaSDave Chinner xfs_bmap_free_t free_list; 2441c24b5dfaSDave Chinner xfs_fsblock_t first_block; 2442c24b5dfaSDave Chinner int cancel_flags; 2443c24b5dfaSDave Chinner int committed; 2444c24b5dfaSDave Chinner int link_zero; 2445c24b5dfaSDave Chinner uint resblks; 2446c24b5dfaSDave Chinner uint log_count; 2447c24b5dfaSDave Chinner 2448c24b5dfaSDave Chinner trace_xfs_remove(dp, name); 2449c24b5dfaSDave Chinner 2450c24b5dfaSDave Chinner if (XFS_FORCED_SHUTDOWN(mp)) 2451c24b5dfaSDave Chinner return XFS_ERROR(EIO); 2452c24b5dfaSDave Chinner 2453c24b5dfaSDave Chinner error = xfs_qm_dqattach(dp, 0); 2454c24b5dfaSDave Chinner if (error) 2455c24b5dfaSDave Chinner goto std_return; 2456c24b5dfaSDave Chinner 2457c24b5dfaSDave Chinner error = xfs_qm_dqattach(ip, 0); 2458c24b5dfaSDave Chinner if (error) 2459c24b5dfaSDave Chinner goto std_return; 2460c24b5dfaSDave Chinner 2461c24b5dfaSDave Chinner if (is_dir) { 2462c24b5dfaSDave Chinner tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR); 2463c24b5dfaSDave Chinner log_count = XFS_DEFAULT_LOG_COUNT; 2464c24b5dfaSDave Chinner } else { 2465c24b5dfaSDave Chinner tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE); 2466c24b5dfaSDave Chinner log_count = XFS_REMOVE_LOG_COUNT; 2467c24b5dfaSDave Chinner } 2468c24b5dfaSDave Chinner cancel_flags = 
XFS_TRANS_RELEASE_LOG_RES; 2469c24b5dfaSDave Chinner 2470c24b5dfaSDave Chinner /* 2471c24b5dfaSDave Chinner * We try to get the real space reservation first, 2472c24b5dfaSDave Chinner * allowing for directory btree deletion(s) implying 2473c24b5dfaSDave Chinner * possible bmap insert(s). If we can't get the space 2474c24b5dfaSDave Chinner * reservation then we use 0 instead, and avoid the bmap 2475c24b5dfaSDave Chinner * btree insert(s) in the directory code by, if the bmap 2476c24b5dfaSDave Chinner * insert tries to happen, instead trimming the LAST 2477c24b5dfaSDave Chinner * block from the directory. 2478c24b5dfaSDave Chinner */ 2479c24b5dfaSDave Chinner resblks = XFS_REMOVE_SPACE_RES(mp); 24803d3c8b52SJie Liu error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, resblks, 0); 2481c24b5dfaSDave Chinner if (error == ENOSPC) { 2482c24b5dfaSDave Chinner resblks = 0; 24833d3c8b52SJie Liu error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, 0, 0); 2484c24b5dfaSDave Chinner } 2485c24b5dfaSDave Chinner if (error) { 2486c24b5dfaSDave Chinner ASSERT(error != ENOSPC); 2487c24b5dfaSDave Chinner cancel_flags = 0; 2488c24b5dfaSDave Chinner goto out_trans_cancel; 2489c24b5dfaSDave Chinner } 2490c24b5dfaSDave Chinner 2491c24b5dfaSDave Chinner xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL); 2492c24b5dfaSDave Chinner 2493c24b5dfaSDave Chinner xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 2494c24b5dfaSDave Chinner xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 2495c24b5dfaSDave Chinner 2496c24b5dfaSDave Chinner /* 2497c24b5dfaSDave Chinner * If we're removing a directory perform some additional validation. 
2498c24b5dfaSDave Chinner */ 249927320369SDave Chinner cancel_flags |= XFS_TRANS_ABORT; 2500c24b5dfaSDave Chinner if (is_dir) { 2501c24b5dfaSDave Chinner ASSERT(ip->i_d.di_nlink >= 2); 2502c24b5dfaSDave Chinner if (ip->i_d.di_nlink != 2) { 2503c24b5dfaSDave Chinner error = XFS_ERROR(ENOTEMPTY); 2504c24b5dfaSDave Chinner goto out_trans_cancel; 2505c24b5dfaSDave Chinner } 2506c24b5dfaSDave Chinner if (!xfs_dir_isempty(ip)) { 2507c24b5dfaSDave Chinner error = XFS_ERROR(ENOTEMPTY); 2508c24b5dfaSDave Chinner goto out_trans_cancel; 2509c24b5dfaSDave Chinner } 2510c24b5dfaSDave Chinner 251127320369SDave Chinner /* Drop the link from ip's "..". */ 2512c24b5dfaSDave Chinner error = xfs_droplink(tp, dp); 2513c24b5dfaSDave Chinner if (error) 251427320369SDave Chinner goto out_trans_cancel; 2515c24b5dfaSDave Chinner 251627320369SDave Chinner /* Drop the "." link from ip to self. */ 2517c24b5dfaSDave Chinner error = xfs_droplink(tp, ip); 2518c24b5dfaSDave Chinner if (error) 251927320369SDave Chinner goto out_trans_cancel; 2520c24b5dfaSDave Chinner } else { 2521c24b5dfaSDave Chinner /* 2522c24b5dfaSDave Chinner * When removing a non-directory we need to log the parent 2523c24b5dfaSDave Chinner * inode here. For a directory this is done implicitly 2524c24b5dfaSDave Chinner * by the xfs_droplink call for the ".." entry. 2525c24b5dfaSDave Chinner */ 2526c24b5dfaSDave Chinner xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 2527c24b5dfaSDave Chinner } 252827320369SDave Chinner xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2529c24b5dfaSDave Chinner 253027320369SDave Chinner /* Drop the link from dp to ip. 
*/ 2531c24b5dfaSDave Chinner error = xfs_droplink(tp, ip); 2532c24b5dfaSDave Chinner if (error) 253327320369SDave Chinner goto out_trans_cancel; 2534c24b5dfaSDave Chinner 253527320369SDave Chinner /* Determine if this is the last link while the inode is locked */ 2536c24b5dfaSDave Chinner link_zero = (ip->i_d.di_nlink == 0); 2537c24b5dfaSDave Chinner 253827320369SDave Chinner xfs_bmap_init(&free_list, &first_block); 253927320369SDave Chinner error = xfs_dir_removename(tp, dp, name, ip->i_ino, 254027320369SDave Chinner &first_block, &free_list, resblks); 254127320369SDave Chinner if (error) { 254227320369SDave Chinner ASSERT(error != ENOENT); 254327320369SDave Chinner goto out_bmap_cancel; 254427320369SDave Chinner } 254527320369SDave Chinner 2546c24b5dfaSDave Chinner /* 2547c24b5dfaSDave Chinner * If this is a synchronous mount, make sure that the 2548c24b5dfaSDave Chinner * remove transaction goes to disk before returning to 2549c24b5dfaSDave Chinner * the user. 2550c24b5dfaSDave Chinner */ 2551c24b5dfaSDave Chinner if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) 2552c24b5dfaSDave Chinner xfs_trans_set_sync(tp); 2553c24b5dfaSDave Chinner 2554c24b5dfaSDave Chinner error = xfs_bmap_finish(&tp, &free_list, &committed); 2555c24b5dfaSDave Chinner if (error) 2556c24b5dfaSDave Chinner goto out_bmap_cancel; 2557c24b5dfaSDave Chinner 2558c24b5dfaSDave Chinner error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 2559c24b5dfaSDave Chinner if (error) 2560c24b5dfaSDave Chinner goto std_return; 2561c24b5dfaSDave Chinner 2562c24b5dfaSDave Chinner /* 2563c24b5dfaSDave Chinner * If we are using filestreams, kill the stream association. 2564c24b5dfaSDave Chinner * If the file is still open it may get a new one but that 2565c24b5dfaSDave Chinner * will get killed on last close in xfs_close() so we don't 2566c24b5dfaSDave Chinner * have to worry about that. 
2567c24b5dfaSDave Chinner */ 2568c24b5dfaSDave Chinner if (!is_dir && link_zero && xfs_inode_is_filestream(ip)) 2569c24b5dfaSDave Chinner xfs_filestream_deassociate(ip); 2570c24b5dfaSDave Chinner 2571c24b5dfaSDave Chinner return 0; 2572c24b5dfaSDave Chinner 2573c24b5dfaSDave Chinner out_bmap_cancel: 2574c24b5dfaSDave Chinner xfs_bmap_cancel(&free_list); 2575c24b5dfaSDave Chinner out_trans_cancel: 2576c24b5dfaSDave Chinner xfs_trans_cancel(tp, cancel_flags); 2577c24b5dfaSDave Chinner std_return: 2578c24b5dfaSDave Chinner return error; 2579c24b5dfaSDave Chinner } 2580c24b5dfaSDave Chinner 2581f6bba201SDave Chinner /* 2582f6bba201SDave Chinner * Enter all inodes for a rename transaction into a sorted array. 2583f6bba201SDave Chinner */ 2584f6bba201SDave Chinner STATIC void 2585f6bba201SDave Chinner xfs_sort_for_rename( 2586f6bba201SDave Chinner xfs_inode_t *dp1, /* in: old (source) directory inode */ 2587f6bba201SDave Chinner xfs_inode_t *dp2, /* in: new (target) directory inode */ 2588f6bba201SDave Chinner xfs_inode_t *ip1, /* in: inode of old entry */ 2589f6bba201SDave Chinner xfs_inode_t *ip2, /* in: inode of new entry, if it 2590f6bba201SDave Chinner already exists, NULL otherwise. */ 2591f6bba201SDave Chinner xfs_inode_t **i_tab,/* out: array of inode returned, sorted */ 2592f6bba201SDave Chinner int *num_inodes) /* out: number of inodes in array */ 2593f6bba201SDave Chinner { 2594f6bba201SDave Chinner xfs_inode_t *temp; 2595f6bba201SDave Chinner int i, j; 2596f6bba201SDave Chinner 2597f6bba201SDave Chinner /* 2598f6bba201SDave Chinner * i_tab contains a list of pointers to inodes. We initialize 2599f6bba201SDave Chinner * the table here & we'll sort it. We will then use it to 2600f6bba201SDave Chinner * order the acquisition of the inode locks. 2601f6bba201SDave Chinner * 2602f6bba201SDave Chinner * Note that the table may contain duplicates. e.g., dp1 == dp2. 
2603f6bba201SDave Chinner */ 2604f6bba201SDave Chinner i_tab[0] = dp1; 2605f6bba201SDave Chinner i_tab[1] = dp2; 2606f6bba201SDave Chinner i_tab[2] = ip1; 2607f6bba201SDave Chinner if (ip2) { 2608f6bba201SDave Chinner *num_inodes = 4; 2609f6bba201SDave Chinner i_tab[3] = ip2; 2610f6bba201SDave Chinner } else { 2611f6bba201SDave Chinner *num_inodes = 3; 2612f6bba201SDave Chinner i_tab[3] = NULL; 2613f6bba201SDave Chinner } 2614f6bba201SDave Chinner 2615f6bba201SDave Chinner /* 2616f6bba201SDave Chinner * Sort the elements via bubble sort. (Remember, there are at 2617f6bba201SDave Chinner * most 4 elements to sort, so this is adequate.) 2618f6bba201SDave Chinner */ 2619f6bba201SDave Chinner for (i = 0; i < *num_inodes; i++) { 2620f6bba201SDave Chinner for (j = 1; j < *num_inodes; j++) { 2621f6bba201SDave Chinner if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) { 2622f6bba201SDave Chinner temp = i_tab[j]; 2623f6bba201SDave Chinner i_tab[j] = i_tab[j-1]; 2624f6bba201SDave Chinner i_tab[j-1] = temp; 2625f6bba201SDave Chinner } 2626f6bba201SDave Chinner } 2627f6bba201SDave Chinner } 2628f6bba201SDave Chinner } 2629f6bba201SDave Chinner 2630f6bba201SDave Chinner /* 2631f6bba201SDave Chinner * xfs_rename 2632f6bba201SDave Chinner */ 2633f6bba201SDave Chinner int 2634f6bba201SDave Chinner xfs_rename( 2635f6bba201SDave Chinner xfs_inode_t *src_dp, 2636f6bba201SDave Chinner struct xfs_name *src_name, 2637f6bba201SDave Chinner xfs_inode_t *src_ip, 2638f6bba201SDave Chinner xfs_inode_t *target_dp, 2639f6bba201SDave Chinner struct xfs_name *target_name, 2640f6bba201SDave Chinner xfs_inode_t *target_ip) 2641f6bba201SDave Chinner { 2642f6bba201SDave Chinner xfs_trans_t *tp = NULL; 2643f6bba201SDave Chinner xfs_mount_t *mp = src_dp->i_mount; 2644f6bba201SDave Chinner int new_parent; /* moving to a new dir */ 2645f6bba201SDave Chinner int src_is_directory; /* src_name is a directory */ 2646f6bba201SDave Chinner int error; 2647f6bba201SDave Chinner xfs_bmap_free_t free_list; 
2648f6bba201SDave Chinner xfs_fsblock_t first_block; 2649f6bba201SDave Chinner int cancel_flags; 2650f6bba201SDave Chinner int committed; 2651f6bba201SDave Chinner xfs_inode_t *inodes[4]; 2652f6bba201SDave Chinner int spaceres; 2653f6bba201SDave Chinner int num_inodes; 2654f6bba201SDave Chinner 2655f6bba201SDave Chinner trace_xfs_rename(src_dp, target_dp, src_name, target_name); 2656f6bba201SDave Chinner 2657f6bba201SDave Chinner new_parent = (src_dp != target_dp); 2658f6bba201SDave Chinner src_is_directory = S_ISDIR(src_ip->i_d.di_mode); 2659f6bba201SDave Chinner 2660f6bba201SDave Chinner xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, 2661f6bba201SDave Chinner inodes, &num_inodes); 2662f6bba201SDave Chinner 2663f6bba201SDave Chinner xfs_bmap_init(&free_list, &first_block); 2664f6bba201SDave Chinner tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME); 2665f6bba201SDave Chinner cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 2666f6bba201SDave Chinner spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len); 26673d3c8b52SJie Liu error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0); 2668f6bba201SDave Chinner if (error == ENOSPC) { 2669f6bba201SDave Chinner spaceres = 0; 26703d3c8b52SJie Liu error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0); 2671f6bba201SDave Chinner } 2672f6bba201SDave Chinner if (error) { 2673f6bba201SDave Chinner xfs_trans_cancel(tp, 0); 2674f6bba201SDave Chinner goto std_return; 2675f6bba201SDave Chinner } 2676f6bba201SDave Chinner 2677f6bba201SDave Chinner /* 2678f6bba201SDave Chinner * Attach the dquots to the inodes 2679f6bba201SDave Chinner */ 2680f6bba201SDave Chinner error = xfs_qm_vop_rename_dqattach(inodes); 2681f6bba201SDave Chinner if (error) { 2682f6bba201SDave Chinner xfs_trans_cancel(tp, cancel_flags); 2683f6bba201SDave Chinner goto std_return; 2684f6bba201SDave Chinner } 2685f6bba201SDave Chinner 2686f6bba201SDave Chinner /* 2687f6bba201SDave Chinner * Lock all the participating inodes. 
Depending upon whether 2688f6bba201SDave Chinner * the target_name exists in the target directory, and 2689f6bba201SDave Chinner * whether the target directory is the same as the source 2690f6bba201SDave Chinner * directory, we can lock from 2 to 4 inodes. 2691f6bba201SDave Chinner */ 2692f6bba201SDave Chinner xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL); 2693f6bba201SDave Chinner 2694f6bba201SDave Chinner /* 2695f6bba201SDave Chinner * Join all the inodes to the transaction. From this point on, 2696f6bba201SDave Chinner * we can rely on either trans_commit or trans_cancel to unlock 2697f6bba201SDave Chinner * them. 2698f6bba201SDave Chinner */ 2699f6bba201SDave Chinner xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); 2700f6bba201SDave Chinner if (new_parent) 2701f6bba201SDave Chinner xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); 2702f6bba201SDave Chinner xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); 2703f6bba201SDave Chinner if (target_ip) 2704f6bba201SDave Chinner xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); 2705f6bba201SDave Chinner 2706f6bba201SDave Chinner /* 2707f6bba201SDave Chinner * If we are using project inheritance, we only allow renames 2708f6bba201SDave Chinner * into our tree when the project IDs are the same; else the 2709f6bba201SDave Chinner * tree quota mechanism would be circumvented. 2710f6bba201SDave Chinner */ 2711f6bba201SDave Chinner if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && 2712f6bba201SDave Chinner (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) { 2713f6bba201SDave Chinner error = XFS_ERROR(EXDEV); 2714f6bba201SDave Chinner goto error_return; 2715f6bba201SDave Chinner } 2716f6bba201SDave Chinner 2717f6bba201SDave Chinner /* 2718f6bba201SDave Chinner * Set up the target. 
2719f6bba201SDave Chinner */ 2720f6bba201SDave Chinner if (target_ip == NULL) { 2721f6bba201SDave Chinner /* 2722f6bba201SDave Chinner * If there's no space reservation, check the entry will 2723f6bba201SDave Chinner * fit before actually inserting it. 2724f6bba201SDave Chinner */ 2725f6bba201SDave Chinner error = xfs_dir_canenter(tp, target_dp, target_name, spaceres); 2726f6bba201SDave Chinner if (error) 2727f6bba201SDave Chinner goto error_return; 2728f6bba201SDave Chinner /* 2729f6bba201SDave Chinner * If target does not exist and the rename crosses 2730f6bba201SDave Chinner * directories, adjust the target directory link count 2731f6bba201SDave Chinner * to account for the ".." reference from the new entry. 2732f6bba201SDave Chinner */ 2733f6bba201SDave Chinner error = xfs_dir_createname(tp, target_dp, target_name, 2734f6bba201SDave Chinner src_ip->i_ino, &first_block, 2735f6bba201SDave Chinner &free_list, spaceres); 2736f6bba201SDave Chinner if (error == ENOSPC) 2737f6bba201SDave Chinner goto error_return; 2738f6bba201SDave Chinner if (error) 2739f6bba201SDave Chinner goto abort_return; 2740f6bba201SDave Chinner 2741f6bba201SDave Chinner xfs_trans_ichgtime(tp, target_dp, 2742f6bba201SDave Chinner XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2743f6bba201SDave Chinner 2744f6bba201SDave Chinner if (new_parent && src_is_directory) { 2745f6bba201SDave Chinner error = xfs_bumplink(tp, target_dp); 2746f6bba201SDave Chinner if (error) 2747f6bba201SDave Chinner goto abort_return; 2748f6bba201SDave Chinner } 2749f6bba201SDave Chinner } else { /* target_ip != NULL */ 2750f6bba201SDave Chinner /* 2751f6bba201SDave Chinner * If target exists and it's a directory, check that both 2752f6bba201SDave Chinner * target and source are directories and that target can be 2753f6bba201SDave Chinner * destroyed, or that neither is a directory. 
2754f6bba201SDave Chinner */ 2755f6bba201SDave Chinner if (S_ISDIR(target_ip->i_d.di_mode)) { 2756f6bba201SDave Chinner /* 2757f6bba201SDave Chinner * Make sure target dir is empty. 2758f6bba201SDave Chinner */ 2759f6bba201SDave Chinner if (!(xfs_dir_isempty(target_ip)) || 2760f6bba201SDave Chinner (target_ip->i_d.di_nlink > 2)) { 2761f6bba201SDave Chinner error = XFS_ERROR(EEXIST); 2762f6bba201SDave Chinner goto error_return; 2763f6bba201SDave Chinner } 2764f6bba201SDave Chinner } 2765f6bba201SDave Chinner 2766f6bba201SDave Chinner /* 2767f6bba201SDave Chinner * Link the source inode under the target name. 2768f6bba201SDave Chinner * If the source inode is a directory and we are moving 2769f6bba201SDave Chinner * it across directories, its ".." entry will be 2770f6bba201SDave Chinner * inconsistent until we replace that down below. 2771f6bba201SDave Chinner * 2772f6bba201SDave Chinner * In case there is already an entry with the same 2773f6bba201SDave Chinner * name at the destination directory, remove it first. 2774f6bba201SDave Chinner */ 2775f6bba201SDave Chinner error = xfs_dir_replace(tp, target_dp, target_name, 2776f6bba201SDave Chinner src_ip->i_ino, 2777f6bba201SDave Chinner &first_block, &free_list, spaceres); 2778f6bba201SDave Chinner if (error) 2779f6bba201SDave Chinner goto abort_return; 2780f6bba201SDave Chinner 2781f6bba201SDave Chinner xfs_trans_ichgtime(tp, target_dp, 2782f6bba201SDave Chinner XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2783f6bba201SDave Chinner 2784f6bba201SDave Chinner /* 2785f6bba201SDave Chinner * Decrement the link count on the target since the target 2786f6bba201SDave Chinner * dir no longer points to it. 
2787f6bba201SDave Chinner */ 2788f6bba201SDave Chinner error = xfs_droplink(tp, target_ip); 2789f6bba201SDave Chinner if (error) 2790f6bba201SDave Chinner goto abort_return; 2791f6bba201SDave Chinner 2792f6bba201SDave Chinner if (src_is_directory) { 2793f6bba201SDave Chinner /* 2794f6bba201SDave Chinner * Drop the link from the old "." entry. 2795f6bba201SDave Chinner */ 2796f6bba201SDave Chinner error = xfs_droplink(tp, target_ip); 2797f6bba201SDave Chinner if (error) 2798f6bba201SDave Chinner goto abort_return; 2799f6bba201SDave Chinner } 2800f6bba201SDave Chinner } /* target_ip != NULL */ 2801f6bba201SDave Chinner 2802f6bba201SDave Chinner /* 2803f6bba201SDave Chinner * Remove the source. 2804f6bba201SDave Chinner */ 2805f6bba201SDave Chinner if (new_parent && src_is_directory) { 2806f6bba201SDave Chinner /* 2807f6bba201SDave Chinner * Rewrite the ".." entry to point to the new 2808f6bba201SDave Chinner * directory. 2809f6bba201SDave Chinner */ 2810f6bba201SDave Chinner error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot, 2811f6bba201SDave Chinner target_dp->i_ino, 2812f6bba201SDave Chinner &first_block, &free_list, spaceres); 2813f6bba201SDave Chinner ASSERT(error != EEXIST); 2814f6bba201SDave Chinner if (error) 2815f6bba201SDave Chinner goto abort_return; 2816f6bba201SDave Chinner } 2817f6bba201SDave Chinner 2818f6bba201SDave Chinner /* 2819f6bba201SDave Chinner * We always want to hit the ctime on the source inode. 2820f6bba201SDave Chinner * 2821f6bba201SDave Chinner * This isn't strictly required by the standards since the source 2822f6bba201SDave Chinner * inode isn't really being changed, but old unix file systems did 2823f6bba201SDave Chinner * it and some incremental backup programs won't work without it. 
2824f6bba201SDave Chinner */ 2825f6bba201SDave Chinner xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG); 2826f6bba201SDave Chinner xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE); 2827f6bba201SDave Chinner 2828f6bba201SDave Chinner /* 2829f6bba201SDave Chinner * Adjust the link count on src_dp. This is necessary when 2830f6bba201SDave Chinner * renaming a directory, either within one parent when 2831f6bba201SDave Chinner * the target existed, or across two parent directories. 2832f6bba201SDave Chinner */ 2833f6bba201SDave Chinner if (src_is_directory && (new_parent || target_ip != NULL)) { 2834f6bba201SDave Chinner 2835f6bba201SDave Chinner /* 2836f6bba201SDave Chinner * Decrement link count on src_directory since the 2837f6bba201SDave Chinner * entry that's moved no longer points to it. 2838f6bba201SDave Chinner */ 2839f6bba201SDave Chinner error = xfs_droplink(tp, src_dp); 2840f6bba201SDave Chinner if (error) 2841f6bba201SDave Chinner goto abort_return; 2842f6bba201SDave Chinner } 2843f6bba201SDave Chinner 2844f6bba201SDave Chinner error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, 2845f6bba201SDave Chinner &first_block, &free_list, spaceres); 2846f6bba201SDave Chinner if (error) 2847f6bba201SDave Chinner goto abort_return; 2848f6bba201SDave Chinner 2849f6bba201SDave Chinner xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2850f6bba201SDave Chinner xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); 2851f6bba201SDave Chinner if (new_parent) 2852f6bba201SDave Chinner xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); 2853f6bba201SDave Chinner 2854f6bba201SDave Chinner /* 2855f6bba201SDave Chinner * If this is a synchronous mount, make sure that the 2856f6bba201SDave Chinner * rename transaction goes to disk before returning to 2857f6bba201SDave Chinner * the user. 
2858f6bba201SDave Chinner */ 2859f6bba201SDave Chinner if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 2860f6bba201SDave Chinner xfs_trans_set_sync(tp); 2861f6bba201SDave Chinner } 2862f6bba201SDave Chinner 2863f6bba201SDave Chinner error = xfs_bmap_finish(&tp, &free_list, &committed); 2864f6bba201SDave Chinner if (error) { 2865f6bba201SDave Chinner xfs_bmap_cancel(&free_list); 2866f6bba201SDave Chinner xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | 2867f6bba201SDave Chinner XFS_TRANS_ABORT)); 2868f6bba201SDave Chinner goto std_return; 2869f6bba201SDave Chinner } 2870f6bba201SDave Chinner 2871f6bba201SDave Chinner /* 2872f6bba201SDave Chinner * trans_commit will unlock src_ip, target_ip & decrement 2873f6bba201SDave Chinner * the vnode references. 2874f6bba201SDave Chinner */ 2875f6bba201SDave Chinner return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 2876f6bba201SDave Chinner 2877f6bba201SDave Chinner abort_return: 2878f6bba201SDave Chinner cancel_flags |= XFS_TRANS_ABORT; 2879f6bba201SDave Chinner error_return: 2880f6bba201SDave Chinner xfs_bmap_cancel(&free_list); 2881f6bba201SDave Chinner xfs_trans_cancel(tp, cancel_flags); 2882f6bba201SDave Chinner std_return: 2883f6bba201SDave Chinner return error; 2884f6bba201SDave Chinner } 2885f6bba201SDave Chinner 2886bad55843SDavid Chinner STATIC int 2887bad55843SDavid Chinner xfs_iflush_cluster( 2888bad55843SDavid Chinner xfs_inode_t *ip, 2889bad55843SDavid Chinner xfs_buf_t *bp) 2890bad55843SDavid Chinner { 2891bad55843SDavid Chinner xfs_mount_t *mp = ip->i_mount; 28925017e97dSDave Chinner struct xfs_perag *pag; 2893bad55843SDavid Chinner unsigned long first_index, mask; 2894c8f5f12eSDavid Chinner unsigned long inodes_per_cluster; 2895bad55843SDavid Chinner int ilist_size; 2896bad55843SDavid Chinner xfs_inode_t **ilist; 2897bad55843SDavid Chinner xfs_inode_t *iq; 2898bad55843SDavid Chinner int nr_found; 2899bad55843SDavid Chinner int clcount = 0; 2900bad55843SDavid Chinner int bufwasdelwri; 
2901bad55843SDavid Chinner int i; 2902bad55843SDavid Chinner 29035017e97dSDave Chinner pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 2904bad55843SDavid Chinner 2905c8f5f12eSDavid Chinner inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog; 2906c8f5f12eSDavid Chinner ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); 290749383b0eSDavid Chinner ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS); 2908bad55843SDavid Chinner if (!ilist) 290944b56e0aSDave Chinner goto out_put; 2910bad55843SDavid Chinner 2911bad55843SDavid Chinner mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); 2912bad55843SDavid Chinner first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; 29131a3e8f3dSDave Chinner rcu_read_lock(); 2914bad55843SDavid Chinner /* really need a gang lookup range call here */ 2915bad55843SDavid Chinner nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, 2916c8f5f12eSDavid Chinner first_index, inodes_per_cluster); 2917bad55843SDavid Chinner if (nr_found == 0) 2918bad55843SDavid Chinner goto out_free; 2919bad55843SDavid Chinner 2920bad55843SDavid Chinner for (i = 0; i < nr_found; i++) { 2921bad55843SDavid Chinner iq = ilist[i]; 2922bad55843SDavid Chinner if (iq == ip) 2923bad55843SDavid Chinner continue; 29241a3e8f3dSDave Chinner 29251a3e8f3dSDave Chinner /* 29261a3e8f3dSDave Chinner * because this is an RCU protected lookup, we could find a 29271a3e8f3dSDave Chinner * recently freed or even reallocated inode during the lookup. 29281a3e8f3dSDave Chinner * We need to check under the i_flags_lock for a valid inode 29291a3e8f3dSDave Chinner * here. Skip it if it is not valid or the wrong inode. 
29301a3e8f3dSDave Chinner */ 29311a3e8f3dSDave Chinner spin_lock(&ip->i_flags_lock); 29321a3e8f3dSDave Chinner if (!ip->i_ino || 29331a3e8f3dSDave Chinner (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) { 29341a3e8f3dSDave Chinner spin_unlock(&ip->i_flags_lock); 29351a3e8f3dSDave Chinner continue; 29361a3e8f3dSDave Chinner } 29371a3e8f3dSDave Chinner spin_unlock(&ip->i_flags_lock); 29381a3e8f3dSDave Chinner 2939bad55843SDavid Chinner /* 2940bad55843SDavid Chinner * Do an un-protected check to see if the inode is dirty and 2941bad55843SDavid Chinner * is a candidate for flushing. These checks will be repeated 2942bad55843SDavid Chinner * later after the appropriate locks are acquired. 2943bad55843SDavid Chinner */ 294433540408SDavid Chinner if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0) 2945bad55843SDavid Chinner continue; 2946bad55843SDavid Chinner 2947bad55843SDavid Chinner /* 2948bad55843SDavid Chinner * Try to get locks. If any are unavailable or it is pinned, 2949bad55843SDavid Chinner * then this inode cannot be flushed and is skipped. 2950bad55843SDavid Chinner */ 2951bad55843SDavid Chinner 2952bad55843SDavid Chinner if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) 2953bad55843SDavid Chinner continue; 2954bad55843SDavid Chinner if (!xfs_iflock_nowait(iq)) { 2955bad55843SDavid Chinner xfs_iunlock(iq, XFS_ILOCK_SHARED); 2956bad55843SDavid Chinner continue; 2957bad55843SDavid Chinner } 2958bad55843SDavid Chinner if (xfs_ipincount(iq)) { 2959bad55843SDavid Chinner xfs_ifunlock(iq); 2960bad55843SDavid Chinner xfs_iunlock(iq, XFS_ILOCK_SHARED); 2961bad55843SDavid Chinner continue; 2962bad55843SDavid Chinner } 2963bad55843SDavid Chinner 2964bad55843SDavid Chinner /* 2965bad55843SDavid Chinner * arriving here means that this inode can be flushed. First 2966bad55843SDavid Chinner * re-check that it's dirty before flushing. 
2967bad55843SDavid Chinner */ 296833540408SDavid Chinner if (!xfs_inode_clean(iq)) { 2969bad55843SDavid Chinner int error; 2970bad55843SDavid Chinner error = xfs_iflush_int(iq, bp); 2971bad55843SDavid Chinner if (error) { 2972bad55843SDavid Chinner xfs_iunlock(iq, XFS_ILOCK_SHARED); 2973bad55843SDavid Chinner goto cluster_corrupt_out; 2974bad55843SDavid Chinner } 2975bad55843SDavid Chinner clcount++; 2976bad55843SDavid Chinner } else { 2977bad55843SDavid Chinner xfs_ifunlock(iq); 2978bad55843SDavid Chinner } 2979bad55843SDavid Chinner xfs_iunlock(iq, XFS_ILOCK_SHARED); 2980bad55843SDavid Chinner } 2981bad55843SDavid Chinner 2982bad55843SDavid Chinner if (clcount) { 2983bad55843SDavid Chinner XFS_STATS_INC(xs_icluster_flushcnt); 2984bad55843SDavid Chinner XFS_STATS_ADD(xs_icluster_flushinode, clcount); 2985bad55843SDavid Chinner } 2986bad55843SDavid Chinner 2987bad55843SDavid Chinner out_free: 29881a3e8f3dSDave Chinner rcu_read_unlock(); 2989f0e2d93cSDenys Vlasenko kmem_free(ilist); 299044b56e0aSDave Chinner out_put: 299144b56e0aSDave Chinner xfs_perag_put(pag); 2992bad55843SDavid Chinner return 0; 2993bad55843SDavid Chinner 2994bad55843SDavid Chinner 2995bad55843SDavid Chinner cluster_corrupt_out: 2996bad55843SDavid Chinner /* 2997bad55843SDavid Chinner * Corruption detected in the clustering loop. Invalidate the 2998bad55843SDavid Chinner * inode buffer and shut down the filesystem. 2999bad55843SDavid Chinner */ 30001a3e8f3dSDave Chinner rcu_read_unlock(); 3001bad55843SDavid Chinner /* 300243ff2122SChristoph Hellwig * Clean up the buffer. If it was delwri, just release it -- 3003bad55843SDavid Chinner * brelse can handle it with no problems. If not, shut down the 3004bad55843SDavid Chinner * filesystem before releasing the buffer. 
3005bad55843SDavid Chinner */ 300643ff2122SChristoph Hellwig bufwasdelwri = (bp->b_flags & _XBF_DELWRI_Q); 3007bad55843SDavid Chinner if (bufwasdelwri) 3008bad55843SDavid Chinner xfs_buf_relse(bp); 3009bad55843SDavid Chinner 3010bad55843SDavid Chinner xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 3011bad55843SDavid Chinner 3012bad55843SDavid Chinner if (!bufwasdelwri) { 3013bad55843SDavid Chinner /* 3014bad55843SDavid Chinner * Just like incore_relse: if we have b_iodone functions, 3015bad55843SDavid Chinner * mark the buffer as an error and call them. Otherwise 3016bad55843SDavid Chinner * mark it as stale and brelse. 3017bad55843SDavid Chinner */ 3018cb669ca5SChristoph Hellwig if (bp->b_iodone) { 3019bad55843SDavid Chinner XFS_BUF_UNDONE(bp); 3020c867cb61SChristoph Hellwig xfs_buf_stale(bp); 30215a52c2a5SChandra Seetharaman xfs_buf_ioerror(bp, EIO); 30221a1a3e97SChristoph Hellwig xfs_buf_ioend(bp, 0); 3023bad55843SDavid Chinner } else { 3024c867cb61SChristoph Hellwig xfs_buf_stale(bp); 3025bad55843SDavid Chinner xfs_buf_relse(bp); 3026bad55843SDavid Chinner } 3027bad55843SDavid Chinner } 3028bad55843SDavid Chinner 3029bad55843SDavid Chinner /* 3030bad55843SDavid Chinner * Unlocks the flush lock 3031bad55843SDavid Chinner */ 303204913fddSDave Chinner xfs_iflush_abort(iq, false); 3033f0e2d93cSDenys Vlasenko kmem_free(ilist); 303444b56e0aSDave Chinner xfs_perag_put(pag); 3035bad55843SDavid Chinner return XFS_ERROR(EFSCORRUPTED); 3036bad55843SDavid Chinner } 3037bad55843SDavid Chinner 30381da177e4SLinus Torvalds /* 30394c46819aSChristoph Hellwig * Flush dirty inode metadata into the backing buffer. 30404c46819aSChristoph Hellwig * 30414c46819aSChristoph Hellwig * The caller must have the inode lock and the inode flush lock held. The 30424c46819aSChristoph Hellwig * inode lock will still be held upon return to the caller, and the inode 30434c46819aSChristoph Hellwig * flush lock will be released after the inode has reached the disk. 
30444c46819aSChristoph Hellwig * 30454c46819aSChristoph Hellwig * The caller must write out the buffer returned in *bpp and release it. 30461da177e4SLinus Torvalds */ 30471da177e4SLinus Torvalds int 30481da177e4SLinus Torvalds xfs_iflush( 30494c46819aSChristoph Hellwig struct xfs_inode *ip, 30504c46819aSChristoph Hellwig struct xfs_buf **bpp) 30511da177e4SLinus Torvalds { 30524c46819aSChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 30534c46819aSChristoph Hellwig struct xfs_buf *bp; 30544c46819aSChristoph Hellwig struct xfs_dinode *dip; 30551da177e4SLinus Torvalds int error; 30561da177e4SLinus Torvalds 30571da177e4SLinus Torvalds XFS_STATS_INC(xs_iflush_count); 30581da177e4SLinus Torvalds 3059579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 3060474fce06SChristoph Hellwig ASSERT(xfs_isiflocked(ip)); 30611da177e4SLinus Torvalds ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 30628096b1ebSChristoph Hellwig ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); 30631da177e4SLinus Torvalds 30644c46819aSChristoph Hellwig *bpp = NULL; 30651da177e4SLinus Torvalds 30661da177e4SLinus Torvalds xfs_iunpin_wait(ip); 30671da177e4SLinus Torvalds 30681da177e4SLinus Torvalds /* 30694b6a4688SDave Chinner * For stale inodes we cannot rely on the backing buffer remaining 30704b6a4688SDave Chinner * stale in cache for the remaining life of the stale inode and so 3071475ee413SChristoph Hellwig * xfs_imap_to_bp() below may give us a buffer that no longer contains 30724b6a4688SDave Chinner * inodes below. We have to check this after ensuring the inode is 30734b6a4688SDave Chinner * unpinned so that it is safe to reclaim the stale inode after the 30744b6a4688SDave Chinner * flush call. 
30754b6a4688SDave Chinner */ 30764b6a4688SDave Chinner if (xfs_iflags_test(ip, XFS_ISTALE)) { 30774b6a4688SDave Chinner xfs_ifunlock(ip); 30784b6a4688SDave Chinner return 0; 30794b6a4688SDave Chinner } 30804b6a4688SDave Chinner 30814b6a4688SDave Chinner /* 30821da177e4SLinus Torvalds * This may have been unpinned because the filesystem is shutting 30831da177e4SLinus Torvalds * down forcibly. If that's the case we must not write this inode 308432ce90a4SChristoph Hellwig * to disk, because the log record didn't make it to disk. 308532ce90a4SChristoph Hellwig * 308632ce90a4SChristoph Hellwig * We also have to remove the log item from the AIL in this case, 308732ce90a4SChristoph Hellwig * as we wait for an empty AIL as part of the unmount process. 30881da177e4SLinus Torvalds */ 30891da177e4SLinus Torvalds if (XFS_FORCED_SHUTDOWN(mp)) { 309032ce90a4SChristoph Hellwig error = XFS_ERROR(EIO); 309132ce90a4SChristoph Hellwig goto abort_out; 30921da177e4SLinus Torvalds } 30931da177e4SLinus Torvalds 30941da177e4SLinus Torvalds /* 3095a3f74ffbSDavid Chinner * Get the buffer containing the on-disk inode. 3096a3f74ffbSDavid Chinner */ 3097475ee413SChristoph Hellwig error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK, 3098475ee413SChristoph Hellwig 0); 3099a3f74ffbSDavid Chinner if (error || !bp) { 3100a3f74ffbSDavid Chinner xfs_ifunlock(ip); 3101a3f74ffbSDavid Chinner return error; 3102a3f74ffbSDavid Chinner } 3103a3f74ffbSDavid Chinner 3104a3f74ffbSDavid Chinner /* 31051da177e4SLinus Torvalds * First flush out the inode that xfs_iflush was called with. 31061da177e4SLinus Torvalds */ 31071da177e4SLinus Torvalds error = xfs_iflush_int(ip, bp); 3108bad55843SDavid Chinner if (error) 31091da177e4SLinus Torvalds goto corrupt_out; 31101da177e4SLinus Torvalds 31111da177e4SLinus Torvalds /* 3112a3f74ffbSDavid Chinner * If the buffer is pinned then push on the log now so we won't 3113a3f74ffbSDavid Chinner * get stuck waiting in the write for too long. 
3114a3f74ffbSDavid Chinner */ 3115811e64c7SChandra Seetharaman if (xfs_buf_ispinned(bp)) 3116a14a348bSChristoph Hellwig xfs_log_force(mp, 0); 3117a3f74ffbSDavid Chinner 3118a3f74ffbSDavid Chinner /* 31191da177e4SLinus Torvalds * inode clustering: 31201da177e4SLinus Torvalds * see if other inodes can be gathered into this write 31211da177e4SLinus Torvalds */ 3122bad55843SDavid Chinner error = xfs_iflush_cluster(ip, bp); 3123bad55843SDavid Chinner if (error) 31241da177e4SLinus Torvalds goto cluster_corrupt_out; 31251da177e4SLinus Torvalds 31264c46819aSChristoph Hellwig *bpp = bp; 31274c46819aSChristoph Hellwig return 0; 31281da177e4SLinus Torvalds 31291da177e4SLinus Torvalds corrupt_out: 31301da177e4SLinus Torvalds xfs_buf_relse(bp); 31317d04a335SNathan Scott xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 31321da177e4SLinus Torvalds cluster_corrupt_out: 313332ce90a4SChristoph Hellwig error = XFS_ERROR(EFSCORRUPTED); 313432ce90a4SChristoph Hellwig abort_out: 31351da177e4SLinus Torvalds /* 31361da177e4SLinus Torvalds * Unlocks the flush lock 31371da177e4SLinus Torvalds */ 313804913fddSDave Chinner xfs_iflush_abort(ip, false); 313932ce90a4SChristoph Hellwig return error; 31401da177e4SLinus Torvalds } 31411da177e4SLinus Torvalds 31421da177e4SLinus Torvalds STATIC int 31431da177e4SLinus Torvalds xfs_iflush_int( 314493848a99SChristoph Hellwig struct xfs_inode *ip, 314593848a99SChristoph Hellwig struct xfs_buf *bp) 31461da177e4SLinus Torvalds { 314793848a99SChristoph Hellwig struct xfs_inode_log_item *iip = ip->i_itemp; 314893848a99SChristoph Hellwig struct xfs_dinode *dip; 314993848a99SChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 31501da177e4SLinus Torvalds 3151579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 3152474fce06SChristoph Hellwig ASSERT(xfs_isiflocked(ip)); 31531da177e4SLinus Torvalds ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 31548096b1ebSChristoph Hellwig ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, 
XFS_DATA_FORK)); 315593848a99SChristoph Hellwig ASSERT(iip != NULL && iip->ili_fields != 0); 31561da177e4SLinus Torvalds 31571da177e4SLinus Torvalds /* set *dip = inode's place in the buffer */ 315892bfc6e7SChristoph Hellwig dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); 31591da177e4SLinus Torvalds 316069ef921bSChristoph Hellwig if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), 31611da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { 31626a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 31636a19d939SDave Chinner "%s: Bad inode %Lu magic number 0x%x, ptr 0x%p", 31646a19d939SDave Chinner __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip); 31651da177e4SLinus Torvalds goto corrupt_out; 31661da177e4SLinus Torvalds } 31671da177e4SLinus Torvalds if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC, 31681da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) { 31696a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 31706a19d939SDave Chinner "%s: Bad inode %Lu, ptr 0x%p, magic number 0x%x", 31716a19d939SDave Chinner __func__, ip->i_ino, ip, ip->i_d.di_magic); 31721da177e4SLinus Torvalds goto corrupt_out; 31731da177e4SLinus Torvalds } 3174abbede1bSAl Viro if (S_ISREG(ip->i_d.di_mode)) { 31751da177e4SLinus Torvalds if (XFS_TEST_ERROR( 31761da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && 31771da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), 31781da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) { 31796a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 31806a19d939SDave Chinner "%s: Bad regular inode %Lu, ptr 0x%p", 31816a19d939SDave Chinner __func__, ip->i_ino, ip); 31821da177e4SLinus Torvalds goto corrupt_out; 31831da177e4SLinus Torvalds } 3184abbede1bSAl Viro } else if (S_ISDIR(ip->i_d.di_mode)) { 31851da177e4SLinus Torvalds if (XFS_TEST_ERROR( 31861da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && 
31871da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) && 31881da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL), 31891da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) { 31906a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 31916a19d939SDave Chinner "%s: Bad directory inode %Lu, ptr 0x%p", 31926a19d939SDave Chinner __func__, ip->i_ino, ip); 31931da177e4SLinus Torvalds goto corrupt_out; 31941da177e4SLinus Torvalds } 31951da177e4SLinus Torvalds } 31961da177e4SLinus Torvalds if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents > 31971da177e4SLinus Torvalds ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5, 31981da177e4SLinus Torvalds XFS_RANDOM_IFLUSH_5)) { 31996a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 32006a19d939SDave Chinner "%s: detected corrupt incore inode %Lu, " 32016a19d939SDave Chinner "total extents = %d, nblocks = %Ld, ptr 0x%p", 32026a19d939SDave Chinner __func__, ip->i_ino, 32031da177e4SLinus Torvalds ip->i_d.di_nextents + ip->i_d.di_anextents, 32046a19d939SDave Chinner ip->i_d.di_nblocks, ip); 32051da177e4SLinus Torvalds goto corrupt_out; 32061da177e4SLinus Torvalds } 32071da177e4SLinus Torvalds if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize, 32081da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) { 32096a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 32106a19d939SDave Chinner "%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p", 32116a19d939SDave Chinner __func__, ip->i_ino, ip->i_d.di_forkoff, ip); 32121da177e4SLinus Torvalds goto corrupt_out; 32131da177e4SLinus Torvalds } 3214e60896d8SDave Chinner 32151da177e4SLinus Torvalds /* 3216e60896d8SDave Chinner * Inode item log recovery for v1/v2 inodes are dependent on the 3217e60896d8SDave Chinner * di_flushiter count for correct sequencing. 
We bump the flush 3218e60896d8SDave Chinner * iteration count so we can detect flushes which postdate a log record 3219e60896d8SDave Chinner * during recovery. This is redundant as we now log every change and 3220e60896d8SDave Chinner * hence this can't happen but we need to still do it to ensure 3221e60896d8SDave Chinner * backwards compatibility with old kernels that predate logging all 3222e60896d8SDave Chinner * inode changes. 32231da177e4SLinus Torvalds */ 3224e60896d8SDave Chinner if (ip->i_d.di_version < 3) 32251da177e4SLinus Torvalds ip->i_d.di_flushiter++; 32261da177e4SLinus Torvalds 32271da177e4SLinus Torvalds /* 32281da177e4SLinus Torvalds * Copy the dirty parts of the inode into the on-disk 32291da177e4SLinus Torvalds * inode. We always copy out the core of the inode, 32301da177e4SLinus Torvalds * because if the inode is dirty at all the core must 32311da177e4SLinus Torvalds * be. 32321da177e4SLinus Torvalds */ 323381591fe2SChristoph Hellwig xfs_dinode_to_disk(dip, &ip->i_d); 32341da177e4SLinus Torvalds 32351da177e4SLinus Torvalds /* Wrap, we never let the log put out DI_MAX_FLUSH */ 32361da177e4SLinus Torvalds if (ip->i_d.di_flushiter == DI_MAX_FLUSH) 32371da177e4SLinus Torvalds ip->i_d.di_flushiter = 0; 32381da177e4SLinus Torvalds 32391da177e4SLinus Torvalds /* 32401da177e4SLinus Torvalds * If this is really an old format inode and the superblock version 32411da177e4SLinus Torvalds * has not been updated to support only new format inodes, then 32421da177e4SLinus Torvalds * convert back to the old inode format. If the superblock version 32431da177e4SLinus Torvalds * has been updated, then make the conversion permanent. 
32441da177e4SLinus Torvalds */ 324551ce16d5SChristoph Hellwig ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb)); 324651ce16d5SChristoph Hellwig if (ip->i_d.di_version == 1) { 324762118709SEric Sandeen if (!xfs_sb_version_hasnlink(&mp->m_sb)) { 32481da177e4SLinus Torvalds /* 32491da177e4SLinus Torvalds * Convert it back. 32501da177e4SLinus Torvalds */ 32511da177e4SLinus Torvalds ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); 325281591fe2SChristoph Hellwig dip->di_onlink = cpu_to_be16(ip->i_d.di_nlink); 32531da177e4SLinus Torvalds } else { 32541da177e4SLinus Torvalds /* 32551da177e4SLinus Torvalds * The superblock version has already been bumped, 32561da177e4SLinus Torvalds * so just make the conversion to the new inode 32571da177e4SLinus Torvalds * format permanent. 32581da177e4SLinus Torvalds */ 325951ce16d5SChristoph Hellwig ip->i_d.di_version = 2; 326051ce16d5SChristoph Hellwig dip->di_version = 2; 32611da177e4SLinus Torvalds ip->i_d.di_onlink = 0; 326281591fe2SChristoph Hellwig dip->di_onlink = 0; 32631da177e4SLinus Torvalds memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 326481591fe2SChristoph Hellwig memset(&(dip->di_pad[0]), 0, 326581591fe2SChristoph Hellwig sizeof(dip->di_pad)); 32666743099cSArkadiusz Mi?kiewicz ASSERT(xfs_get_projid(ip) == 0); 32671da177e4SLinus Torvalds } 32681da177e4SLinus Torvalds } 32691da177e4SLinus Torvalds 3270e4ac967bSDavid Chinner xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp); 3271e4ac967bSDavid Chinner if (XFS_IFORK_Q(ip)) 3272e4ac967bSDavid Chinner xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp); 32731da177e4SLinus Torvalds xfs_inobp_check(mp, bp); 32741da177e4SLinus Torvalds 32751da177e4SLinus Torvalds /* 3276f5d8d5c4SChristoph Hellwig * We've recorded everything logged in the inode, so we'd like to clear 3277f5d8d5c4SChristoph Hellwig * the ili_fields bits so we don't log and flush things unnecessarily. 
3278f5d8d5c4SChristoph Hellwig * However, we can't stop logging all this information until the data 3279f5d8d5c4SChristoph Hellwig * we've copied into the disk buffer is written to disk. If we did we 3280f5d8d5c4SChristoph Hellwig * might overwrite the copy of the inode in the log with all the data 3281f5d8d5c4SChristoph Hellwig * after re-logging only part of it, and in the face of a crash we 3282f5d8d5c4SChristoph Hellwig * wouldn't have all the data we need to recover. 32831da177e4SLinus Torvalds * 3284f5d8d5c4SChristoph Hellwig * What we do is move the bits to the ili_last_fields field. When 3285f5d8d5c4SChristoph Hellwig * logging the inode, these bits are moved back to the ili_fields field. 3286f5d8d5c4SChristoph Hellwig * In the xfs_iflush_done() routine we clear ili_last_fields, since we 3287f5d8d5c4SChristoph Hellwig * know that the information those bits represent is permanently on 3288f5d8d5c4SChristoph Hellwig * disk. As long as the flush completes before the inode is logged 3289f5d8d5c4SChristoph Hellwig * again, then both ili_fields and ili_last_fields will be cleared. 32901da177e4SLinus Torvalds * 3291f5d8d5c4SChristoph Hellwig * We can play with the ili_fields bits here, because the inode lock 3292f5d8d5c4SChristoph Hellwig * must be held exclusively in order to set bits there and the flush 3293f5d8d5c4SChristoph Hellwig * lock protects the ili_last_fields bits. Set ili_logged so the flush 3294f5d8d5c4SChristoph Hellwig * done routine can tell whether or not to look in the AIL. Also, store 3295f5d8d5c4SChristoph Hellwig * the current LSN of the inode so that we can tell whether the item has 3296f5d8d5c4SChristoph Hellwig * moved in the AIL from xfs_iflush_done(). In order to read the lsn we 3297f5d8d5c4SChristoph Hellwig * need the AIL lock, because it is a 64 bit value that cannot be read 3298f5d8d5c4SChristoph Hellwig * atomically. 
32991da177e4SLinus Torvalds */ 3300f5d8d5c4SChristoph Hellwig iip->ili_last_fields = iip->ili_fields; 3301f5d8d5c4SChristoph Hellwig iip->ili_fields = 0; 33021da177e4SLinus Torvalds iip->ili_logged = 1; 33031da177e4SLinus Torvalds 33047b2e2a31SDavid Chinner xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 33057b2e2a31SDavid Chinner &iip->ili_item.li_lsn); 33061da177e4SLinus Torvalds 33071da177e4SLinus Torvalds /* 33081da177e4SLinus Torvalds * Attach the function xfs_iflush_done to the inode's 33091da177e4SLinus Torvalds * buffer. This will remove the inode from the AIL 33101da177e4SLinus Torvalds * and unlock the inode's flush lock when the inode is 33111da177e4SLinus Torvalds * completely written to disk. 33121da177e4SLinus Torvalds */ 3313ca30b2a7SChristoph Hellwig xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item); 33141da177e4SLinus Torvalds 331593848a99SChristoph Hellwig /* update the lsn in the on disk inode if required */ 331693848a99SChristoph Hellwig if (ip->i_d.di_version == 3) 331793848a99SChristoph Hellwig dip->di_lsn = cpu_to_be64(iip->ili_item.li_lsn); 331893848a99SChristoph Hellwig 331993848a99SChristoph Hellwig /* generate the checksum. */ 332093848a99SChristoph Hellwig xfs_dinode_calc_crc(mp, dip); 332193848a99SChristoph Hellwig 3322adadbeefSChristoph Hellwig ASSERT(bp->b_fspriv != NULL); 3323cb669ca5SChristoph Hellwig ASSERT(bp->b_iodone != NULL); 33241da177e4SLinus Torvalds return 0; 33251da177e4SLinus Torvalds 33261da177e4SLinus Torvalds corrupt_out: 33271da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 33281da177e4SLinus Torvalds } 3329