10b61f8a4SDave Chinner // SPDX-License-Identifier: GPL-2.0 26d8b79cfSDave Chinner /* 36d8b79cfSDave Chinner * Copyright (c) 2000-2005 Silicon Graphics, Inc. 46d8b79cfSDave Chinner * All Rights Reserved. 56d8b79cfSDave Chinner */ 66d8b79cfSDave Chinner #include "xfs.h" 76d8b79cfSDave Chinner #include "xfs_fs.h" 85467b34bSDarrick J. Wong #include "xfs_shared.h" 96ca1c906SDave Chinner #include "xfs_format.h" 10239880efSDave Chinner #include "xfs_log_format.h" 11239880efSDave Chinner #include "xfs_trans_resv.h" 126d8b79cfSDave Chinner #include "xfs_sb.h" 136d8b79cfSDave Chinner #include "xfs_mount.h" 146d8b79cfSDave Chinner #include "xfs_inode.h" 15239880efSDave Chinner #include "xfs_trans.h" 16239880efSDave Chinner #include "xfs_trans_priv.h" 176d8b79cfSDave Chinner #include "xfs_inode_item.h" 186d8b79cfSDave Chinner #include "xfs_quota.h" 196d8b79cfSDave Chinner #include "xfs_trace.h" 206d8b79cfSDave Chinner #include "xfs_icache.h" 21c24b5dfaSDave Chinner #include "xfs_bmap_util.h" 22dc06f398SBrian Foster #include "xfs_dquot_item.h" 23dc06f398SBrian Foster #include "xfs_dquot.h" 2483104d44SDarrick J. Wong #include "xfs_reflink.h" 25bb8a66afSChristoph Hellwig #include "xfs_ialloc.h" 266d8b79cfSDave Chinner 27f0e28280SJeff Layton #include <linux/iversion.h> 286d8b79cfSDave Chinner 29c809d7e9SDarrick J. Wong /* Radix tree tags for incore inode tree. */ 30c809d7e9SDarrick J. Wong 31c809d7e9SDarrick J. Wong /* inode is to be reclaimed */ 32c809d7e9SDarrick J. Wong #define XFS_ICI_RECLAIM_TAG 0 33c809d7e9SDarrick J. Wong /* Inode has speculative preallocations (posteof or cow) to clean. */ 34c809d7e9SDarrick J. Wong #define XFS_ICI_BLOCKGC_TAG 1 35c809d7e9SDarrick J. Wong 36c809d7e9SDarrick J. Wong /* 37c809d7e9SDarrick J. Wong * The goal for walking incore inodes. These can correspond with incore inode 38c809d7e9SDarrick J. Wong * radix tree tags when convenient. Avoid existing XFS_IWALK namespace. 39c809d7e9SDarrick J. Wong */ 40c809d7e9SDarrick J. Wong enum xfs_icwalk_goal { 41c809d7e9SDarrick J. Wong /* Goals that are not related to tags; these must be < 0. */ 42c809d7e9SDarrick J. Wong XFS_ICWALK_DQRELE = -1, 43c809d7e9SDarrick J. Wong 44c809d7e9SDarrick J. Wong /* Goals directly associated with tagged inodes. */ 45c809d7e9SDarrick J. Wong XFS_ICWALK_BLOCKGC = XFS_ICI_BLOCKGC_TAG, 46c809d7e9SDarrick J. Wong }; 47c809d7e9SDarrick J. Wong 48c809d7e9SDarrick J. Wong #define XFS_ICWALK_NULL_TAG (-1U) 49c809d7e9SDarrick J. Wong 50c809d7e9SDarrick J. Wong /* Compute the inode radix tree tag for this goal. */ 51c809d7e9SDarrick J. Wong static inline unsigned int 52c809d7e9SDarrick J. Wong xfs_icwalk_tag(enum xfs_icwalk_goal goal) 53c809d7e9SDarrick J. Wong { 54c809d7e9SDarrick J. Wong return goal < 0 ? XFS_ICWALK_NULL_TAG : goal; 55c809d7e9SDarrick J. Wong } 56c809d7e9SDarrick J. Wong 57*7fdff526SDarrick J. Wong static int xfs_icwalk(struct xfs_mount *mp, 58df600197SDarrick J. Wong int (*execute)(struct xfs_inode *ip, void *args), 59c809d7e9SDarrick J. Wong void *args, enum xfs_icwalk_goal goal); 60*7fdff526SDarrick J. Wong static int xfs_icwalk_ag(struct xfs_perag *pag, 61df600197SDarrick J. Wong int (*execute)(struct xfs_inode *ip, void *args), 62c809d7e9SDarrick J. Wong void *args, enum xfs_icwalk_goal goal); 63df600197SDarrick J. Wong 6433479e05SDave Chinner /* 651ad2cfe0SDarrick J. Wong * Private inode cache walk flags for struct xfs_eofblocks. Must not coincide 661ad2cfe0SDarrick J. Wong * with XFS_EOF_FLAGS_*. 671ad2cfe0SDarrick J. Wong */ 681ad2cfe0SDarrick J. 
Wong #define XFS_ICWALK_FLAG_DROP_UDQUOT (1U << 31) 691ad2cfe0SDarrick J. Wong #define XFS_ICWALK_FLAG_DROP_GDQUOT (1U << 30) 701ad2cfe0SDarrick J. Wong #define XFS_ICWALK_FLAG_DROP_PDQUOT (1U << 29) 711ad2cfe0SDarrick J. Wong 721ad2cfe0SDarrick J. Wong #define XFS_ICWALK_PRIVATE_FLAGS (XFS_ICWALK_FLAG_DROP_UDQUOT | \ 731ad2cfe0SDarrick J. Wong XFS_ICWALK_FLAG_DROP_GDQUOT | \ 741ad2cfe0SDarrick J. Wong XFS_ICWALK_FLAG_DROP_PDQUOT) 751ad2cfe0SDarrick J. Wong 761ad2cfe0SDarrick J. Wong /* 7733479e05SDave Chinner * Allocate and initialise an xfs_inode. 7833479e05SDave Chinner */ 79638f4416SDave Chinner struct xfs_inode * 8033479e05SDave Chinner xfs_inode_alloc( 8133479e05SDave Chinner struct xfs_mount *mp, 8233479e05SDave Chinner xfs_ino_t ino) 8333479e05SDave Chinner { 8433479e05SDave Chinner struct xfs_inode *ip; 8533479e05SDave Chinner 8633479e05SDave Chinner /* 873050bd0bSCarlos Maiolino * XXX: If this didn't occur in transactions, we could drop GFP_NOFAIL 883050bd0bSCarlos Maiolino * and return NULL here on ENOMEM. 8933479e05SDave Chinner */ 903050bd0bSCarlos Maiolino ip = kmem_cache_alloc(xfs_inode_zone, GFP_KERNEL | __GFP_NOFAIL); 913050bd0bSCarlos Maiolino 9233479e05SDave Chinner if (inode_init_always(mp->m_super, VFS_I(ip))) { 93377bcd5fSCarlos Maiolino kmem_cache_free(xfs_inode_zone, ip); 9433479e05SDave Chinner return NULL; 9533479e05SDave Chinner } 9633479e05SDave Chinner 97c19b3b05SDave Chinner /* VFS doesn't initialise i_mode! */ 98c19b3b05SDave Chinner VFS_I(ip)->i_mode = 0; 99c19b3b05SDave Chinner 100ff6d6af2SBill O'Donnell XFS_STATS_INC(mp, vn_active); 10133479e05SDave Chinner ASSERT(atomic_read(&ip->i_pincount) == 0); 10233479e05SDave Chinner ASSERT(ip->i_ino == 0); 10333479e05SDave Chinner 10433479e05SDave Chinner /* initialise the xfs inode */ 10533479e05SDave Chinner ip->i_ino = ino; 10633479e05SDave Chinner ip->i_mount = mp; 10733479e05SDave Chinner memset(&ip->i_imap, 0, sizeof(struct xfs_imap)); 10833479e05SDave Chinner ip->i_afp = NULL; 1093993baebSDarrick J. Wong ip->i_cowfp = NULL; 1103ba738dfSChristoph Hellwig memset(&ip->i_df, 0, sizeof(ip->i_df)); 11133479e05SDave Chinner ip->i_flags = 0; 11233479e05SDave Chinner ip->i_delayed_blks = 0; 1133e09ab8fSChristoph Hellwig ip->i_diflags2 = mp->m_ino_geo.new_diflags2; 1146e73a545SChristoph Hellwig ip->i_nblocks = 0; 1157821ea30SChristoph Hellwig ip->i_forkoff = 0; 1166772c1f1SDarrick J. Wong ip->i_sick = 0; 1176772c1f1SDarrick J. Wong ip->i_checked = 0; 118cb357bf3SDarrick J. Wong INIT_WORK(&ip->i_ioend_work, xfs_end_io); 119cb357bf3SDarrick J. Wong INIT_LIST_HEAD(&ip->i_ioend_list); 120cb357bf3SDarrick J. 
Wong spin_lock_init(&ip->i_ioend_lock); 12133479e05SDave Chinner 12233479e05SDave Chinner return ip; 12333479e05SDave Chinner } 12433479e05SDave Chinner 12533479e05SDave Chinner STATIC void 12633479e05SDave Chinner xfs_inode_free_callback( 12733479e05SDave Chinner struct rcu_head *head) 12833479e05SDave Chinner { 12933479e05SDave Chinner struct inode *inode = container_of(head, struct inode, i_rcu); 13033479e05SDave Chinner struct xfs_inode *ip = XFS_I(inode); 13133479e05SDave Chinner 132c19b3b05SDave Chinner switch (VFS_I(ip)->i_mode & S_IFMT) { 13333479e05SDave Chinner case S_IFREG: 13433479e05SDave Chinner case S_IFDIR: 13533479e05SDave Chinner case S_IFLNK: 136ef838512SChristoph Hellwig xfs_idestroy_fork(&ip->i_df); 13733479e05SDave Chinner break; 13833479e05SDave Chinner } 13933479e05SDave Chinner 140ef838512SChristoph Hellwig if (ip->i_afp) { 141ef838512SChristoph Hellwig xfs_idestroy_fork(ip->i_afp); 142ef838512SChristoph Hellwig kmem_cache_free(xfs_ifork_zone, ip->i_afp); 143ef838512SChristoph Hellwig } 144ef838512SChristoph Hellwig if (ip->i_cowfp) { 145ef838512SChristoph Hellwig xfs_idestroy_fork(ip->i_cowfp); 146ef838512SChristoph Hellwig kmem_cache_free(xfs_ifork_zone, ip->i_cowfp); 147ef838512SChristoph Hellwig } 14833479e05SDave Chinner if (ip->i_itemp) { 14922525c17SDave Chinner ASSERT(!test_bit(XFS_LI_IN_AIL, 15022525c17SDave Chinner &ip->i_itemp->ili_item.li_flags)); 15133479e05SDave Chinner xfs_inode_item_destroy(ip); 15233479e05SDave Chinner ip->i_itemp = NULL; 15333479e05SDave Chinner } 15433479e05SDave Chinner 155377bcd5fSCarlos Maiolino kmem_cache_free(xfs_inode_zone, ip); 1561f2dcfe8SDave Chinner } 1571f2dcfe8SDave Chinner 1588a17d7ddSDave Chinner static void 1598a17d7ddSDave Chinner __xfs_inode_free( 1608a17d7ddSDave Chinner struct xfs_inode *ip) 1618a17d7ddSDave Chinner { 1628a17d7ddSDave Chinner /* asserts to verify all state is correct here */ 1638a17d7ddSDave Chinner ASSERT(atomic_read(&ip->i_pincount) == 0); 16448d55e2aSDave Chinner ASSERT(!ip->i_itemp || list_empty(&ip->i_itemp->ili_item.li_bio_list)); 1658a17d7ddSDave Chinner XFS_STATS_DEC(ip->i_mount, vn_active); 1668a17d7ddSDave Chinner 1678a17d7ddSDave Chinner call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); 1688a17d7ddSDave Chinner } 1698a17d7ddSDave Chinner 1701f2dcfe8SDave Chinner void 1711f2dcfe8SDave Chinner xfs_inode_free( 1721f2dcfe8SDave Chinner struct xfs_inode *ip) 1731f2dcfe8SDave Chinner { 174718ecc50SDave Chinner ASSERT(!xfs_iflags_test(ip, XFS_IFLUSHING)); 17598efe8afSBrian Foster 17633479e05SDave Chinner /* 17733479e05SDave Chinner * Because we use RCU freeing we need to ensure the inode always 17833479e05SDave Chinner * appears to be reclaimed with an invalid inode number when in the 17933479e05SDave Chinner * free state. The ip->i_flags_lock provides the barrier against lookup 18033479e05SDave Chinner * races. 18133479e05SDave Chinner */ 18233479e05SDave Chinner spin_lock(&ip->i_flags_lock); 18333479e05SDave Chinner ip->i_flags = XFS_IRECLAIM; 18433479e05SDave Chinner ip->i_ino = 0; 18533479e05SDave Chinner spin_unlock(&ip->i_flags_lock); 18633479e05SDave Chinner 1878a17d7ddSDave Chinner __xfs_inode_free(ip); 18833479e05SDave Chinner } 18933479e05SDave Chinner 19033479e05SDave Chinner /* 19102511a5aSDave Chinner * Queue background inode reclaim work if there are reclaimable inodes and there 19202511a5aSDave Chinner * isn't reclaim work already scheduled or in progress. 
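 *
 * The reclaimable-inodes check is the XFS_ICI_RECLAIM_TAG lookup on the
 * perag radix tree, done under rcu_read_lock().  queue_delayed_work() is
 * a no-op if the work is already pending, which covers the "already
 * scheduled" case, and the delay is derived from the xfs_syncd_centisecs
 * tunable.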
193ad438c40SDave Chinner */ 194ad438c40SDave Chinner static void 195ad438c40SDave Chinner xfs_reclaim_work_queue( 196ad438c40SDave Chinner struct xfs_mount *mp) 197ad438c40SDave Chinner { 198ad438c40SDave Chinner 199ad438c40SDave Chinner rcu_read_lock(); 200ad438c40SDave Chinner if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { 201ad438c40SDave Chinner queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work, 202ad438c40SDave Chinner msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10)); 203ad438c40SDave Chinner } 204ad438c40SDave Chinner rcu_read_unlock(); 205ad438c40SDave Chinner } 206ad438c40SDave Chinner 207ad438c40SDave Chinner static void 208ad438c40SDave Chinner xfs_perag_set_reclaim_tag( 209ad438c40SDave Chinner struct xfs_perag *pag) 210ad438c40SDave Chinner { 211ad438c40SDave Chinner struct xfs_mount *mp = pag->pag_mount; 212ad438c40SDave Chinner 21395989c46SBrian Foster lockdep_assert_held(&pag->pag_ici_lock); 214ad438c40SDave Chinner if (pag->pag_ici_reclaimable++) 215ad438c40SDave Chinner return; 216ad438c40SDave Chinner 217ad438c40SDave Chinner /* propagate the reclaim tag up into the perag radix tree */ 218ad438c40SDave Chinner spin_lock(&mp->m_perag_lock); 219ad438c40SDave Chinner radix_tree_tag_set(&mp->m_perag_tree, pag->pag_agno, 220ad438c40SDave Chinner XFS_ICI_RECLAIM_TAG); 221ad438c40SDave Chinner spin_unlock(&mp->m_perag_lock); 222ad438c40SDave Chinner 223ad438c40SDave Chinner /* schedule periodic background inode reclaim */ 224ad438c40SDave Chinner xfs_reclaim_work_queue(mp); 225ad438c40SDave Chinner 226ad438c40SDave Chinner trace_xfs_perag_set_reclaim(mp, pag->pag_agno, -1, _RET_IP_); 227ad438c40SDave Chinner } 228ad438c40SDave Chinner 229ad438c40SDave Chinner static void 230ad438c40SDave Chinner xfs_perag_clear_reclaim_tag( 231ad438c40SDave Chinner struct xfs_perag *pag) 232ad438c40SDave Chinner { 233ad438c40SDave Chinner struct xfs_mount *mp = pag->pag_mount; 234ad438c40SDave Chinner 23595989c46SBrian Foster lockdep_assert_held(&pag->pag_ici_lock); 236ad438c40SDave Chinner if (--pag->pag_ici_reclaimable) 237ad438c40SDave Chinner return; 238ad438c40SDave Chinner 239ad438c40SDave Chinner /* clear the reclaim tag from the perag radix tree */ 240ad438c40SDave Chinner spin_lock(&mp->m_perag_lock); 241ad438c40SDave Chinner radix_tree_tag_clear(&mp->m_perag_tree, pag->pag_agno, 242ad438c40SDave Chinner XFS_ICI_RECLAIM_TAG); 243ad438c40SDave Chinner spin_unlock(&mp->m_perag_lock); 244ad438c40SDave Chinner trace_xfs_perag_clear_reclaim(mp, pag->pag_agno, -1, _RET_IP_); 245ad438c40SDave Chinner } 246ad438c40SDave Chinner 247ad438c40SDave Chinner 248ad438c40SDave Chinner /* 249ad438c40SDave Chinner * We set the inode flag atomically with the radix tree tag. 250ad438c40SDave Chinner * Once we get tag lookups on the radix tree, this inode flag 251ad438c40SDave Chinner * can go away. 
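 *
 * Both pag->pag_ici_lock and ip->i_flags_lock are held while the radix
 * tree tag, the per-AG reclaimable count and XFS_IRECLAIMABLE are
 * updated, so lookups always see the three change together.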
252ad438c40SDave Chinner */ 253ad438c40SDave Chinner void 254ad438c40SDave Chinner xfs_inode_set_reclaim_tag( 255ad438c40SDave Chinner struct xfs_inode *ip) 256ad438c40SDave Chinner { 257ad438c40SDave Chinner struct xfs_mount *mp = ip->i_mount; 258ad438c40SDave Chinner struct xfs_perag *pag; 259ad438c40SDave Chinner 260ad438c40SDave Chinner pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 261ad438c40SDave Chinner spin_lock(&pag->pag_ici_lock); 262ad438c40SDave Chinner spin_lock(&ip->i_flags_lock); 263ad438c40SDave Chinner 264ad438c40SDave Chinner radix_tree_tag_set(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino), 265ad438c40SDave Chinner XFS_ICI_RECLAIM_TAG); 266ad438c40SDave Chinner xfs_perag_set_reclaim_tag(pag); 267ad438c40SDave Chinner __xfs_iflags_set(ip, XFS_IRECLAIMABLE); 268ad438c40SDave Chinner 269ad438c40SDave Chinner spin_unlock(&ip->i_flags_lock); 270ad438c40SDave Chinner spin_unlock(&pag->pag_ici_lock); 271ad438c40SDave Chinner xfs_perag_put(pag); 272ad438c40SDave Chinner } 273ad438c40SDave Chinner 274ad438c40SDave Chinner STATIC void 275ad438c40SDave Chinner xfs_inode_clear_reclaim_tag( 276ad438c40SDave Chinner struct xfs_perag *pag, 277ad438c40SDave Chinner xfs_ino_t ino) 278ad438c40SDave Chinner { 279ad438c40SDave Chinner radix_tree_tag_clear(&pag->pag_ici_root, 280ad438c40SDave Chinner XFS_INO_TO_AGINO(pag->pag_mount, ino), 281ad438c40SDave Chinner XFS_ICI_RECLAIM_TAG); 282ad438c40SDave Chinner xfs_perag_clear_reclaim_tag(pag); 283ad438c40SDave Chinner } 284ad438c40SDave Chinner 285*7fdff526SDarrick J. Wong static inline void 286ae2c4ac2SBrian Foster xfs_inew_wait( 287ae2c4ac2SBrian Foster struct xfs_inode *ip) 288ae2c4ac2SBrian Foster { 289ae2c4ac2SBrian Foster wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_INEW_BIT); 290ae2c4ac2SBrian Foster DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_INEW_BIT); 291ae2c4ac2SBrian Foster 292ae2c4ac2SBrian Foster do { 29321417136SIngo Molnar prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); 294ae2c4ac2SBrian Foster if (!xfs_iflags_test(ip, XFS_INEW)) 295ae2c4ac2SBrian Foster break; 296ae2c4ac2SBrian Foster schedule(); 297ae2c4ac2SBrian Foster } while (true); 29821417136SIngo Molnar finish_wait(wq, &wait.wq_entry); 299ae2c4ac2SBrian Foster } 300ae2c4ac2SBrian Foster 301ad438c40SDave Chinner /* 30250997470SDave Chinner * When we recycle a reclaimable inode, we need to re-initialise the VFS inode 30350997470SDave Chinner * part of the structure. This is made more complex by the fact we store 30450997470SDave Chinner * information about the on-disk values in the VFS inode and so we can't just 30583e06f21SDave Chinner * overwrite the values unconditionally. Hence we save the parameters we 30650997470SDave Chinner * need to retain across reinitialisation, and rewrite them into the VFS inode 30783e06f21SDave Chinner * after reinitialisation even if it fails. 
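 *
 * The preserved state is i_nlink, i_generation, the inode version
 * counter, i_mode, i_rdev, i_uid and i_gid; everything else is reset by
 * inode_init_always().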
30850997470SDave Chinner */ 30950997470SDave Chinner static int 31050997470SDave Chinner xfs_reinit_inode( 31150997470SDave Chinner struct xfs_mount *mp, 31250997470SDave Chinner struct inode *inode) 31350997470SDave Chinner { 31450997470SDave Chinner int error; 31554d7b5c1SDave Chinner uint32_t nlink = inode->i_nlink; 3169e9a2674SDave Chinner uint32_t generation = inode->i_generation; 317f0e28280SJeff Layton uint64_t version = inode_peek_iversion(inode); 318c19b3b05SDave Chinner umode_t mode = inode->i_mode; 319acd1d715SAmir Goldstein dev_t dev = inode->i_rdev; 3203d8f2821SChristoph Hellwig kuid_t uid = inode->i_uid; 3213d8f2821SChristoph Hellwig kgid_t gid = inode->i_gid; 32250997470SDave Chinner 32350997470SDave Chinner error = inode_init_always(mp->m_super, inode); 32450997470SDave Chinner 32554d7b5c1SDave Chinner set_nlink(inode, nlink); 3269e9a2674SDave Chinner inode->i_generation = generation; 327f0e28280SJeff Layton inode_set_iversion_queried(inode, version); 328c19b3b05SDave Chinner inode->i_mode = mode; 329acd1d715SAmir Goldstein inode->i_rdev = dev; 3303d8f2821SChristoph Hellwig inode->i_uid = uid; 3313d8f2821SChristoph Hellwig inode->i_gid = gid; 33250997470SDave Chinner return error; 33350997470SDave Chinner } 33450997470SDave Chinner 33550997470SDave Chinner /* 336afca6c5bSDave Chinner * If we are allocating a new inode, then check what was returned is 337afca6c5bSDave Chinner * actually a free, empty inode. If we are not allocating an inode, 338afca6c5bSDave Chinner * then check we didn't find a free inode. 339afca6c5bSDave Chinner * 340afca6c5bSDave Chinner * Returns: 341afca6c5bSDave Chinner * 0 if the inode free state matches the lookup context 342afca6c5bSDave Chinner * -ENOENT if the inode is free and we are not allocating 343afca6c5bSDave Chinner * -EFSCORRUPTED if there is any state mismatch at all 344afca6c5bSDave Chinner */ 345afca6c5bSDave Chinner static int 346afca6c5bSDave Chinner xfs_iget_check_free_state( 347afca6c5bSDave Chinner struct xfs_inode *ip, 348afca6c5bSDave Chinner int flags) 349afca6c5bSDave Chinner { 350afca6c5bSDave Chinner if (flags & XFS_IGET_CREATE) { 351afca6c5bSDave Chinner /* should be a free inode */ 352afca6c5bSDave Chinner if (VFS_I(ip)->i_mode != 0) { 353afca6c5bSDave Chinner xfs_warn(ip->i_mount, 354afca6c5bSDave Chinner "Corruption detected! Free inode 0x%llx not marked free! (mode 0x%x)", 355afca6c5bSDave Chinner ip->i_ino, VFS_I(ip)->i_mode); 356afca6c5bSDave Chinner return -EFSCORRUPTED; 357afca6c5bSDave Chinner } 358afca6c5bSDave Chinner 3596e73a545SChristoph Hellwig if (ip->i_nblocks != 0) { 360afca6c5bSDave Chinner xfs_warn(ip->i_mount, 361afca6c5bSDave Chinner "Corruption detected! 
Free inode 0x%llx has blocks allocated!", 362afca6c5bSDave Chinner ip->i_ino); 363afca6c5bSDave Chinner return -EFSCORRUPTED; 364afca6c5bSDave Chinner } 365afca6c5bSDave Chinner return 0; 366afca6c5bSDave Chinner } 367afca6c5bSDave Chinner 368afca6c5bSDave Chinner /* should be an allocated inode */ 369afca6c5bSDave Chinner if (VFS_I(ip)->i_mode == 0) 370afca6c5bSDave Chinner return -ENOENT; 371afca6c5bSDave Chinner 372afca6c5bSDave Chinner return 0; 373afca6c5bSDave Chinner } 374afca6c5bSDave Chinner 375afca6c5bSDave Chinner /* 37633479e05SDave Chinner * Check the validity of the inode we just found it the cache 37733479e05SDave Chinner */ 37833479e05SDave Chinner static int 37933479e05SDave Chinner xfs_iget_cache_hit( 38033479e05SDave Chinner struct xfs_perag *pag, 38133479e05SDave Chinner struct xfs_inode *ip, 38233479e05SDave Chinner xfs_ino_t ino, 38333479e05SDave Chinner int flags, 38433479e05SDave Chinner int lock_flags) __releases(RCU) 38533479e05SDave Chinner { 38633479e05SDave Chinner struct inode *inode = VFS_I(ip); 38733479e05SDave Chinner struct xfs_mount *mp = ip->i_mount; 38833479e05SDave Chinner int error; 38933479e05SDave Chinner 39033479e05SDave Chinner /* 39133479e05SDave Chinner * check for re-use of an inode within an RCU grace period due to the 39233479e05SDave Chinner * radix tree nodes not being updated yet. We monitor for this by 39333479e05SDave Chinner * setting the inode number to zero before freeing the inode structure. 39433479e05SDave Chinner * If the inode has been reallocated and set up, then the inode number 39533479e05SDave Chinner * will not match, so check for that, too. 39633479e05SDave Chinner */ 39733479e05SDave Chinner spin_lock(&ip->i_flags_lock); 39833479e05SDave Chinner if (ip->i_ino != ino) { 39933479e05SDave Chinner trace_xfs_iget_skip(ip); 400ff6d6af2SBill O'Donnell XFS_STATS_INC(mp, xs_ig_frecycle); 4012451337dSDave Chinner error = -EAGAIN; 40233479e05SDave Chinner goto out_error; 40333479e05SDave Chinner } 40433479e05SDave Chinner 40533479e05SDave Chinner 40633479e05SDave Chinner /* 40733479e05SDave Chinner * If we are racing with another cache hit that is currently 40833479e05SDave Chinner * instantiating this inode or currently recycling it out of 40933479e05SDave Chinner * reclaimabe state, wait for the initialisation to complete 41033479e05SDave Chinner * before continuing. 41133479e05SDave Chinner * 41233479e05SDave Chinner * XXX(hch): eventually we should do something equivalent to 41333479e05SDave Chinner * wait_on_inode to wait for these flags to be cleared 41433479e05SDave Chinner * instead of polling for it. 41533479e05SDave Chinner */ 41633479e05SDave Chinner if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { 41733479e05SDave Chinner trace_xfs_iget_skip(ip); 418ff6d6af2SBill O'Donnell XFS_STATS_INC(mp, xs_ig_frecycle); 4192451337dSDave Chinner error = -EAGAIN; 42033479e05SDave Chinner goto out_error; 42133479e05SDave Chinner } 42233479e05SDave Chinner 42333479e05SDave Chinner /* 424afca6c5bSDave Chinner * Check the inode free state is valid. This also detects lookup 425afca6c5bSDave Chinner * racing with unlinks. 42633479e05SDave Chinner */ 427afca6c5bSDave Chinner error = xfs_iget_check_free_state(ip, flags); 428afca6c5bSDave Chinner if (error) 42933479e05SDave Chinner goto out_error; 43033479e05SDave Chinner 43133479e05SDave Chinner /* 43233479e05SDave Chinner * If IRECLAIMABLE is set, we've torn down the VFS inode already. 43333479e05SDave Chinner * Need to carefully get it back into useable state. 
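 *
 * Recycling drops the i_flags lock and the RCU read lock while the VFS
 * inode is reinitialised.  XFS_IRECLAIM stays set across that window,
 * and on failure the inode is left XFS_IRECLAIMABLE so reclaim can still
 * tear it down.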
43433479e05SDave Chinner */ 43533479e05SDave Chinner if (ip->i_flags & XFS_IRECLAIMABLE) { 43633479e05SDave Chinner trace_xfs_iget_reclaim(ip); 43733479e05SDave Chinner 438378f681cSDarrick J. Wong if (flags & XFS_IGET_INCORE) { 439378f681cSDarrick J. Wong error = -EAGAIN; 440378f681cSDarrick J. Wong goto out_error; 441378f681cSDarrick J. Wong } 442378f681cSDarrick J. Wong 44333479e05SDave Chinner /* 44433479e05SDave Chinner * We need to set XFS_IRECLAIM to prevent xfs_reclaim_inode 44533479e05SDave Chinner * from stomping over us while we recycle the inode. We can't 44633479e05SDave Chinner * clear the radix tree reclaimable tag yet as it requires 44733479e05SDave Chinner * pag_ici_lock to be held exclusive. 44833479e05SDave Chinner */ 44933479e05SDave Chinner ip->i_flags |= XFS_IRECLAIM; 45033479e05SDave Chinner 45133479e05SDave Chinner spin_unlock(&ip->i_flags_lock); 45233479e05SDave Chinner rcu_read_unlock(); 45333479e05SDave Chinner 454d45344d6SIra Weiny ASSERT(!rwsem_is_locked(&inode->i_rwsem)); 45550997470SDave Chinner error = xfs_reinit_inode(mp, inode); 45633479e05SDave Chinner if (error) { 457756baca2SBrian Foster bool wake; 45833479e05SDave Chinner /* 45933479e05SDave Chinner * Re-initializing the inode failed, and we are in deep 46033479e05SDave Chinner * trouble. Try to re-add it to the reclaim list. 46133479e05SDave Chinner */ 46233479e05SDave Chinner rcu_read_lock(); 46333479e05SDave Chinner spin_lock(&ip->i_flags_lock); 464756baca2SBrian Foster wake = !!__xfs_iflags_test(ip, XFS_INEW); 46533479e05SDave Chinner ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM); 466756baca2SBrian Foster if (wake) 467756baca2SBrian Foster wake_up_bit(&ip->i_flags, __XFS_INEW_BIT); 46833479e05SDave Chinner ASSERT(ip->i_flags & XFS_IRECLAIMABLE); 46933479e05SDave Chinner trace_xfs_iget_reclaim_fail(ip); 47033479e05SDave Chinner goto out_error; 47133479e05SDave Chinner } 47233479e05SDave Chinner 47333479e05SDave Chinner spin_lock(&pag->pag_ici_lock); 47433479e05SDave Chinner spin_lock(&ip->i_flags_lock); 47533479e05SDave Chinner 47633479e05SDave Chinner /* 47733479e05SDave Chinner * Clear the per-lifetime state in the inode as we are now 47833479e05SDave Chinner * effectively a new inode and need to return to the initial 47933479e05SDave Chinner * state before reuse occurs. 48033479e05SDave Chinner */ 48133479e05SDave Chinner ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS; 48233479e05SDave Chinner ip->i_flags |= XFS_INEW; 483545c0889SDave Chinner xfs_inode_clear_reclaim_tag(pag, ip->i_ino); 48433479e05SDave Chinner inode->i_state = I_NEW; 4856772c1f1SDarrick J. Wong ip->i_sick = 0; 4866772c1f1SDarrick J. Wong ip->i_checked = 0; 48733479e05SDave Chinner 48833479e05SDave Chinner spin_unlock(&ip->i_flags_lock); 48933479e05SDave Chinner spin_unlock(&pag->pag_ici_lock); 49033479e05SDave Chinner } else { 49133479e05SDave Chinner /* If the VFS inode is being torn down, pause and try again. */ 49233479e05SDave Chinner if (!igrab(inode)) { 49333479e05SDave Chinner trace_xfs_iget_skip(ip); 4942451337dSDave Chinner error = -EAGAIN; 49533479e05SDave Chinner goto out_error; 49633479e05SDave Chinner } 49733479e05SDave Chinner 49833479e05SDave Chinner /* We've got a live one. */ 49933479e05SDave Chinner spin_unlock(&ip->i_flags_lock); 50033479e05SDave Chinner rcu_read_unlock(); 50133479e05SDave Chinner trace_xfs_iget_hit(ip); 50233479e05SDave Chinner } 50333479e05SDave Chinner 50433479e05SDave Chinner if (lock_flags != 0) 50533479e05SDave Chinner xfs_ilock(ip, lock_flags); 50633479e05SDave Chinner 507378f681cSDarrick J. 
Wong if (!(flags & XFS_IGET_INCORE)) 508dae2f8edSIra Weiny xfs_iflags_clear(ip, XFS_ISTALE); 509ff6d6af2SBill O'Donnell XFS_STATS_INC(mp, xs_ig_found); 51033479e05SDave Chinner 51133479e05SDave Chinner return 0; 51233479e05SDave Chinner 51333479e05SDave Chinner out_error: 51433479e05SDave Chinner spin_unlock(&ip->i_flags_lock); 51533479e05SDave Chinner rcu_read_unlock(); 51633479e05SDave Chinner return error; 51733479e05SDave Chinner } 51833479e05SDave Chinner 51933479e05SDave Chinner 52033479e05SDave Chinner static int 52133479e05SDave Chinner xfs_iget_cache_miss( 52233479e05SDave Chinner struct xfs_mount *mp, 52333479e05SDave Chinner struct xfs_perag *pag, 52433479e05SDave Chinner xfs_trans_t *tp, 52533479e05SDave Chinner xfs_ino_t ino, 52633479e05SDave Chinner struct xfs_inode **ipp, 52733479e05SDave Chinner int flags, 52833479e05SDave Chinner int lock_flags) 52933479e05SDave Chinner { 53033479e05SDave Chinner struct xfs_inode *ip; 53133479e05SDave Chinner int error; 53233479e05SDave Chinner xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino); 53333479e05SDave Chinner int iflags; 53433479e05SDave Chinner 53533479e05SDave Chinner ip = xfs_inode_alloc(mp, ino); 53633479e05SDave Chinner if (!ip) 5372451337dSDave Chinner return -ENOMEM; 53833479e05SDave Chinner 539bb8a66afSChristoph Hellwig error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, flags); 54033479e05SDave Chinner if (error) 54133479e05SDave Chinner goto out_destroy; 54233479e05SDave Chinner 543bb8a66afSChristoph Hellwig /* 544bb8a66afSChristoph Hellwig * For version 5 superblocks, if we are initialising a new inode and we 545bb8a66afSChristoph Hellwig * are not utilising the XFS_MOUNT_IKEEP inode cluster mode, we can 546bb8a66afSChristoph Hellwig * simply build the new inode core with a random generation number. 547bb8a66afSChristoph Hellwig * 548bb8a66afSChristoph Hellwig * For version 4 (and older) superblocks, log recovery is dependent on 549965e0a1aSChristoph Hellwig * the i_flushiter field being initialised from the current on-disk 550bb8a66afSChristoph Hellwig * value and hence we must also read the inode off disk even when 551bb8a66afSChristoph Hellwig * initializing new inodes. 552bb8a66afSChristoph Hellwig */ 553bb8a66afSChristoph Hellwig if (xfs_sb_version_has_v3inode(&mp->m_sb) && 554bb8a66afSChristoph Hellwig (flags & XFS_IGET_CREATE) && !(mp->m_flags & XFS_MOUNT_IKEEP)) { 555bb8a66afSChristoph Hellwig VFS_I(ip)->i_generation = prandom_u32(); 556bb8a66afSChristoph Hellwig } else { 557bb8a66afSChristoph Hellwig struct xfs_buf *bp; 558bb8a66afSChristoph Hellwig 559af9dcddeSChristoph Hellwig error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp); 560bb8a66afSChristoph Hellwig if (error) 561bb8a66afSChristoph Hellwig goto out_destroy; 562bb8a66afSChristoph Hellwig 563af9dcddeSChristoph Hellwig error = xfs_inode_from_disk(ip, 564af9dcddeSChristoph Hellwig xfs_buf_offset(bp, ip->i_imap.im_boffset)); 565bb8a66afSChristoph Hellwig if (!error) 566bb8a66afSChristoph Hellwig xfs_buf_set_ref(bp, XFS_INO_REF); 567bb8a66afSChristoph Hellwig xfs_trans_brelse(tp, bp); 568bb8a66afSChristoph Hellwig 569bb8a66afSChristoph Hellwig if (error) 570bb8a66afSChristoph Hellwig goto out_destroy; 571bb8a66afSChristoph Hellwig } 572bb8a66afSChristoph Hellwig 57333479e05SDave Chinner trace_xfs_iget_miss(ip); 57433479e05SDave Chinner 575ee457001SDave Chinner /* 576afca6c5bSDave Chinner * Check the inode free state is valid. This also detects lookup 577afca6c5bSDave Chinner * racing with unlinks. 
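 * The same helper is used by xfs_iget_cache_hit(); see
 * xfs_iget_check_free_state() above for the exact rules.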
578ee457001SDave Chinner */ 579afca6c5bSDave Chinner error = xfs_iget_check_free_state(ip, flags); 580afca6c5bSDave Chinner if (error) 581ee457001SDave Chinner goto out_destroy; 58233479e05SDave Chinner 58333479e05SDave Chinner /* 58433479e05SDave Chinner * Preload the radix tree so we can insert safely under the 58533479e05SDave Chinner * write spinlock. Note that we cannot sleep inside the preload 58633479e05SDave Chinner * region. Since we can be called from transaction context, don't 58733479e05SDave Chinner * recurse into the file system. 58833479e05SDave Chinner */ 58933479e05SDave Chinner if (radix_tree_preload(GFP_NOFS)) { 5902451337dSDave Chinner error = -EAGAIN; 59133479e05SDave Chinner goto out_destroy; 59233479e05SDave Chinner } 59333479e05SDave Chinner 59433479e05SDave Chinner /* 59533479e05SDave Chinner * Because the inode hasn't been added to the radix-tree yet it can't 59633479e05SDave Chinner * be found by another thread, so we can do the non-sleeping lock here. 59733479e05SDave Chinner */ 59833479e05SDave Chinner if (lock_flags) { 59933479e05SDave Chinner if (!xfs_ilock_nowait(ip, lock_flags)) 60033479e05SDave Chinner BUG(); 60133479e05SDave Chinner } 60233479e05SDave Chinner 60333479e05SDave Chinner /* 60433479e05SDave Chinner * These values must be set before inserting the inode into the radix 60533479e05SDave Chinner * tree as the moment it is inserted a concurrent lookup (allowed by the 60633479e05SDave Chinner * RCU locking mechanism) can find it and that lookup must see that this 60733479e05SDave Chinner * is an inode currently under construction (i.e. that XFS_INEW is set). 60833479e05SDave Chinner * The ip->i_flags_lock that protects the XFS_INEW flag forms the 60933479e05SDave Chinner * memory barrier that ensures this detection works correctly at lookup 61033479e05SDave Chinner * time. 61133479e05SDave Chinner */ 61233479e05SDave Chinner iflags = XFS_INEW; 61333479e05SDave Chinner if (flags & XFS_IGET_DONTCACHE) 6142c567af4SIra Weiny d_mark_dontcache(VFS_I(ip)); 615113a5683SChandra Seetharaman ip->i_udquot = NULL; 616113a5683SChandra Seetharaman ip->i_gdquot = NULL; 61792f8ff73SChandra Seetharaman ip->i_pdquot = NULL; 61833479e05SDave Chinner xfs_iflags_set(ip, iflags); 61933479e05SDave Chinner 62033479e05SDave Chinner /* insert the new inode */ 62133479e05SDave Chinner spin_lock(&pag->pag_ici_lock); 62233479e05SDave Chinner error = radix_tree_insert(&pag->pag_ici_root, agino, ip); 62333479e05SDave Chinner if (unlikely(error)) { 62433479e05SDave Chinner WARN_ON(error != -EEXIST); 625ff6d6af2SBill O'Donnell XFS_STATS_INC(mp, xs_ig_dup); 6262451337dSDave Chinner error = -EAGAIN; 62733479e05SDave Chinner goto out_preload_end; 62833479e05SDave Chinner } 62933479e05SDave Chinner spin_unlock(&pag->pag_ici_lock); 63033479e05SDave Chinner radix_tree_preload_end(); 63133479e05SDave Chinner 63233479e05SDave Chinner *ipp = ip; 63333479e05SDave Chinner return 0; 63433479e05SDave Chinner 63533479e05SDave Chinner out_preload_end: 63633479e05SDave Chinner spin_unlock(&pag->pag_ici_lock); 63733479e05SDave Chinner radix_tree_preload_end(); 63833479e05SDave Chinner if (lock_flags) 63933479e05SDave Chinner xfs_iunlock(ip, lock_flags); 64033479e05SDave Chinner out_destroy: 64133479e05SDave Chinner __destroy_inode(VFS_I(ip)); 64233479e05SDave Chinner xfs_inode_free(ip); 64333479e05SDave Chinner return error; 64433479e05SDave Chinner } 64533479e05SDave Chinner 64633479e05SDave Chinner /* 64702511a5aSDave Chinner * Look up an inode by number in the given file system. 
The inode is looked up 64802511a5aSDave Chinner * in the cache held in each AG. If the inode is found in the cache, initialise 64902511a5aSDave Chinner * the vfs inode if necessary. 65033479e05SDave Chinner * 65102511a5aSDave Chinner * If it is not in core, read it in from the file system's device, add it to the 65202511a5aSDave Chinner * cache and initialise the vfs inode. 65333479e05SDave Chinner * 65433479e05SDave Chinner * The inode is locked according to the value of the lock_flags parameter. 65502511a5aSDave Chinner * Inode lookup is only done during metadata operations and not as part of the 65602511a5aSDave Chinner * data IO path. Hence we only allow locking of the XFS_ILOCK during lookup. 65733479e05SDave Chinner */ 65833479e05SDave Chinner int 65933479e05SDave Chinner xfs_iget( 66002511a5aSDave Chinner struct xfs_mount *mp, 66102511a5aSDave Chinner struct xfs_trans *tp, 66233479e05SDave Chinner xfs_ino_t ino, 66333479e05SDave Chinner uint flags, 66433479e05SDave Chinner uint lock_flags, 66502511a5aSDave Chinner struct xfs_inode **ipp) 66633479e05SDave Chinner { 66702511a5aSDave Chinner struct xfs_inode *ip; 66802511a5aSDave Chinner struct xfs_perag *pag; 66933479e05SDave Chinner xfs_agino_t agino; 67002511a5aSDave Chinner int error; 67133479e05SDave Chinner 67233479e05SDave Chinner ASSERT((lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) == 0); 67333479e05SDave Chinner 67433479e05SDave Chinner /* reject inode numbers outside existing AGs */ 67533479e05SDave Chinner if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) 6762451337dSDave Chinner return -EINVAL; 67733479e05SDave Chinner 678ff6d6af2SBill O'Donnell XFS_STATS_INC(mp, xs_ig_attempts); 6798774cf8bSLucas Stach 68033479e05SDave Chinner /* get the perag structure and ensure that it's inode capable */ 68133479e05SDave Chinner pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino)); 68233479e05SDave Chinner agino = XFS_INO_TO_AGINO(mp, ino); 68333479e05SDave Chinner 68433479e05SDave Chinner again: 68533479e05SDave Chinner error = 0; 68633479e05SDave Chinner rcu_read_lock(); 68733479e05SDave Chinner ip = radix_tree_lookup(&pag->pag_ici_root, agino); 68833479e05SDave Chinner 68933479e05SDave Chinner if (ip) { 69033479e05SDave Chinner error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags); 69133479e05SDave Chinner if (error) 69233479e05SDave Chinner goto out_error_or_again; 69333479e05SDave Chinner } else { 69433479e05SDave Chinner rcu_read_unlock(); 695378f681cSDarrick J. Wong if (flags & XFS_IGET_INCORE) { 696ed438b47SDarrick J. Wong error = -ENODATA; 697378f681cSDarrick J. Wong goto out_error_or_again; 698378f681cSDarrick J. Wong } 699ff6d6af2SBill O'Donnell XFS_STATS_INC(mp, xs_ig_missed); 70033479e05SDave Chinner 70133479e05SDave Chinner error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, 70233479e05SDave Chinner flags, lock_flags); 70333479e05SDave Chinner if (error) 70433479e05SDave Chinner goto out_error_or_again; 70533479e05SDave Chinner } 70633479e05SDave Chinner xfs_perag_put(pag); 70733479e05SDave Chinner 70833479e05SDave Chinner *ipp = ip; 70933479e05SDave Chinner 71033479e05SDave Chinner /* 71158c90473SDave Chinner * If we have a real type for an on-disk inode, we can setup the inode 71233479e05SDave Chinner * now. If it's a new inode being created, xfs_ialloc will handle it. 
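 * A non-zero i_mode is what distinguishes an existing on-disk inode from
 * a freshly allocated one at this point.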
71333479e05SDave Chinner */ 714c19b3b05SDave Chinner if (xfs_iflags_test(ip, XFS_INEW) && VFS_I(ip)->i_mode != 0) 71558c90473SDave Chinner xfs_setup_existing_inode(ip); 71633479e05SDave Chinner return 0; 71733479e05SDave Chinner 71833479e05SDave Chinner out_error_or_again: 719378f681cSDarrick J. Wong if (!(flags & XFS_IGET_INCORE) && error == -EAGAIN) { 72033479e05SDave Chinner delay(1); 72133479e05SDave Chinner goto again; 72233479e05SDave Chinner } 72333479e05SDave Chinner xfs_perag_put(pag); 72433479e05SDave Chinner return error; 72533479e05SDave Chinner } 72633479e05SDave Chinner 7276d8b79cfSDave Chinner /* 728378f681cSDarrick J. Wong * "Is this a cached inode that's also allocated?" 729378f681cSDarrick J. Wong * 730378f681cSDarrick J. Wong * Look up an inode by number in the given file system. If the inode is 731378f681cSDarrick J. Wong * in cache and isn't in purgatory, return 1 if the inode is allocated 732378f681cSDarrick J. Wong * and 0 if it is not. For all other cases (not in cache, being torn 733378f681cSDarrick J. Wong * down, etc.), return a negative error code. 734378f681cSDarrick J. Wong * 735378f681cSDarrick J. Wong * The caller has to prevent inode allocation and freeing activity, 736378f681cSDarrick J. Wong * presumably by locking the AGI buffer. This is to ensure that an 737378f681cSDarrick J. Wong * inode cannot transition from allocated to freed until the caller is 738378f681cSDarrick J. Wong * ready to allow that. If the inode is in an intermediate state (new, 739378f681cSDarrick J. Wong * reclaimable, or being reclaimed), -EAGAIN will be returned; if the 740378f681cSDarrick J. Wong * inode is not in the cache, -ENOENT will be returned. The caller must 741378f681cSDarrick J. Wong * deal with these scenarios appropriately. 742378f681cSDarrick J. Wong * 743378f681cSDarrick J. Wong * This is a specialized use case for the online scrubber; if you're 744378f681cSDarrick J. Wong * reading this, you probably want xfs_iget. 745378f681cSDarrick J. Wong */ 746378f681cSDarrick J. Wong int 747378f681cSDarrick J. Wong xfs_icache_inode_is_allocated( 748378f681cSDarrick J. Wong struct xfs_mount *mp, 749378f681cSDarrick J. Wong struct xfs_trans *tp, 750378f681cSDarrick J. Wong xfs_ino_t ino, 751378f681cSDarrick J. Wong bool *inuse) 752378f681cSDarrick J. Wong { 753378f681cSDarrick J. Wong struct xfs_inode *ip; 754378f681cSDarrick J. Wong int error; 755378f681cSDarrick J. Wong 756378f681cSDarrick J. Wong error = xfs_iget(mp, tp, ino, XFS_IGET_INCORE, 0, &ip); 757378f681cSDarrick J. Wong if (error) 758378f681cSDarrick J. Wong return error; 759378f681cSDarrick J. Wong 760378f681cSDarrick J. Wong *inuse = !!(VFS_I(ip)->i_mode); 76144a8736bSDarrick J. Wong xfs_irele(ip); 762378f681cSDarrick J. Wong return 0; 763378f681cSDarrick J. Wong } 764378f681cSDarrick J. Wong 765378f681cSDarrick J. Wong /* 7666d8b79cfSDave Chinner * The inode lookup is done in batches to keep the amount of lock traffic and 7676d8b79cfSDave Chinner * radix tree lookups to a minimum. The batch size is a trade off between 7686d8b79cfSDave Chinner * lookup reduction and stack usage. This is in the reclaim path, so we can't 7696d8b79cfSDave Chinner * be too greedy. 770df600197SDarrick J. Wong * 771c1115c0cSDarrick J. Wong * XXX: This will be moved closer to xfs_icwalk* once we get rid of the 772df600197SDarrick J. Wong * separate reclaim walk functions. 7736d8b79cfSDave Chinner */ 7746d8b79cfSDave Chinner #define XFS_LOOKUP_BATCH 32 7756d8b79cfSDave Chinner 7761ad2cfe0SDarrick J. 
Wong #ifdef CONFIG_XFS_QUOTA 777b9baaef4SDarrick J. Wong /* Decide if we want to grab this inode to drop its dquots. */ 778b9baaef4SDarrick J. Wong static bool 779b9baaef4SDarrick J. Wong xfs_dqrele_igrab( 780b9baaef4SDarrick J. Wong struct xfs_inode *ip) 781b9baaef4SDarrick J. Wong { 782b9baaef4SDarrick J. Wong bool ret = false; 783b9baaef4SDarrick J. Wong 784b9baaef4SDarrick J. Wong ASSERT(rcu_read_lock_held()); 785b9baaef4SDarrick J. Wong 786b9baaef4SDarrick J. Wong /* Check for stale RCU freed inode */ 787b9baaef4SDarrick J. Wong spin_lock(&ip->i_flags_lock); 788b9baaef4SDarrick J. Wong if (!ip->i_ino) 789b9baaef4SDarrick J. Wong goto out_unlock; 790b9baaef4SDarrick J. Wong 791b9baaef4SDarrick J. Wong /* 792b9baaef4SDarrick J. Wong * Skip inodes that are anywhere in the reclaim machinery because we 793b9baaef4SDarrick J. Wong * drop dquots before tagging an inode for reclamation. 794b9baaef4SDarrick J. Wong */ 795b9baaef4SDarrick J. Wong if (ip->i_flags & (XFS_IRECLAIM | XFS_IRECLAIMABLE)) 796b9baaef4SDarrick J. Wong goto out_unlock; 797b9baaef4SDarrick J. Wong 798b9baaef4SDarrick J. Wong /* 799b9baaef4SDarrick J. Wong * The inode looks alive; try to grab a VFS reference so that it won't 800b9baaef4SDarrick J. Wong * get destroyed. If we got the reference, return true to say that 801b9baaef4SDarrick J. Wong * we grabbed the inode. 802b9baaef4SDarrick J. Wong * 803b9baaef4SDarrick J. Wong * If we can't get the reference, then we know the inode had its VFS 804b9baaef4SDarrick J. Wong * state torn down and hasn't yet entered the reclaim machinery. Since 805b9baaef4SDarrick J. Wong * we also know that dquots are detached from an inode before it enters 806b9baaef4SDarrick J. Wong * reclaim, we can skip the inode. 807b9baaef4SDarrick J. Wong */ 808b9baaef4SDarrick J. Wong ret = igrab(VFS_I(ip)) != NULL; 809b9baaef4SDarrick J. Wong 810b9baaef4SDarrick J. Wong out_unlock: 811b9baaef4SDarrick J. Wong spin_unlock(&ip->i_flags_lock); 812b9baaef4SDarrick J. Wong return ret; 813b9baaef4SDarrick J. Wong } 814b9baaef4SDarrick J. Wong 8151ad2cfe0SDarrick J. Wong /* Drop this inode's dquots. */ 8161ad2cfe0SDarrick J. Wong static int 8171ad2cfe0SDarrick J. Wong xfs_dqrele_inode( 8181ad2cfe0SDarrick J. Wong struct xfs_inode *ip, 8191ad2cfe0SDarrick J. Wong void *priv) 8201ad2cfe0SDarrick J. Wong { 8211ad2cfe0SDarrick J. Wong struct xfs_eofblocks *eofb = priv; 8221ad2cfe0SDarrick J. Wong 8239d2793ceSDarrick J. Wong if (xfs_iflags_test(ip, XFS_INEW)) 8249d2793ceSDarrick J. Wong xfs_inew_wait(ip); 8259d2793ceSDarrick J. Wong 8261ad2cfe0SDarrick J. Wong xfs_ilock(ip, XFS_ILOCK_EXCL); 8271ad2cfe0SDarrick J. Wong if (eofb->eof_flags & XFS_ICWALK_FLAG_DROP_UDQUOT) { 8281ad2cfe0SDarrick J. Wong xfs_qm_dqrele(ip->i_udquot); 8291ad2cfe0SDarrick J. Wong ip->i_udquot = NULL; 8301ad2cfe0SDarrick J. Wong } 8311ad2cfe0SDarrick J. Wong if (eofb->eof_flags & XFS_ICWALK_FLAG_DROP_GDQUOT) { 8321ad2cfe0SDarrick J. Wong xfs_qm_dqrele(ip->i_gdquot); 8331ad2cfe0SDarrick J. Wong ip->i_gdquot = NULL; 8341ad2cfe0SDarrick J. Wong } 8351ad2cfe0SDarrick J. Wong if (eofb->eof_flags & XFS_ICWALK_FLAG_DROP_PDQUOT) { 8361ad2cfe0SDarrick J. Wong xfs_qm_dqrele(ip->i_pdquot); 8371ad2cfe0SDarrick J. Wong ip->i_pdquot = NULL; 8381ad2cfe0SDarrick J. Wong } 8391ad2cfe0SDarrick J. Wong xfs_iunlock(ip, XFS_ILOCK_EXCL); 8401ad2cfe0SDarrick J. Wong return 0; 8411ad2cfe0SDarrick J. Wong } 8421ad2cfe0SDarrick J. Wong 8431ad2cfe0SDarrick J. Wong /* 8441ad2cfe0SDarrick J. Wong * Detach all dquots from incore inodes if we can. 
The caller must already 8451ad2cfe0SDarrick J. Wong * have dropped the relevant XFS_[UGP]QUOTA_ACTIVE flags so that dquots will 8461ad2cfe0SDarrick J. Wong * not get reattached. 8471ad2cfe0SDarrick J. Wong */ 8481ad2cfe0SDarrick J. Wong int 8491ad2cfe0SDarrick J. Wong xfs_dqrele_all_inodes( 8501ad2cfe0SDarrick J. Wong struct xfs_mount *mp, 8511ad2cfe0SDarrick J. Wong unsigned int qflags) 8521ad2cfe0SDarrick J. Wong { 8531ad2cfe0SDarrick J. Wong struct xfs_eofblocks eofb = { .eof_flags = 0 }; 8541ad2cfe0SDarrick J. Wong 8551ad2cfe0SDarrick J. Wong if (qflags & XFS_UQUOTA_ACCT) 8561ad2cfe0SDarrick J. Wong eofb.eof_flags |= XFS_ICWALK_FLAG_DROP_UDQUOT; 8571ad2cfe0SDarrick J. Wong if (qflags & XFS_GQUOTA_ACCT) 8581ad2cfe0SDarrick J. Wong eofb.eof_flags |= XFS_ICWALK_FLAG_DROP_GDQUOT; 8591ad2cfe0SDarrick J. Wong if (qflags & XFS_PQUOTA_ACCT) 8601ad2cfe0SDarrick J. Wong eofb.eof_flags |= XFS_ICWALK_FLAG_DROP_PDQUOT; 8611ad2cfe0SDarrick J. Wong 862*7fdff526SDarrick J. Wong return xfs_icwalk(mp, xfs_dqrele_inode, &eofb, XFS_ICWALK_DQRELE); 8631ad2cfe0SDarrick J. Wong } 864b9baaef4SDarrick J. Wong #else 865b9baaef4SDarrick J. Wong # define xfs_dqrele_igrab(ip) (false) 8661ad2cfe0SDarrick J. Wong #endif /* CONFIG_XFS_QUOTA */ 8671ad2cfe0SDarrick J. Wong 868579b62faSBrian Foster /* 8696d8b79cfSDave Chinner * Grab the inode for reclaim exclusively. 87050718b8dSDave Chinner * 87150718b8dSDave Chinner * We have found this inode via a lookup under RCU, so the inode may have 87250718b8dSDave Chinner * already been freed, or it may be in the process of being recycled by 87350718b8dSDave Chinner * xfs_iget(). In both cases, the inode will have XFS_IRECLAIM set. If the inode 87450718b8dSDave Chinner * has been fully recycled by the time we get the i_flags_lock, XFS_IRECLAIMABLE 87550718b8dSDave Chinner * will not be set. Hence we need to check for both these flag conditions to 87650718b8dSDave Chinner * avoid inodes that are no longer reclaim candidates. 87750718b8dSDave Chinner * 87850718b8dSDave Chinner * Note: checking for other state flags here, under the i_flags_lock or not, is 87950718b8dSDave Chinner * racy and should be avoided. Those races should be resolved only after we have 88050718b8dSDave Chinner * ensured that we are able to reclaim this inode and the world can see that we 88150718b8dSDave Chinner * are going to reclaim it. 88250718b8dSDave Chinner * 88350718b8dSDave Chinner * Return true if we grabbed it, false otherwise. 8846d8b79cfSDave Chinner */ 88550718b8dSDave Chinner static bool 8866d8b79cfSDave Chinner xfs_reclaim_inode_grab( 88750718b8dSDave Chinner struct xfs_inode *ip) 8886d8b79cfSDave Chinner { 8896d8b79cfSDave Chinner ASSERT(rcu_read_lock_held()); 8906d8b79cfSDave Chinner 8916d8b79cfSDave Chinner spin_lock(&ip->i_flags_lock); 8926d8b79cfSDave Chinner if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) || 8936d8b79cfSDave Chinner __xfs_iflags_test(ip, XFS_IRECLAIM)) { 8946d8b79cfSDave Chinner /* not a reclaim candidate. 
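 * Either reclaim already owns it, or xfs_iget() finished
 * recycling it before we took the lock.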
*/ 8956d8b79cfSDave Chinner spin_unlock(&ip->i_flags_lock); 89650718b8dSDave Chinner return false; 8976d8b79cfSDave Chinner } 8986d8b79cfSDave Chinner __xfs_iflags_set(ip, XFS_IRECLAIM); 8996d8b79cfSDave Chinner spin_unlock(&ip->i_flags_lock); 90050718b8dSDave Chinner return true; 9016d8b79cfSDave Chinner } 9026d8b79cfSDave Chinner 9036d8b79cfSDave Chinner /* 90402511a5aSDave Chinner * Inode reclaim is non-blocking, so the default action if progress cannot be 90502511a5aSDave Chinner * made is to "requeue" the inode for reclaim by unlocking it and clearing the 90602511a5aSDave Chinner * XFS_IRECLAIM flag. If we are in a shutdown state, we don't care about 90702511a5aSDave Chinner * blocking anymore and hence we can wait for the inode to be able to reclaim 90802511a5aSDave Chinner * it. 9096d8b79cfSDave Chinner * 91002511a5aSDave Chinner * We do no IO here - if callers require inodes to be cleaned they must push the 91102511a5aSDave Chinner * AIL first to trigger writeback of dirty inodes. This enables writeback to be 91202511a5aSDave Chinner * done in the background in a non-blocking manner, and enables memory reclaim 91302511a5aSDave Chinner * to make progress without blocking. 9146d8b79cfSDave Chinner */ 9154d0bab3aSDave Chinner static void 9166d8b79cfSDave Chinner xfs_reclaim_inode( 9176d8b79cfSDave Chinner struct xfs_inode *ip, 91850718b8dSDave Chinner struct xfs_perag *pag) 9196d8b79cfSDave Chinner { 9208a17d7ddSDave Chinner xfs_ino_t ino = ip->i_ino; /* for radix_tree_delete */ 9216d8b79cfSDave Chinner 9229552e14dSDave Chinner if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) 9236d8b79cfSDave Chinner goto out; 924718ecc50SDave Chinner if (xfs_iflags_test_and_set(ip, XFS_IFLUSHING)) 9259552e14dSDave Chinner goto out_iunlock; 9266d8b79cfSDave Chinner 9276d8b79cfSDave Chinner if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { 9286d8b79cfSDave Chinner xfs_iunpin_wait(ip); 92988fc1879SBrian Foster xfs_iflush_abort(ip); 9306d8b79cfSDave Chinner goto reclaim; 9316d8b79cfSDave Chinner } 932617825feSDave Chinner if (xfs_ipincount(ip)) 933718ecc50SDave Chinner goto out_clear_flush; 934617825feSDave Chinner if (!xfs_inode_clean(ip)) 935718ecc50SDave Chinner goto out_clear_flush; 936617825feSDave Chinner 937718ecc50SDave Chinner xfs_iflags_clear(ip, XFS_IFLUSHING); 9386d8b79cfSDave Chinner reclaim: 93998efe8afSBrian Foster 9408a17d7ddSDave Chinner /* 9418a17d7ddSDave Chinner * Because we use RCU freeing we need to ensure the inode always appears 9428a17d7ddSDave Chinner * to be reclaimed with an invalid inode number when in the free state. 94398efe8afSBrian Foster * We do this as early as possible under the ILOCK so that 944f2e9ad21SOmar Sandoval * xfs_iflush_cluster() and xfs_ifree_cluster() can be guaranteed to 945f2e9ad21SOmar Sandoval * detect races with us here. By doing this, we guarantee that once 946f2e9ad21SOmar Sandoval * xfs_iflush_cluster() or xfs_ifree_cluster() has locked XFS_ILOCK that 947f2e9ad21SOmar Sandoval * it will see either a valid inode that will serialise correctly, or it 948f2e9ad21SOmar Sandoval * will see an invalid inode that it can skip. 
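 *
 * xfs_inode_free() performs the same i_flags/i_ino reset for inodes that
 * are torn down without going through reclaim.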
9498a17d7ddSDave Chinner */ 9508a17d7ddSDave Chinner spin_lock(&ip->i_flags_lock); 9518a17d7ddSDave Chinner ip->i_flags = XFS_IRECLAIM; 9528a17d7ddSDave Chinner ip->i_ino = 0; 9538a17d7ddSDave Chinner spin_unlock(&ip->i_flags_lock); 9548a17d7ddSDave Chinner 9556d8b79cfSDave Chinner xfs_iunlock(ip, XFS_ILOCK_EXCL); 9566d8b79cfSDave Chinner 957ff6d6af2SBill O'Donnell XFS_STATS_INC(ip->i_mount, xs_ig_reclaims); 9586d8b79cfSDave Chinner /* 9596d8b79cfSDave Chinner * Remove the inode from the per-AG radix tree. 9606d8b79cfSDave Chinner * 9616d8b79cfSDave Chinner * Because radix_tree_delete won't complain even if the item was never 9626d8b79cfSDave Chinner * added to the tree assert that it's been there before to catch 9636d8b79cfSDave Chinner * problems with the inode life time early on. 9646d8b79cfSDave Chinner */ 9656d8b79cfSDave Chinner spin_lock(&pag->pag_ici_lock); 9666d8b79cfSDave Chinner if (!radix_tree_delete(&pag->pag_ici_root, 9678a17d7ddSDave Chinner XFS_INO_TO_AGINO(ip->i_mount, ino))) 9686d8b79cfSDave Chinner ASSERT(0); 969545c0889SDave Chinner xfs_perag_clear_reclaim_tag(pag); 9706d8b79cfSDave Chinner spin_unlock(&pag->pag_ici_lock); 9716d8b79cfSDave Chinner 9726d8b79cfSDave Chinner /* 9736d8b79cfSDave Chinner * Here we do an (almost) spurious inode lock in order to coordinate 9746d8b79cfSDave Chinner * with inode cache radix tree lookups. This is because the lookup 9756d8b79cfSDave Chinner * can reference the inodes in the cache without taking references. 9766d8b79cfSDave Chinner * 9776d8b79cfSDave Chinner * We make that OK here by ensuring that we wait until the inode is 9786d8b79cfSDave Chinner * unlocked after the lookup before we go ahead and free it. 9796d8b79cfSDave Chinner */ 9806d8b79cfSDave Chinner xfs_ilock(ip, XFS_ILOCK_EXCL); 9813ea06d73SDarrick J. Wong ASSERT(!ip->i_udquot && !ip->i_gdquot && !ip->i_pdquot); 9826d8b79cfSDave Chinner xfs_iunlock(ip, XFS_ILOCK_EXCL); 98396355d5aSDave Chinner ASSERT(xfs_inode_clean(ip)); 9846d8b79cfSDave Chinner 9858a17d7ddSDave Chinner __xfs_inode_free(ip); 9864d0bab3aSDave Chinner return; 9876d8b79cfSDave Chinner 988718ecc50SDave Chinner out_clear_flush: 989718ecc50SDave Chinner xfs_iflags_clear(ip, XFS_IFLUSHING); 9909552e14dSDave Chinner out_iunlock: 9916d8b79cfSDave Chinner xfs_iunlock(ip, XFS_ILOCK_EXCL); 9929552e14dSDave Chinner out: 993617825feSDave Chinner xfs_iflags_clear(ip, XFS_IRECLAIM); 9946d8b79cfSDave Chinner } 9956d8b79cfSDave Chinner 9966d8b79cfSDave Chinner /* 9976d8b79cfSDave Chinner * Walk the AGs and reclaim the inodes in them. Even if the filesystem is 9986d8b79cfSDave Chinner * corrupted, we still want to try to reclaim all the inodes. If we don't, 9996d8b79cfSDave Chinner * then a shut down during filesystem unmount reclaim walk leak all the 10006d8b79cfSDave Chinner * unreclaimed inodes. 1001617825feSDave Chinner * 1002617825feSDave Chinner * Returns non-zero if any AGs or inodes were skipped in the reclaim pass 1003617825feSDave Chinner * so that callers that want to block until all dirty inodes are written back 1004617825feSDave Chinner * and reclaimed can sanely loop. 
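 *
 * (In this version the skip information is no longer returned;
 * xfs_reclaim_inodes() instead keeps looping while the perag tree still
 * carries XFS_ICI_RECLAIM_TAG.)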
10056d8b79cfSDave Chinner */ 10064d0bab3aSDave Chinner static void 10076d8b79cfSDave Chinner xfs_reclaim_inodes_ag( 10086d8b79cfSDave Chinner struct xfs_mount *mp, 10096d8b79cfSDave Chinner int *nr_to_scan) 10106d8b79cfSDave Chinner { 10116d8b79cfSDave Chinner struct xfs_perag *pag; 10120e8e2c63SDave Chinner xfs_agnumber_t ag = 0; 10136d8b79cfSDave Chinner 10146d8b79cfSDave Chinner while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { 10156d8b79cfSDave Chinner unsigned long first_index = 0; 10166d8b79cfSDave Chinner int done = 0; 10176d8b79cfSDave Chinner int nr_found = 0; 10186d8b79cfSDave Chinner 10196d8b79cfSDave Chinner ag = pag->pag_agno + 1; 10206d8b79cfSDave Chinner 10210e8e2c63SDave Chinner first_index = READ_ONCE(pag->pag_ici_reclaim_cursor); 10226d8b79cfSDave Chinner do { 10236d8b79cfSDave Chinner struct xfs_inode *batch[XFS_LOOKUP_BATCH]; 10246d8b79cfSDave Chinner int i; 10256d8b79cfSDave Chinner 10266d8b79cfSDave Chinner rcu_read_lock(); 10276d8b79cfSDave Chinner nr_found = radix_tree_gang_lookup_tag( 10286d8b79cfSDave Chinner &pag->pag_ici_root, 10296d8b79cfSDave Chinner (void **)batch, first_index, 10306d8b79cfSDave Chinner XFS_LOOKUP_BATCH, 10316d8b79cfSDave Chinner XFS_ICI_RECLAIM_TAG); 10326d8b79cfSDave Chinner if (!nr_found) { 10336d8b79cfSDave Chinner done = 1; 10346d8b79cfSDave Chinner rcu_read_unlock(); 10356d8b79cfSDave Chinner break; 10366d8b79cfSDave Chinner } 10376d8b79cfSDave Chinner 10386d8b79cfSDave Chinner /* 10396d8b79cfSDave Chinner * Grab the inodes before we drop the lock. if we found 10406d8b79cfSDave Chinner * nothing, nr == 0 and the loop will be skipped. 10416d8b79cfSDave Chinner */ 10426d8b79cfSDave Chinner for (i = 0; i < nr_found; i++) { 10436d8b79cfSDave Chinner struct xfs_inode *ip = batch[i]; 10446d8b79cfSDave Chinner 104550718b8dSDave Chinner if (done || !xfs_reclaim_inode_grab(ip)) 10466d8b79cfSDave Chinner batch[i] = NULL; 10476d8b79cfSDave Chinner 10486d8b79cfSDave Chinner /* 10496d8b79cfSDave Chinner * Update the index for the next lookup. Catch 10506d8b79cfSDave Chinner * overflows into the next AG range which can 10516d8b79cfSDave Chinner * occur if we have inodes in the last block of 10526d8b79cfSDave Chinner * the AG and we are currently pointing to the 10536d8b79cfSDave Chinner * last inode. 10546d8b79cfSDave Chinner * 10556d8b79cfSDave Chinner * Because we may see inodes that are from the 10566d8b79cfSDave Chinner * wrong AG due to RCU freeing and 10576d8b79cfSDave Chinner * reallocation, only update the index if it 10586d8b79cfSDave Chinner * lies in this AG. It was a race that lead us 10596d8b79cfSDave Chinner * to see this inode, so another lookup from 10606d8b79cfSDave Chinner * the same index will not find it again. 10616d8b79cfSDave Chinner */ 10626d8b79cfSDave Chinner if (XFS_INO_TO_AGNO(mp, ip->i_ino) != 10636d8b79cfSDave Chinner pag->pag_agno) 10646d8b79cfSDave Chinner continue; 10656d8b79cfSDave Chinner first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); 10666d8b79cfSDave Chinner if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) 10676d8b79cfSDave Chinner done = 1; 10686d8b79cfSDave Chinner } 10696d8b79cfSDave Chinner 10706d8b79cfSDave Chinner /* unlock now we've grabbed the inodes. 
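 * Entries that failed xfs_reclaim_inode_grab() were NULLed out
 * above and are skipped in the loop below.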
*/ 10716d8b79cfSDave Chinner rcu_read_unlock(); 10726d8b79cfSDave Chinner 10736d8b79cfSDave Chinner for (i = 0; i < nr_found; i++) { 10744d0bab3aSDave Chinner if (batch[i]) 10754d0bab3aSDave Chinner xfs_reclaim_inode(batch[i], pag); 10766d8b79cfSDave Chinner } 10776d8b79cfSDave Chinner 10786d8b79cfSDave Chinner *nr_to_scan -= XFS_LOOKUP_BATCH; 10796d8b79cfSDave Chinner cond_resched(); 10806d8b79cfSDave Chinner } while (nr_found && !done && *nr_to_scan > 0); 10816d8b79cfSDave Chinner 10820e8e2c63SDave Chinner if (done) 10830e8e2c63SDave Chinner first_index = 0; 10840e8e2c63SDave Chinner WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index); 10856d8b79cfSDave Chinner xfs_perag_put(pag); 10866d8b79cfSDave Chinner } 10876d8b79cfSDave Chinner } 10886d8b79cfSDave Chinner 10894d0bab3aSDave Chinner void 10906d8b79cfSDave Chinner xfs_reclaim_inodes( 10914d0bab3aSDave Chinner struct xfs_mount *mp) 10926d8b79cfSDave Chinner { 10936d8b79cfSDave Chinner int nr_to_scan = INT_MAX; 10946d8b79cfSDave Chinner 10954d0bab3aSDave Chinner while (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { 1096617825feSDave Chinner xfs_ail_push_all_sync(mp->m_ail); 10974d0bab3aSDave Chinner xfs_reclaim_inodes_ag(mp, &nr_to_scan); 10980f4ec0f1SZheng Bin } 10996d8b79cfSDave Chinner } 11006d8b79cfSDave Chinner 11016d8b79cfSDave Chinner /* 110202511a5aSDave Chinner * The shrinker infrastructure determines how many inodes we should scan for 110302511a5aSDave Chinner * reclaim. We want as many clean inodes ready to reclaim as possible, so we 110402511a5aSDave Chinner * push the AIL here. We also want to proactively free up memory if we can to 110502511a5aSDave Chinner * minimise the amount of work memory reclaim has to do so we kick the 110602511a5aSDave Chinner * background reclaim if it isn't already scheduled. 11076d8b79cfSDave Chinner */ 11080a234c6dSDave Chinner long 11096d8b79cfSDave Chinner xfs_reclaim_inodes_nr( 11106d8b79cfSDave Chinner struct xfs_mount *mp, 11116d8b79cfSDave Chinner int nr_to_scan) 11126d8b79cfSDave Chinner { 11136d8b79cfSDave Chinner /* kick background reclaimer and push the AIL */ 11146d8b79cfSDave Chinner xfs_reclaim_work_queue(mp); 11156d8b79cfSDave Chinner xfs_ail_push_all(mp->m_ail); 11166d8b79cfSDave Chinner 111750718b8dSDave Chinner xfs_reclaim_inodes_ag(mp, &nr_to_scan); 1118617825feSDave Chinner return 0; 11196d8b79cfSDave Chinner } 11206d8b79cfSDave Chinner 11216d8b79cfSDave Chinner /* 11226d8b79cfSDave Chinner * Return the number of reclaimable inodes in the filesystem for 11236d8b79cfSDave Chinner * the shrinker to determine how much to reclaim. 11246d8b79cfSDave Chinner */ 11256d8b79cfSDave Chinner int 11266d8b79cfSDave Chinner xfs_reclaim_inodes_count( 11276d8b79cfSDave Chinner struct xfs_mount *mp) 11286d8b79cfSDave Chinner { 11296d8b79cfSDave Chinner struct xfs_perag *pag; 11306d8b79cfSDave Chinner xfs_agnumber_t ag = 0; 11316d8b79cfSDave Chinner int reclaimable = 0; 11326d8b79cfSDave Chinner 11336d8b79cfSDave Chinner while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { 11346d8b79cfSDave Chinner ag = pag->pag_agno + 1; 11356d8b79cfSDave Chinner reclaimable += pag->pag_ici_reclaimable; 11366d8b79cfSDave Chinner xfs_perag_put(pag); 11376d8b79cfSDave Chinner } 11386d8b79cfSDave Chinner return reclaimable; 11396d8b79cfSDave Chinner } 11406d8b79cfSDave Chinner 114139b1cfd7SDarrick J. 
STATIC bool
xfs_inode_match_id(
	struct xfs_inode	*ip,
	struct xfs_eofblocks	*eofb)
{
	if ((eofb->eof_flags & XFS_EOF_FLAGS_UID) &&
	    !uid_eq(VFS_I(ip)->i_uid, eofb->eof_uid))
		return false;

	if ((eofb->eof_flags & XFS_EOF_FLAGS_GID) &&
	    !gid_eq(VFS_I(ip)->i_gid, eofb->eof_gid))
		return false;

	if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) &&
	    ip->i_projid != eofb->eof_prid)
		return false;

	return true;
}

/*
 * A union-based inode filtering algorithm. Process the inode if any of the
 * criteria match. This is for global/internal scans only.
 */
STATIC bool
xfs_inode_match_id_union(
	struct xfs_inode	*ip,
	struct xfs_eofblocks	*eofb)
{
	if ((eofb->eof_flags & XFS_EOF_FLAGS_UID) &&
	    uid_eq(VFS_I(ip)->i_uid, eofb->eof_uid))
		return true;

	if ((eofb->eof_flags & XFS_EOF_FLAGS_GID) &&
	    gid_eq(VFS_I(ip)->i_gid, eofb->eof_gid))
		return true;

	if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) &&
	    ip->i_projid == eofb->eof_prid)
		return true;

	return false;
}

/*
 * Is this inode @ip eligible for eof/cow block reclamation, given some
 * filtering parameters @eofb?  The inode is eligible if @eofb is null or
 * if the predicate functions match.
 */
static bool
xfs_inode_matches_eofb(
	struct xfs_inode	*ip,
	struct xfs_eofblocks	*eofb)
{
	bool			match;

	if (!eofb)
		return true;

	if (eofb->eof_flags & XFS_EOF_FLAGS_UNION)
		match = xfs_inode_match_id_union(ip, eofb);
	else
		match = xfs_inode_match_id(ip, eofb);
	if (!match)
		return false;

	/* skip the inode if the file size is too small */
	if ((eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE) &&
	    XFS_ISIZE(ip) < eofb->eof_min_file_size)
		return false;

	return true;
}

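/*
 * For example, xfs_blockgc_free_dquots() below builds a union filter covering
 * up to three IDs (user, group and project) so that an inode owned by any of
 * the low-space quotas is picked up in a single scan.
 */
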
/*
 * This is a fast pass over the inode cache to try to get reclaim moving on as
 * many inodes as possible in a short period of time. It kicks itself every few
 * seconds, as well as being kicked by the inode cache shrinker when memory
 * goes low.
 */
void
xfs_reclaim_worker(
	struct work_struct	*work)
{
	struct xfs_mount	*mp = container_of(to_delayed_work(work),
					struct xfs_mount, m_reclaim_work);
	int			nr_to_scan = INT_MAX;

	xfs_reclaim_inodes_ag(mp, &nr_to_scan);
	xfs_reclaim_work_queue(mp);
}

STATIC int
xfs_inode_free_eofblocks(
	struct xfs_inode	*ip,
	void			*args,
	unsigned int		*lockflags)
{
	struct xfs_eofblocks	*eofb = args;
	bool			wait;

	wait = eofb && (eofb->eof_flags & XFS_EOF_FLAGS_SYNC);

	if (!xfs_iflags_test(ip, XFS_IEOFBLOCKS))
		return 0;

	/*
	 * If the mapping is dirty the operation can block and wait for some
	 * time. Unless we are waiting, skip it.
	 */
	if (!wait && mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY))
		return 0;

	if (!xfs_inode_matches_eofb(ip, eofb))
		return 0;

	/*
	 * If the caller is waiting, return -EAGAIN to keep the background
	 * scanner moving and revisit the inode in a subsequent pass.
	 */
	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
		if (wait)
			return -EAGAIN;
		return 0;
	}
	*lockflags |= XFS_IOLOCK_EXCL;

	if (xfs_can_free_eofblocks(ip, false))
		return xfs_free_eofblocks(ip);

	/* inode could be preallocated or append-only */
	trace_xfs_inode_free_eofblocks_invalid(ip);
	xfs_inode_clear_eofblocks_tag(ip);
	return 0;
}

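/*
 * Note that -EAGAIN from these scan helpers does not fail the walk:
 * xfs_icwalk_ag() counts the inode as skipped and restarts the whole AG walk
 * after a short delay, so a synchronous scan keeps revisiting the inode until
 * the work has been done.
 */
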
/*
 * Background scanning to trim preallocated space. This is queued based on the
 * 'speculative_prealloc_lifetime' tunable (5m by default).
 */
static inline void
xfs_blockgc_queue(
	struct xfs_perag	*pag)
{
	rcu_read_lock();
	if (radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_BLOCKGC_TAG))
		queue_delayed_work(pag->pag_mount->m_gc_workqueue,
				   &pag->pag_blockgc_work,
				   msecs_to_jiffies(xfs_blockgc_secs * 1000));
	rcu_read_unlock();
}

static void
xfs_blockgc_set_iflag(
	struct xfs_inode	*ip,
	unsigned long		iflag)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_perag	*pag;
	int			tagged;

	ASSERT((iflag & ~(XFS_IEOFBLOCKS | XFS_ICOWBLOCKS)) == 0);

	/*
	 * Don't bother locking the AG and looking up in the radix trees
	 * if we already know that we have the tag set.
	 */
	if (ip->i_flags & iflag)
		return;
	spin_lock(&ip->i_flags_lock);
	ip->i_flags |= iflag;
	spin_unlock(&ip->i_flags_lock);

	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
	spin_lock(&pag->pag_ici_lock);

	tagged = radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_BLOCKGC_TAG);
	radix_tree_tag_set(&pag->pag_ici_root,
			   XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
			   XFS_ICI_BLOCKGC_TAG);
	if (!tagged) {
		/* propagate the blockgc tag up into the perag radix tree */
		spin_lock(&ip->i_mount->m_perag_lock);
		radix_tree_tag_set(&ip->i_mount->m_perag_tree,
				   XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
				   XFS_ICI_BLOCKGC_TAG);
		spin_unlock(&ip->i_mount->m_perag_lock);

		/* kick off background trimming */
		xfs_blockgc_queue(pag);

		trace_xfs_perag_set_blockgc(ip->i_mount, pag->pag_agno, -1,
				_RET_IP_);
	}

	spin_unlock(&pag->pag_ici_lock);
	xfs_perag_put(pag);
}

void
xfs_inode_set_eofblocks_tag(
	xfs_inode_t	*ip)
{
	trace_xfs_inode_set_eofblocks_tag(ip);
	return xfs_blockgc_set_iflag(ip, XFS_IEOFBLOCKS);
}

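/*
 * Clear @iflag on @ip and, if no blockgc iflags remain set on the inode,
 * clear its XFS_ICI_BLOCKGC_TAG in the per-AG radix tree.  When that leaves
 * the AG with no tagged inodes, the matching tag in the per-mount perag tree
 * is cleared too so that tag-based perag walks skip this AG.
 */
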
static void
xfs_blockgc_clear_iflag(
	struct xfs_inode	*ip,
	unsigned long		iflag)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_perag	*pag;
	bool			clear_tag;

	ASSERT((iflag & ~(XFS_IEOFBLOCKS | XFS_ICOWBLOCKS)) == 0);

	spin_lock(&ip->i_flags_lock);
	ip->i_flags &= ~iflag;
	clear_tag = (ip->i_flags & (XFS_IEOFBLOCKS | XFS_ICOWBLOCKS)) == 0;
	spin_unlock(&ip->i_flags_lock);

	if (!clear_tag)
		return;

	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
	spin_lock(&pag->pag_ici_lock);

	radix_tree_tag_clear(&pag->pag_ici_root,
			     XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
			     XFS_ICI_BLOCKGC_TAG);
	if (!radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_BLOCKGC_TAG)) {
		/* clear the blockgc tag from the perag radix tree */
		spin_lock(&ip->i_mount->m_perag_lock);
		radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
				     XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
				     XFS_ICI_BLOCKGC_TAG);
		spin_unlock(&ip->i_mount->m_perag_lock);
		trace_xfs_perag_clear_blockgc(ip->i_mount, pag->pag_agno, -1,
				_RET_IP_);
	}

	spin_unlock(&pag->pag_ici_lock);
	xfs_perag_put(pag);
}

void
xfs_inode_clear_eofblocks_tag(
	xfs_inode_t	*ip)
{
	trace_xfs_inode_clear_eofblocks_tag(ip);
	return xfs_blockgc_clear_iflag(ip, XFS_IEOFBLOCKS);
}

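/*
 * The post-EOF and CoW cases share the single XFS_ICI_BLOCKGC_TAG radix tree
 * tag; the per-inode XFS_IEOFBLOCKS and XFS_ICOWBLOCKS flags record which
 * kind of work is actually pending on a tagged inode.
 */
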
/*
 * Set ourselves up to free CoW blocks from this file.  If it's already clean
 * then we can bail out quickly, but otherwise we must back off if the file
 * is undergoing some kind of write.
 */
static bool
xfs_prep_free_cowblocks(
	struct xfs_inode	*ip)
{
	/*
	 * Just clear the tag if we have an empty cow fork or none at all. It's
	 * possible the inode was fully unshared since it was originally tagged.
	 */
	if (!xfs_inode_has_cow_data(ip)) {
		trace_xfs_inode_free_cowblocks_invalid(ip);
		xfs_inode_clear_cowblocks_tag(ip);
		return false;
	}

	/*
	 * If the mapping is dirty or under writeback we cannot touch the
	 * CoW fork.  Leave it alone if we're in the midst of a directio.
	 */
	if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) ||
	    mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) ||
	    mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) ||
	    atomic_read(&VFS_I(ip)->i_dio_count))
		return false;

	return true;
}

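/*
 * Lock ordering for the CoW scan below is the IOLOCK followed by the
 * MMAPLOCK, and both are taken with trylocks so that a busy inode is skipped
 * (or reported as -EAGAIN to a synchronous caller) rather than stalling the
 * walk.
 */
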
/*
 * Automatic CoW Reservation Freeing
 *
 * These functions automatically garbage collect leftover CoW reservations
 * that were made on behalf of a cowextsize hint when we start to run out
 * of quota or when the reservations sit around for too long. If the file
 * has dirty pages or is undergoing writeback, its CoW reservations will
 * be retained.
 *
 * The actual garbage collection piggybacks off the same code that runs
 * the speculative EOF preallocation garbage collector.
 */
STATIC int
xfs_inode_free_cowblocks(
	struct xfs_inode	*ip,
	void			*args,
	unsigned int		*lockflags)
{
	struct xfs_eofblocks	*eofb = args;
	bool			wait;
	int			ret = 0;

	wait = eofb && (eofb->eof_flags & XFS_EOF_FLAGS_SYNC);

	if (!xfs_iflags_test(ip, XFS_ICOWBLOCKS))
		return 0;

	if (!xfs_prep_free_cowblocks(ip))
		return 0;

	if (!xfs_inode_matches_eofb(ip, eofb))
		return 0;

	/*
	 * If the caller is waiting, return -EAGAIN to keep the background
	 * scanner moving and revisit the inode in a subsequent pass.
	 */
	if (!(*lockflags & XFS_IOLOCK_EXCL) &&
	    !xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
		if (wait)
			return -EAGAIN;
		return 0;
	}
	*lockflags |= XFS_IOLOCK_EXCL;

	if (!xfs_ilock_nowait(ip, XFS_MMAPLOCK_EXCL)) {
		if (wait)
			return -EAGAIN;
		return 0;
	}
	*lockflags |= XFS_MMAPLOCK_EXCL;

	/*
	 * Check again, nobody else should be able to dirty blocks or change
	 * the reflink iflag now that we have the first two locks held.
	 */
	if (xfs_prep_free_cowblocks(ip))
		ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);
	return ret;
}

void
xfs_inode_set_cowblocks_tag(
	xfs_inode_t	*ip)
{
	trace_xfs_inode_set_cowblocks_tag(ip);
	return xfs_blockgc_set_iflag(ip, XFS_ICOWBLOCKS);
}

void
xfs_inode_clear_cowblocks_tag(
	xfs_inode_t	*ip)
{
	trace_xfs_inode_clear_cowblocks_tag(ip);
	return xfs_blockgc_clear_iflag(ip, XFS_ICOWBLOCKS);
}

#define for_each_perag_tag(mp, next_agno, pag, tag) \
	for ((next_agno) = 0, (pag) = xfs_perag_get_tag((mp), 0, (tag)); \
		(pag) != NULL; \
		(next_agno) = (pag)->pag_agno + 1, \
		xfs_perag_put(pag), \
		(pag) = xfs_perag_get_tag((mp), (next_agno), (tag)))

/* Disable post-EOF and CoW block auto-reclamation. */
void
xfs_blockgc_stop(
	struct xfs_mount	*mp)
{
	struct xfs_perag	*pag;
	xfs_agnumber_t		agno;

	for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG)
		cancel_delayed_work_sync(&pag->pag_blockgc_work);
}

/* Enable post-EOF and CoW block auto-reclamation. */
void
xfs_blockgc_start(
	struct xfs_mount	*mp)
{
	struct xfs_perag	*pag;
	xfs_agnumber_t		agno;

	for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG)
		xfs_blockgc_queue(pag);
}

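/*
 * Unlike the reclaim walk above, the blockgc scan takes a full VFS reference
 * on each inode it selects (via igrab() below), so the per-inode work may
 * block and the inode cannot be evicted underneath it; the walker drops the
 * reference once the inode has been processed.
 */
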
/*
 * Decide if the given @ip is eligible for garbage collection of speculative
 * preallocations, and grab it if so.  Returns true if it's ready to go or
 * false if we should just ignore it.
 */
static bool
xfs_blockgc_igrab(
	struct xfs_inode	*ip)
{
	struct inode		*inode = VFS_I(ip);

	ASSERT(rcu_read_lock_held());

	/* Check for stale RCU freed inode */
	spin_lock(&ip->i_flags_lock);
	if (!ip->i_ino)
		goto out_unlock_noent;

	/* avoid new or reclaimable inodes. Leave for reclaim code to flush */
	if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
		goto out_unlock_noent;
	spin_unlock(&ip->i_flags_lock);

	/* nothing to sync during shutdown */
	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return false;

	/* If we can't grab the inode, it must be on its way to reclaim. */
	if (!igrab(inode))
		return false;

	/* inode is valid */
	return true;

out_unlock_noent:
	spin_unlock(&ip->i_flags_lock);
	return false;
}

/* Scan one incore inode for block preallocations that we can remove. */
static int
xfs_blockgc_scan_inode(
	struct xfs_inode	*ip,
	void			*args)
{
	unsigned int		lockflags = 0;
	int			error;

	error = xfs_inode_free_eofblocks(ip, args, &lockflags);
	if (error)
		goto unlock;

	error = xfs_inode_free_cowblocks(ip, args, &lockflags);
unlock:
	if (lockflags)
		xfs_iunlock(ip, lockflags);
	return error;
}

/* Background worker that trims preallocated space. */
void
xfs_blockgc_worker(
	struct work_struct	*work)
{
	struct xfs_perag	*pag = container_of(to_delayed_work(work),
					struct xfs_perag, pag_blockgc_work);
	struct xfs_mount	*mp = pag->pag_mount;
	int			error;

	if (!sb_start_write_trylock(mp->m_super))
		return;
	error = xfs_icwalk_ag(pag, xfs_blockgc_scan_inode, NULL,
			XFS_ICWALK_BLOCKGC);
	if (error)
		xfs_info(mp, "AG %u preallocation gc worker failed, err=%d",
				pag->pag_agno, error);
	sb_end_write(mp->m_super);
	xfs_blockgc_queue(pag);
}

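/*
 * Note that xfs_blockgc_worker() holds a superblock write reference for the
 * duration of its scan, so background trimming cannot run while the
 * filesystem is frozen; if a freeze is in progress the trylock fails and
 * that pass is simply skipped.
 */
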
/*
 * Try to free space in the filesystem by purging eofblocks and cowblocks.
 */
int
xfs_blockgc_free_space(
	struct xfs_mount	*mp,
	struct xfs_eofblocks	*eofb)
{
	trace_xfs_blockgc_free_space(mp, eofb, _RET_IP_);

	return xfs_icwalk(mp, xfs_blockgc_scan_inode, eofb,
			XFS_ICWALK_BLOCKGC);
}

/*
 * Run cow/eofblocks scans on the supplied dquots.  We don't know exactly which
 * quota caused an allocation failure, so we make a best effort by including
 * each quota under low free space conditions (less than 1% free space) in the
 * scan.
 *
 * Callers must not hold any inode's ILOCK.  If requesting a synchronous scan
 * (XFS_EOF_FLAGS_SYNC), the caller also must not hold any inode's IOLOCK or
 * MMAPLOCK.
 */
int
xfs_blockgc_free_dquots(
	struct xfs_mount	*mp,
	struct xfs_dquot	*udqp,
	struct xfs_dquot	*gdqp,
	struct xfs_dquot	*pdqp,
	unsigned int		eof_flags)
{
	struct xfs_eofblocks	eofb = {0};
	bool			do_work = false;

	if (!udqp && !gdqp && !pdqp)
		return 0;

	/*
	 * Run a scan to free blocks using the union filter to cover all
	 * applicable quotas in a single scan.
	 */
	eofb.eof_flags = XFS_EOF_FLAGS_UNION | eof_flags;

	if (XFS_IS_UQUOTA_ENFORCED(mp) && udqp && xfs_dquot_lowsp(udqp)) {
		eofb.eof_uid = make_kuid(mp->m_super->s_user_ns, udqp->q_id);
		eofb.eof_flags |= XFS_EOF_FLAGS_UID;
		do_work = true;
	}

	if (XFS_IS_GQUOTA_ENFORCED(mp) && gdqp && xfs_dquot_lowsp(gdqp)) {
		eofb.eof_gid = make_kgid(mp->m_super->s_user_ns, gdqp->q_id);
		eofb.eof_flags |= XFS_EOF_FLAGS_GID;
		do_work = true;
	}

	if (XFS_IS_PQUOTA_ENFORCED(mp) && pdqp && xfs_dquot_lowsp(pdqp)) {
		eofb.eof_prid = pdqp->q_id;
		eofb.eof_flags |= XFS_EOF_FLAGS_PRID;
		do_work = true;
	}

	if (!do_work)
		return 0;

	return xfs_blockgc_free_space(mp, &eofb);
}

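/*
 * Illustrative caller pattern (not mandated by this file): a transaction
 * reservation that fails with -EDQUOT or -ENOSPC can call
 * xfs_blockgc_free_dquots() or xfs_blockgc_free_quota() once to reclaim
 * speculative preallocations charged to the offending IDs, then retry the
 * reservation.
 */
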
/* Run cow/eofblocks scans on the quotas attached to the inode. */
int
xfs_blockgc_free_quota(
	struct xfs_inode	*ip,
	unsigned int		eof_flags)
{
	return xfs_blockgc_free_dquots(ip->i_mount,
			xfs_inode_dquot(ip, XFS_DQTYPE_USER),
			xfs_inode_dquot(ip, XFS_DQTYPE_GROUP),
			xfs_inode_dquot(ip, XFS_DQTYPE_PROJ), eof_flags);
}

/* XFS Inode Cache Walking Code */

/*
 * Decide if we want to grab this inode in anticipation of doing work towards
 * the goal.  If selected, the VFS must hold a reference to this inode, which
 * will be released after processing.
 */
static inline bool
xfs_icwalk_igrab(
	enum xfs_icwalk_goal	goal,
	struct xfs_inode	*ip)
{
	switch (goal) {
	case XFS_ICWALK_DQRELE:
		return xfs_dqrele_igrab(ip);
	case XFS_ICWALK_BLOCKGC:
		return xfs_blockgc_igrab(ip);
	default:
		return false;
	}
}

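/*
 * Goals that have no radix tree tag (currently only XFS_ICWALK_DQRELE) map to
 * XFS_ICWALK_NULL_TAG, which makes the walk below fall back to an untagged
 * gang lookup and therefore visit every inode in the AG.
 */
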
/*
 * For a given per-AG structure @pag, grab, @execute, and rele all incore
 * inodes with the given radix tree @tag.
 */
static int
xfs_icwalk_ag(
	struct xfs_perag	*pag,
	int			(*execute)(struct xfs_inode *ip, void *args),
	void			*args,
	enum xfs_icwalk_goal	goal)
{
	struct xfs_mount	*mp = pag->pag_mount;
	uint32_t		first_index;
	int			last_error = 0;
	int			skipped;
	bool			done;
	int			nr_found;

restart:
	done = false;
	skipped = 0;
	first_index = 0;
	nr_found = 0;
	do {
		struct xfs_inode *batch[XFS_LOOKUP_BATCH];
		unsigned int	tag = xfs_icwalk_tag(goal);
		int		error = 0;
		int		i;

		rcu_read_lock();

		if (tag == XFS_ICWALK_NULL_TAG)
			nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
					(void **)batch, first_index,
					XFS_LOOKUP_BATCH);
		else
			nr_found = radix_tree_gang_lookup_tag(
					&pag->pag_ici_root,
					(void **) batch, first_index,
					XFS_LOOKUP_BATCH, tag);

		if (!nr_found) {
			rcu_read_unlock();
			break;
		}

		/*
		 * Grab the inodes before we drop the lock. If we found
		 * nothing, nr == 0 and the loop will be skipped.
		 */
		for (i = 0; i < nr_found; i++) {
			struct xfs_inode *ip = batch[i];

			if (done || !xfs_icwalk_igrab(goal, ip))
				batch[i] = NULL;

			/*
			 * Update the index for the next lookup. Catch
			 * overflows into the next AG range which can occur if
			 * we have inodes in the last block of the AG and we
			 * are currently pointing to the last inode.
			 *
			 * Because we may see inodes that are from the wrong AG
			 * due to RCU freeing and reallocation, only update the
			 * index if it lies in this AG. It was a race that led
			 * us to see this inode, so another lookup from the
			 * same index will not find it again.
			 */
			if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
				continue;
			first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
			if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
				done = true;
		}

		/* unlock now we've grabbed the inodes. */
		rcu_read_unlock();

		for (i = 0; i < nr_found; i++) {
			if (!batch[i])
				continue;
			error = execute(batch[i], args);
			xfs_irele(batch[i]);
			if (error == -EAGAIN) {
				skipped++;
				continue;
			}
			if (error && last_error != -EFSCORRUPTED)
				last_error = error;
		}

		/* bail out if the filesystem is corrupted. */
		if (error == -EFSCORRUPTED)
			break;

		cond_resched();

	} while (nr_found && !done);

	if (skipped) {
		delay(1);
		goto restart;
	}
	return last_error;
}

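/*
 * The walks below remember the first error other than -EAGAIN and return it
 * once the scan finishes, but -EFSCORRUPTED aborts the walk immediately.
 */
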
/* Fetch the next (possibly tagged) per-AG structure. */
static inline struct xfs_perag *
xfs_icwalk_get_perag(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	enum xfs_icwalk_goal	goal)
{
	unsigned int		tag = xfs_icwalk_tag(goal);

	if (tag == XFS_ICWALK_NULL_TAG)
		return xfs_perag_get(mp, agno);
	return xfs_perag_get_tag(mp, agno, tag);
}

/*
 * Call the @execute function on all incore inodes matching the radix tree
 * @tag.
 */
static int
xfs_icwalk(
	struct xfs_mount	*mp,
	int			(*execute)(struct xfs_inode *ip, void *args),
	void			*args,
	enum xfs_icwalk_goal	goal)
{
	struct xfs_perag	*pag;
	int			error = 0;
	int			last_error = 0;
	xfs_agnumber_t		agno = 0;

	while ((pag = xfs_icwalk_get_perag(mp, agno, goal))) {
		agno = pag->pag_agno + 1;
		error = xfs_icwalk_ag(pag, execute, args, goal);
		xfs_perag_put(pag);
		if (error) {
			last_error = error;
			if (error == -EFSCORRUPTED)
				break;
		}
	}
	return last_error;
	BUILD_BUG_ON(XFS_ICWALK_PRIVATE_FLAGS & XFS_EOF_FLAGS_VALID);
}