10b61f8a4SDave Chinner // SPDX-License-Identifier: GPL-2.0 26d8b79cfSDave Chinner /* 36d8b79cfSDave Chinner * Copyright (c) 2000-2005 Silicon Graphics, Inc. 46d8b79cfSDave Chinner * All Rights Reserved. 56d8b79cfSDave Chinner */ 66d8b79cfSDave Chinner #include "xfs.h" 76d8b79cfSDave Chinner #include "xfs_fs.h" 85467b34bSDarrick J. Wong #include "xfs_shared.h" 96ca1c906SDave Chinner #include "xfs_format.h" 10239880efSDave Chinner #include "xfs_log_format.h" 11239880efSDave Chinner #include "xfs_trans_resv.h" 126d8b79cfSDave Chinner #include "xfs_mount.h" 136d8b79cfSDave Chinner #include "xfs_inode.h" 14239880efSDave Chinner #include "xfs_trans.h" 15239880efSDave Chinner #include "xfs_trans_priv.h" 166d8b79cfSDave Chinner #include "xfs_inode_item.h" 176d8b79cfSDave Chinner #include "xfs_quota.h" 186d8b79cfSDave Chinner #include "xfs_trace.h" 196d8b79cfSDave Chinner #include "xfs_icache.h" 20c24b5dfaSDave Chinner #include "xfs_bmap_util.h" 21dc06f398SBrian Foster #include "xfs_dquot_item.h" 22dc06f398SBrian Foster #include "xfs_dquot.h" 2383104d44SDarrick J. Wong #include "xfs_reflink.h" 24bb8a66afSChristoph Hellwig #include "xfs_ialloc.h" 259bbafc71SDave Chinner #include "xfs_ag.h" 2601728b44SDave Chinner #include "xfs_log_priv.h" 276d8b79cfSDave Chinner 28f0e28280SJeff Layton #include <linux/iversion.h> 296d8b79cfSDave Chinner 30c809d7e9SDarrick J. Wong /* Radix tree tags for incore inode tree. */ 31c809d7e9SDarrick J. Wong 32c809d7e9SDarrick J. Wong /* inode is to be reclaimed */ 33c809d7e9SDarrick J. Wong #define XFS_ICI_RECLAIM_TAG 0 34c809d7e9SDarrick J. Wong /* Inode has speculative preallocations (posteof or cow) to clean. */ 35c809d7e9SDarrick J. Wong #define XFS_ICI_BLOCKGC_TAG 1 36c809d7e9SDarrick J. Wong 37c809d7e9SDarrick J. Wong /* 38c809d7e9SDarrick J. Wong * The goal for walking incore inodes. These can correspond with incore inode 39c809d7e9SDarrick J. Wong * radix tree tags when convenient. Avoid existing XFS_IWALK namespace. 40c809d7e9SDarrick J. Wong */ 41c809d7e9SDarrick J. Wong enum xfs_icwalk_goal { 42c809d7e9SDarrick J. Wong /* Goals directly associated with tagged inodes. */ 43c809d7e9SDarrick J. Wong XFS_ICWALK_BLOCKGC = XFS_ICI_BLOCKGC_TAG, 44f1bc5c56SDarrick J. Wong XFS_ICWALK_RECLAIM = XFS_ICI_RECLAIM_TAG, 45c809d7e9SDarrick J. Wong }; 46c809d7e9SDarrick J. Wong 477fdff526SDarrick J. Wong static int xfs_icwalk(struct xfs_mount *mp, 48b26b2bf1SDarrick J. Wong enum xfs_icwalk_goal goal, struct xfs_icwalk *icw); 497fdff526SDarrick J. Wong static int xfs_icwalk_ag(struct xfs_perag *pag, 50b26b2bf1SDarrick J. Wong enum xfs_icwalk_goal goal, struct xfs_icwalk *icw); 51df600197SDarrick J. Wong 5233479e05SDave Chinner /* 53b26b2bf1SDarrick J. Wong * Private inode cache walk flags for struct xfs_icwalk. Must not 54b26b2bf1SDarrick J. Wong * coincide with XFS_ICWALK_FLAGS_VALID. 551ad2cfe0SDarrick J. Wong */ 561ad2cfe0SDarrick J. Wong 57f1bc5c56SDarrick J. Wong /* Stop scanning after icw_scan_limit inodes. */ 58f1bc5c56SDarrick J. Wong #define XFS_ICWALK_FLAG_SCAN_LIMIT (1U << 28) 59f1bc5c56SDarrick J. Wong 609492750aSDarrick J. Wong #define XFS_ICWALK_FLAG_RECLAIM_SICK (1U << 27) 612d53f66bSDarrick J. Wong #define XFS_ICWALK_FLAG_UNION (1U << 26) /* union filter algorithm */ 629492750aSDarrick J. Wong 63777eb1faSChristoph Hellwig #define XFS_ICWALK_PRIVATE_FLAGS (XFS_ICWALK_FLAG_SCAN_LIMIT | \ 642d53f66bSDarrick J. Wong XFS_ICWALK_FLAG_RECLAIM_SICK | \ 652d53f66bSDarrick J. Wong XFS_ICWALK_FLAG_UNION) 661ad2cfe0SDarrick J. Wong 6733479e05SDave Chinner /* 6833479e05SDave Chinner * Allocate and initialise an xfs_inode. 6933479e05SDave Chinner */ 70638f4416SDave Chinner struct xfs_inode * 7133479e05SDave Chinner xfs_inode_alloc( 7233479e05SDave Chinner struct xfs_mount *mp, 7333479e05SDave Chinner xfs_ino_t ino) 7433479e05SDave Chinner { 7533479e05SDave Chinner struct xfs_inode *ip; 7633479e05SDave Chinner 7733479e05SDave Chinner /* 783050bd0bSCarlos Maiolino * XXX: If this didn't occur in transactions, we could drop GFP_NOFAIL 793050bd0bSCarlos Maiolino * and return NULL here on ENOMEM. 8033479e05SDave Chinner */ 81fd60b288SMuchun Song ip = alloc_inode_sb(mp->m_super, xfs_inode_cache, GFP_KERNEL | __GFP_NOFAIL); 823050bd0bSCarlos Maiolino 8333479e05SDave Chinner if (inode_init_always(mp->m_super, VFS_I(ip))) { 84182696fbSDarrick J. Wong kmem_cache_free(xfs_inode_cache, ip); 8533479e05SDave Chinner return NULL; 8633479e05SDave Chinner } 8733479e05SDave Chinner 88f38a032bSDave Chinner /* VFS doesn't initialise i_mode or i_state! */ 89c19b3b05SDave Chinner VFS_I(ip)->i_mode = 0; 90f38a032bSDave Chinner VFS_I(ip)->i_state = 0; 9167958013SMatthew Wilcox (Oracle) mapping_set_large_folios(VFS_I(ip)->i_mapping); 92c19b3b05SDave Chinner 93ff6d6af2SBill O'Donnell XFS_STATS_INC(mp, vn_active); 9433479e05SDave Chinner ASSERT(atomic_read(&ip->i_pincount) == 0); 9533479e05SDave Chinner ASSERT(ip->i_ino == 0); 9633479e05SDave Chinner 9733479e05SDave Chinner /* initialise the xfs inode */ 9833479e05SDave Chinner ip->i_ino = ino; 9933479e05SDave Chinner ip->i_mount = mp; 10033479e05SDave Chinner memset(&ip->i_imap, 0, sizeof(struct xfs_imap)); 1013993baebSDarrick J. Wong ip->i_cowfp = NULL; 1022ed5b09bSDarrick J. Wong memset(&ip->i_af, 0, sizeof(ip->i_af)); 1032ed5b09bSDarrick J. Wong ip->i_af.if_format = XFS_DINODE_FMT_EXTENTS; 1043ba738dfSChristoph Hellwig memset(&ip->i_df, 0, sizeof(ip->i_df)); 10533479e05SDave Chinner ip->i_flags = 0; 10633479e05SDave Chinner ip->i_delayed_blks = 0; 1073e09ab8fSChristoph Hellwig ip->i_diflags2 = mp->m_ino_geo.new_diflags2; 1086e73a545SChristoph Hellwig ip->i_nblocks = 0; 1097821ea30SChristoph Hellwig ip->i_forkoff = 0; 1106772c1f1SDarrick J. Wong ip->i_sick = 0; 1116772c1f1SDarrick J. Wong ip->i_checked = 0; 112cb357bf3SDarrick J. Wong INIT_WORK(&ip->i_ioend_work, xfs_end_io); 113cb357bf3SDarrick J. Wong INIT_LIST_HEAD(&ip->i_ioend_list); 114cb357bf3SDarrick J. Wong spin_lock_init(&ip->i_ioend_lock); 1152fd26cc0SDave Chinner ip->i_next_unlinked = NULLAGINO; 1162fd26cc0SDave Chinner ip->i_prev_unlinked = NULLAGINO; 11733479e05SDave Chinner 11833479e05SDave Chinner return ip; 11933479e05SDave Chinner } 12033479e05SDave Chinner 12133479e05SDave Chinner STATIC void 12233479e05SDave Chinner xfs_inode_free_callback( 12333479e05SDave Chinner struct rcu_head *head) 12433479e05SDave Chinner { 12533479e05SDave Chinner struct inode *inode = container_of(head, struct inode, i_rcu); 12633479e05SDave Chinner struct xfs_inode *ip = XFS_I(inode); 12733479e05SDave Chinner 128c19b3b05SDave Chinner switch (VFS_I(ip)->i_mode & S_IFMT) { 12933479e05SDave Chinner case S_IFREG: 13033479e05SDave Chinner case S_IFDIR: 13133479e05SDave Chinner case S_IFLNK: 132ef838512SChristoph Hellwig xfs_idestroy_fork(&ip->i_df); 13333479e05SDave Chinner break; 13433479e05SDave Chinner } 13533479e05SDave Chinner 1362ed5b09bSDarrick J. Wong xfs_ifork_zap_attr(ip); 137e45d7cb2SDarrick J. Wong 138ef838512SChristoph Hellwig if (ip->i_cowfp) { 139ef838512SChristoph Hellwig xfs_idestroy_fork(ip->i_cowfp); 140182696fbSDarrick J. Wong kmem_cache_free(xfs_ifork_cache, ip->i_cowfp); 141ef838512SChristoph Hellwig } 14233479e05SDave Chinner if (ip->i_itemp) { 14322525c17SDave Chinner ASSERT(!test_bit(XFS_LI_IN_AIL, 14422525c17SDave Chinner &ip->i_itemp->ili_item.li_flags)); 14533479e05SDave Chinner xfs_inode_item_destroy(ip); 14633479e05SDave Chinner ip->i_itemp = NULL; 14733479e05SDave Chinner } 14833479e05SDave Chinner 149182696fbSDarrick J. Wong kmem_cache_free(xfs_inode_cache, ip); 1501f2dcfe8SDave Chinner } 1511f2dcfe8SDave Chinner 1528a17d7ddSDave Chinner static void 1538a17d7ddSDave Chinner __xfs_inode_free( 1548a17d7ddSDave Chinner struct xfs_inode *ip) 1558a17d7ddSDave Chinner { 1568a17d7ddSDave Chinner /* asserts to verify all state is correct here */ 1578a17d7ddSDave Chinner ASSERT(atomic_read(&ip->i_pincount) == 0); 15848d55e2aSDave Chinner ASSERT(!ip->i_itemp || list_empty(&ip->i_itemp->ili_item.li_bio_list)); 1598a17d7ddSDave Chinner XFS_STATS_DEC(ip->i_mount, vn_active); 1608a17d7ddSDave Chinner 1618a17d7ddSDave Chinner call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); 1628a17d7ddSDave Chinner } 1638a17d7ddSDave Chinner 1641f2dcfe8SDave Chinner void 1651f2dcfe8SDave Chinner xfs_inode_free( 1661f2dcfe8SDave Chinner struct xfs_inode *ip) 1671f2dcfe8SDave Chinner { 168718ecc50SDave Chinner ASSERT(!xfs_iflags_test(ip, XFS_IFLUSHING)); 16998efe8afSBrian Foster 17033479e05SDave Chinner /* 17133479e05SDave Chinner * Because we use RCU freeing we need to ensure the inode always 17233479e05SDave Chinner * appears to be reclaimed with an invalid inode number when in the 17333479e05SDave Chinner * free state. The ip->i_flags_lock provides the barrier against lookup 17433479e05SDave Chinner * races. 17533479e05SDave Chinner */ 17633479e05SDave Chinner spin_lock(&ip->i_flags_lock); 17733479e05SDave Chinner ip->i_flags = XFS_IRECLAIM; 17833479e05SDave Chinner ip->i_ino = 0; 17933479e05SDave Chinner spin_unlock(&ip->i_flags_lock); 18033479e05SDave Chinner 1818a17d7ddSDave Chinner __xfs_inode_free(ip); 18233479e05SDave Chinner } 18333479e05SDave Chinner 18433479e05SDave Chinner /* 18502511a5aSDave Chinner * Queue background inode reclaim work if there are reclaimable inodes and there 18602511a5aSDave Chinner * isn't reclaim work already scheduled or in progress. 187ad438c40SDave Chinner */ 188ad438c40SDave Chinner static void 189ad438c40SDave Chinner xfs_reclaim_work_queue( 190ad438c40SDave Chinner struct xfs_mount *mp) 191ad438c40SDave Chinner { 192ad438c40SDave Chinner 193ad438c40SDave Chinner rcu_read_lock(); 194ad438c40SDave Chinner if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { 195ad438c40SDave Chinner queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work, 196ad438c40SDave Chinner msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10)); 197ad438c40SDave Chinner } 198ad438c40SDave Chinner rcu_read_unlock(); 199ad438c40SDave Chinner } 200ad438c40SDave Chinner 201c076ae7aSDarrick J. Wong /* 202c076ae7aSDarrick J. Wong * Background scanning to trim preallocated space. This is queued based on the 203c076ae7aSDarrick J. Wong * 'speculative_prealloc_lifetime' tunable (5m by default). 204c076ae7aSDarrick J. Wong */ 205c076ae7aSDarrick J. Wong static inline void 206c076ae7aSDarrick J. Wong xfs_blockgc_queue( 207ad438c40SDave Chinner struct xfs_perag *pag) 208ad438c40SDave Chinner { 2096f649091SDarrick J. Wong struct xfs_mount *mp = pag->pag_mount; 2106f649091SDarrick J. Wong 2116f649091SDarrick J. Wong if (!xfs_is_blockgc_enabled(mp)) 2126f649091SDarrick J. Wong return; 2136f649091SDarrick J. Wong 214c076ae7aSDarrick J. Wong rcu_read_lock(); 215c076ae7aSDarrick J. Wong if (radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_BLOCKGC_TAG)) 216ab23a776SDave Chinner queue_delayed_work(pag->pag_mount->m_blockgc_wq, 217c076ae7aSDarrick J. Wong &pag->pag_blockgc_work, 218c076ae7aSDarrick J. Wong msecs_to_jiffies(xfs_blockgc_secs * 1000)); 219c076ae7aSDarrick J. Wong rcu_read_unlock(); 220c076ae7aSDarrick J. Wong } 221c076ae7aSDarrick J. Wong 222c076ae7aSDarrick J. Wong /* Set a tag on both the AG incore inode tree and the AG radix tree. */ 223c076ae7aSDarrick J. Wong static void 224c076ae7aSDarrick J. Wong xfs_perag_set_inode_tag( 225c076ae7aSDarrick J. Wong struct xfs_perag *pag, 226c076ae7aSDarrick J. Wong xfs_agino_t agino, 227c076ae7aSDarrick J. Wong unsigned int tag) 228c076ae7aSDarrick J. Wong { 229ad438c40SDave Chinner struct xfs_mount *mp = pag->pag_mount; 230c076ae7aSDarrick J. Wong bool was_tagged; 231ad438c40SDave Chinner 23295989c46SBrian Foster lockdep_assert_held(&pag->pag_ici_lock); 233c076ae7aSDarrick J. Wong 234c076ae7aSDarrick J. Wong was_tagged = radix_tree_tagged(&pag->pag_ici_root, tag); 235c076ae7aSDarrick J. Wong radix_tree_tag_set(&pag->pag_ici_root, agino, tag); 236c076ae7aSDarrick J. Wong 237c076ae7aSDarrick J. Wong if (tag == XFS_ICI_RECLAIM_TAG) 238c076ae7aSDarrick J. Wong pag->pag_ici_reclaimable++; 239c076ae7aSDarrick J. Wong 240c076ae7aSDarrick J. Wong if (was_tagged) 241ad438c40SDave Chinner return; 242ad438c40SDave Chinner 243c076ae7aSDarrick J. Wong /* propagate the tag up into the perag radix tree */ 244ad438c40SDave Chinner spin_lock(&mp->m_perag_lock); 245c076ae7aSDarrick J. Wong radix_tree_tag_set(&mp->m_perag_tree, pag->pag_agno, tag); 246ad438c40SDave Chinner spin_unlock(&mp->m_perag_lock); 247ad438c40SDave Chinner 248c076ae7aSDarrick J. Wong /* start background work */ 249c076ae7aSDarrick J. Wong switch (tag) { 250c076ae7aSDarrick J. Wong case XFS_ICI_RECLAIM_TAG: 251ad438c40SDave Chinner xfs_reclaim_work_queue(mp); 252c076ae7aSDarrick J. Wong break; 253c076ae7aSDarrick J. Wong case XFS_ICI_BLOCKGC_TAG: 254c076ae7aSDarrick J. Wong xfs_blockgc_queue(pag); 255c076ae7aSDarrick J. Wong break; 256ad438c40SDave Chinner } 257ad438c40SDave Chinner 258368e2d09SDave Chinner trace_xfs_perag_set_inode_tag(pag, _RET_IP_); 259c076ae7aSDarrick J. Wong } 260c076ae7aSDarrick J. Wong 261c076ae7aSDarrick J. Wong /* Clear a tag on both the AG incore inode tree and the AG radix tree. */ 262ad438c40SDave Chinner static void 263c076ae7aSDarrick J. Wong xfs_perag_clear_inode_tag( 264c076ae7aSDarrick J. Wong struct xfs_perag *pag, 265c076ae7aSDarrick J. Wong xfs_agino_t agino, 266c076ae7aSDarrick J. Wong unsigned int tag) 267ad438c40SDave Chinner { 268ad438c40SDave Chinner struct xfs_mount *mp = pag->pag_mount; 269ad438c40SDave Chinner 27095989c46SBrian Foster lockdep_assert_held(&pag->pag_ici_lock); 271c076ae7aSDarrick J. Wong 272c076ae7aSDarrick J. Wong /* 273c076ae7aSDarrick J. Wong * Reclaim can signal (with a null agino) that it cleared its own tag 274c076ae7aSDarrick J. Wong * by removing the inode from the radix tree. 275c076ae7aSDarrick J. Wong */ 276c076ae7aSDarrick J. Wong if (agino != NULLAGINO) 277c076ae7aSDarrick J. Wong radix_tree_tag_clear(&pag->pag_ici_root, agino, tag); 278c076ae7aSDarrick J. Wong else 279c076ae7aSDarrick J. Wong ASSERT(tag == XFS_ICI_RECLAIM_TAG); 280c076ae7aSDarrick J. Wong 281c076ae7aSDarrick J. Wong if (tag == XFS_ICI_RECLAIM_TAG) 282c076ae7aSDarrick J. Wong pag->pag_ici_reclaimable--; 283c076ae7aSDarrick J. Wong 284c076ae7aSDarrick J. Wong if (radix_tree_tagged(&pag->pag_ici_root, tag)) 285ad438c40SDave Chinner return; 286ad438c40SDave Chinner 287c076ae7aSDarrick J. Wong /* clear the tag from the perag radix tree */ 288ad438c40SDave Chinner spin_lock(&mp->m_perag_lock); 289c076ae7aSDarrick J. Wong radix_tree_tag_clear(&mp->m_perag_tree, pag->pag_agno, tag); 290ad438c40SDave Chinner spin_unlock(&mp->m_perag_lock); 291ad438c40SDave Chinner 292368e2d09SDave Chinner trace_xfs_perag_clear_inode_tag(pag, _RET_IP_); 293c076ae7aSDarrick J. Wong } 294ad438c40SDave Chinner 295ad438c40SDave Chinner /* 29650997470SDave Chinner * When we recycle a reclaimable inode, we need to re-initialise the VFS inode 29750997470SDave Chinner * part of the structure. This is made more complex by the fact we store 29850997470SDave Chinner * information about the on-disk values in the VFS inode and so we can't just 29983e06f21SDave Chinner * overwrite the values unconditionally. Hence we save the parameters we 30050997470SDave Chinner * need to retain across reinitialisation, and rewrite them into the VFS inode 30183e06f21SDave Chinner * after reinitialisation even if it fails. 30250997470SDave Chinner */ 30350997470SDave Chinner static int 30450997470SDave Chinner xfs_reinit_inode( 30550997470SDave Chinner struct xfs_mount *mp, 30650997470SDave Chinner struct inode *inode) 30750997470SDave Chinner { 30850997470SDave Chinner int error; 30954d7b5c1SDave Chinner uint32_t nlink = inode->i_nlink; 3109e9a2674SDave Chinner uint32_t generation = inode->i_generation; 311f0e28280SJeff Layton uint64_t version = inode_peek_iversion(inode); 312c19b3b05SDave Chinner umode_t mode = inode->i_mode; 313acd1d715SAmir Goldstein dev_t dev = inode->i_rdev; 3143d8f2821SChristoph Hellwig kuid_t uid = inode->i_uid; 3153d8f2821SChristoph Hellwig kgid_t gid = inode->i_gid; 31650997470SDave Chinner 31750997470SDave Chinner error = inode_init_always(mp->m_super, inode); 31850997470SDave Chinner 31954d7b5c1SDave Chinner set_nlink(inode, nlink); 3209e9a2674SDave Chinner inode->i_generation = generation; 321f0e28280SJeff Layton inode_set_iversion_queried(inode, version); 322c19b3b05SDave Chinner inode->i_mode = mode; 323acd1d715SAmir Goldstein inode->i_rdev = dev; 3243d8f2821SChristoph Hellwig inode->i_uid = uid; 3253d8f2821SChristoph Hellwig inode->i_gid = gid; 32667958013SMatthew Wilcox (Oracle) mapping_set_large_folios(inode->i_mapping); 32750997470SDave Chinner return error; 32850997470SDave Chinner } 32950997470SDave Chinner 33050997470SDave Chinner /* 331ff7bebebSDarrick J. Wong * Carefully nudge an inode whose VFS state has been torn down back into a 332ff7bebebSDarrick J. Wong * usable state. Drops the i_flags_lock and the rcu read lock. 333ff7bebebSDarrick J. Wong */ 334ff7bebebSDarrick J. Wong static int 335ff7bebebSDarrick J. Wong xfs_iget_recycle( 336ff7bebebSDarrick J. Wong struct xfs_perag *pag, 337ff7bebebSDarrick J. Wong struct xfs_inode *ip) __releases(&ip->i_flags_lock) 338ff7bebebSDarrick J. Wong { 339ff7bebebSDarrick J. Wong struct xfs_mount *mp = ip->i_mount; 340ff7bebebSDarrick J. Wong struct inode *inode = VFS_I(ip); 341ff7bebebSDarrick J. Wong int error; 342ff7bebebSDarrick J. Wong 343ff7bebebSDarrick J. Wong trace_xfs_iget_recycle(ip); 344ff7bebebSDarrick J. Wong 34528b4b059SLong Li if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) 34628b4b059SLong Li return -EAGAIN; 34728b4b059SLong Li 348ff7bebebSDarrick J. Wong /* 349ff7bebebSDarrick J. Wong * We need to make it look like the inode is being reclaimed to prevent 350ff7bebebSDarrick J. Wong * the actual reclaim workers from stomping over us while we recycle 351ff7bebebSDarrick J. Wong * the inode. We can't clear the radix tree tag yet as it requires 352ff7bebebSDarrick J. Wong * pag_ici_lock to be held exclusive. 353ff7bebebSDarrick J. Wong */ 354ff7bebebSDarrick J. Wong ip->i_flags |= XFS_IRECLAIM; 355ff7bebebSDarrick J. Wong 356ff7bebebSDarrick J. Wong spin_unlock(&ip->i_flags_lock); 357ff7bebebSDarrick J. Wong rcu_read_unlock(); 358ff7bebebSDarrick J. Wong 359ff7bebebSDarrick J. Wong ASSERT(!rwsem_is_locked(&inode->i_rwsem)); 360ff7bebebSDarrick J. Wong error = xfs_reinit_inode(mp, inode); 36128b4b059SLong Li xfs_iunlock(ip, XFS_ILOCK_EXCL); 362ff7bebebSDarrick J. Wong if (error) { 363ff7bebebSDarrick J. Wong /* 364ff7bebebSDarrick J. Wong * Re-initializing the inode failed, and we are in deep 365ff7bebebSDarrick J. Wong * trouble. Try to re-add it to the reclaim list. 366ff7bebebSDarrick J. Wong */ 367ff7bebebSDarrick J. Wong rcu_read_lock(); 368ff7bebebSDarrick J. Wong spin_lock(&ip->i_flags_lock); 369ff7bebebSDarrick J. Wong ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM); 370ff7bebebSDarrick J. Wong ASSERT(ip->i_flags & XFS_IRECLAIMABLE); 371ff7bebebSDarrick J. Wong spin_unlock(&ip->i_flags_lock); 372ff7bebebSDarrick J. Wong rcu_read_unlock(); 373ff7bebebSDarrick J. Wong 374ff7bebebSDarrick J. Wong trace_xfs_iget_recycle_fail(ip); 375ff7bebebSDarrick J. Wong return error; 376ff7bebebSDarrick J. Wong } 377ff7bebebSDarrick J. Wong 378ff7bebebSDarrick J. Wong spin_lock(&pag->pag_ici_lock); 379ff7bebebSDarrick J. Wong spin_lock(&ip->i_flags_lock); 380ff7bebebSDarrick J. Wong 381ff7bebebSDarrick J. Wong /* 382ff7bebebSDarrick J. Wong * Clear the per-lifetime state in the inode as we are now effectively 383ff7bebebSDarrick J. Wong * a new inode and need to return to the initial state before reuse 384ff7bebebSDarrick J. Wong * occurs. 385ff7bebebSDarrick J. Wong */ 386ff7bebebSDarrick J. Wong ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS; 387ff7bebebSDarrick J. Wong ip->i_flags |= XFS_INEW; 388ff7bebebSDarrick J. Wong xfs_perag_clear_inode_tag(pag, XFS_INO_TO_AGINO(mp, ip->i_ino), 389ff7bebebSDarrick J. Wong XFS_ICI_RECLAIM_TAG); 390ff7bebebSDarrick J. Wong inode->i_state = I_NEW; 391ff7bebebSDarrick J. Wong spin_unlock(&ip->i_flags_lock); 392ff7bebebSDarrick J. Wong spin_unlock(&pag->pag_ici_lock); 393ff7bebebSDarrick J. Wong 394ff7bebebSDarrick J. Wong return 0; 395ff7bebebSDarrick J. Wong } 396ff7bebebSDarrick J. Wong 397ff7bebebSDarrick J. Wong /* 398afca6c5bSDave Chinner * If we are allocating a new inode, then check what was returned is 399afca6c5bSDave Chinner * actually a free, empty inode. If we are not allocating an inode, 400afca6c5bSDave Chinner * then check we didn't find a free inode. 401afca6c5bSDave Chinner * 402afca6c5bSDave Chinner * Returns: 403afca6c5bSDave Chinner * 0 if the inode free state matches the lookup context 404afca6c5bSDave Chinner * -ENOENT if the inode is free and we are not allocating 405afca6c5bSDave Chinner * -EFSCORRUPTED if there is any state mismatch at all 406afca6c5bSDave Chinner */ 407afca6c5bSDave Chinner static int 408afca6c5bSDave Chinner xfs_iget_check_free_state( 409afca6c5bSDave Chinner struct xfs_inode *ip, 410afca6c5bSDave Chinner int flags) 411afca6c5bSDave Chinner { 412afca6c5bSDave Chinner if (flags & XFS_IGET_CREATE) { 413afca6c5bSDave Chinner /* should be a free inode */ 414afca6c5bSDave Chinner if (VFS_I(ip)->i_mode != 0) { 415afca6c5bSDave Chinner xfs_warn(ip->i_mount, 416afca6c5bSDave Chinner "Corruption detected! Free inode 0x%llx not marked free! (mode 0x%x)", 417afca6c5bSDave Chinner ip->i_ino, VFS_I(ip)->i_mode); 418afca6c5bSDave Chinner return -EFSCORRUPTED; 419afca6c5bSDave Chinner } 420afca6c5bSDave Chinner 4216e73a545SChristoph Hellwig if (ip->i_nblocks != 0) { 422afca6c5bSDave Chinner xfs_warn(ip->i_mount, 423afca6c5bSDave Chinner "Corruption detected! Free inode 0x%llx has blocks allocated!", 424afca6c5bSDave Chinner ip->i_ino); 425afca6c5bSDave Chinner return -EFSCORRUPTED; 426afca6c5bSDave Chinner } 427afca6c5bSDave Chinner return 0; 428afca6c5bSDave Chinner } 429afca6c5bSDave Chinner 430afca6c5bSDave Chinner /* should be an allocated inode */ 431afca6c5bSDave Chinner if (VFS_I(ip)->i_mode == 0) 432afca6c5bSDave Chinner return -ENOENT; 433afca6c5bSDave Chinner 434afca6c5bSDave Chinner return 0; 435afca6c5bSDave Chinner } 436afca6c5bSDave Chinner 437ab23a776SDave Chinner /* Make all pending inactivation work start immediately. */ 438ab23a776SDave Chinner static void 439ab23a776SDave Chinner xfs_inodegc_queue_all( 440ab23a776SDave Chinner struct xfs_mount *mp) 441ab23a776SDave Chinner { 442ab23a776SDave Chinner struct xfs_inodegc *gc; 443ab23a776SDave Chinner int cpu; 444ab23a776SDave Chinner 445ab23a776SDave Chinner for_each_online_cpu(cpu) { 446ab23a776SDave Chinner gc = per_cpu_ptr(mp->m_inodegc, cpu); 447ab23a776SDave Chinner if (!llist_empty(&gc->list)) 4487cf2b0f9SDave Chinner mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0); 449ab23a776SDave Chinner } 450ab23a776SDave Chinner } 451ab23a776SDave Chinner 452afca6c5bSDave Chinner /* 45333479e05SDave Chinner * Check the validity of the inode we just found it the cache 45433479e05SDave Chinner */ 45533479e05SDave Chinner static int 45633479e05SDave Chinner xfs_iget_cache_hit( 45733479e05SDave Chinner struct xfs_perag *pag, 45833479e05SDave Chinner struct xfs_inode *ip, 45933479e05SDave Chinner xfs_ino_t ino, 46033479e05SDave Chinner int flags, 46133479e05SDave Chinner int lock_flags) __releases(RCU) 46233479e05SDave Chinner { 46333479e05SDave Chinner struct inode *inode = VFS_I(ip); 46433479e05SDave Chinner struct xfs_mount *mp = ip->i_mount; 46533479e05SDave Chinner int error; 46633479e05SDave Chinner 46733479e05SDave Chinner /* 46833479e05SDave Chinner * check for re-use of an inode within an RCU grace period due to the 46933479e05SDave Chinner * radix tree nodes not being updated yet. We monitor for this by 47033479e05SDave Chinner * setting the inode number to zero before freeing the inode structure. 47133479e05SDave Chinner * If the inode has been reallocated and set up, then the inode number 47233479e05SDave Chinner * will not match, so check for that, too. 47333479e05SDave Chinner */ 47433479e05SDave Chinner spin_lock(&ip->i_flags_lock); 47577b4d286SDarrick J. Wong if (ip->i_ino != ino) 47677b4d286SDarrick J. Wong goto out_skip; 47733479e05SDave Chinner 47833479e05SDave Chinner /* 47933479e05SDave Chinner * If we are racing with another cache hit that is currently 48033479e05SDave Chinner * instantiating this inode or currently recycling it out of 481ff7bebebSDarrick J. Wong * reclaimable state, wait for the initialisation to complete 48233479e05SDave Chinner * before continuing. 48333479e05SDave Chinner * 484ab23a776SDave Chinner * If we're racing with the inactivation worker we also want to wait. 485ab23a776SDave Chinner * If we're creating a new file, it's possible that the worker 486ab23a776SDave Chinner * previously marked the inode as free on disk but hasn't finished 487ab23a776SDave Chinner * updating the incore state yet. The AGI buffer will be dirty and 488ab23a776SDave Chinner * locked to the icreate transaction, so a synchronous push of the 489ab23a776SDave Chinner * inodegc workers would result in deadlock. For a regular iget, the 490ab23a776SDave Chinner * worker is running already, so we might as well wait. 491ab23a776SDave Chinner * 49233479e05SDave Chinner * XXX(hch): eventually we should do something equivalent to 49333479e05SDave Chinner * wait_on_inode to wait for these flags to be cleared 49433479e05SDave Chinner * instead of polling for it. 49533479e05SDave Chinner */ 496ab23a776SDave Chinner if (ip->i_flags & (XFS_INEW | XFS_IRECLAIM | XFS_INACTIVATING)) 49777b4d286SDarrick J. Wong goto out_skip; 49833479e05SDave Chinner 499ab23a776SDave Chinner if (ip->i_flags & XFS_NEED_INACTIVE) { 500ab23a776SDave Chinner /* Unlinked inodes cannot be re-grabbed. */ 501ab23a776SDave Chinner if (VFS_I(ip)->i_nlink == 0) { 502ab23a776SDave Chinner error = -ENOENT; 503ab23a776SDave Chinner goto out_error; 504ab23a776SDave Chinner } 505ab23a776SDave Chinner goto out_inodegc_flush; 506ab23a776SDave Chinner } 507ab23a776SDave Chinner 50833479e05SDave Chinner /* 509afca6c5bSDave Chinner * Check the inode free state is valid. This also detects lookup 510afca6c5bSDave Chinner * racing with unlinks. 51133479e05SDave Chinner */ 512afca6c5bSDave Chinner error = xfs_iget_check_free_state(ip, flags); 513afca6c5bSDave Chinner if (error) 51433479e05SDave Chinner goto out_error; 51533479e05SDave Chinner 51677b4d286SDarrick J. Wong /* Skip inodes that have no vfs state. */ 51777b4d286SDarrick J. Wong if ((flags & XFS_IGET_INCORE) && 51877b4d286SDarrick J. Wong (ip->i_flags & XFS_IRECLAIMABLE)) 51977b4d286SDarrick J. Wong goto out_skip; 520378f681cSDarrick J. Wong 52177b4d286SDarrick J. Wong /* The inode fits the selection criteria; process it. */ 52277b4d286SDarrick J. Wong if (ip->i_flags & XFS_IRECLAIMABLE) { 523ff7bebebSDarrick J. Wong /* Drops i_flags_lock and RCU read lock. */ 524ff7bebebSDarrick J. Wong error = xfs_iget_recycle(pag, ip); 52528b4b059SLong Li if (error == -EAGAIN) 52628b4b059SLong Li goto out_skip; 527ff7bebebSDarrick J. Wong if (error) 528ff7bebebSDarrick J. Wong return error; 52933479e05SDave Chinner } else { 53033479e05SDave Chinner /* If the VFS inode is being torn down, pause and try again. */ 53177b4d286SDarrick J. Wong if (!igrab(inode)) 53277b4d286SDarrick J. Wong goto out_skip; 53333479e05SDave Chinner 53433479e05SDave Chinner /* We've got a live one. */ 53533479e05SDave Chinner spin_unlock(&ip->i_flags_lock); 53633479e05SDave Chinner rcu_read_unlock(); 53733479e05SDave Chinner trace_xfs_iget_hit(ip); 53833479e05SDave Chinner } 53933479e05SDave Chinner 54033479e05SDave Chinner if (lock_flags != 0) 54133479e05SDave Chinner xfs_ilock(ip, lock_flags); 54233479e05SDave Chinner 543378f681cSDarrick J. Wong if (!(flags & XFS_IGET_INCORE)) 544dae2f8edSIra Weiny xfs_iflags_clear(ip, XFS_ISTALE); 545ff6d6af2SBill O'Donnell XFS_STATS_INC(mp, xs_ig_found); 54633479e05SDave Chinner 54733479e05SDave Chinner return 0; 54833479e05SDave Chinner 54977b4d286SDarrick J. Wong out_skip: 55077b4d286SDarrick J. Wong trace_xfs_iget_skip(ip); 55177b4d286SDarrick J. Wong XFS_STATS_INC(mp, xs_ig_frecycle); 55277b4d286SDarrick J. Wong error = -EAGAIN; 55333479e05SDave Chinner out_error: 55433479e05SDave Chinner spin_unlock(&ip->i_flags_lock); 55533479e05SDave Chinner rcu_read_unlock(); 55633479e05SDave Chinner return error; 557ab23a776SDave Chinner 558ab23a776SDave Chinner out_inodegc_flush: 559ab23a776SDave Chinner spin_unlock(&ip->i_flags_lock); 560ab23a776SDave Chinner rcu_read_unlock(); 561ab23a776SDave Chinner /* 562ab23a776SDave Chinner * Do not wait for the workers, because the caller could hold an AGI 563ab23a776SDave Chinner * buffer lock. We're just going to sleep in a loop anyway. 564ab23a776SDave Chinner */ 565ab23a776SDave Chinner if (xfs_is_inodegc_enabled(mp)) 566ab23a776SDave Chinner xfs_inodegc_queue_all(mp); 567ab23a776SDave Chinner return -EAGAIN; 56833479e05SDave Chinner } 56933479e05SDave Chinner 57033479e05SDave Chinner static int 57133479e05SDave Chinner xfs_iget_cache_miss( 57233479e05SDave Chinner struct xfs_mount *mp, 57333479e05SDave Chinner struct xfs_perag *pag, 57433479e05SDave Chinner xfs_trans_t *tp, 57533479e05SDave Chinner xfs_ino_t ino, 57633479e05SDave Chinner struct xfs_inode **ipp, 57733479e05SDave Chinner int flags, 57833479e05SDave Chinner int lock_flags) 57933479e05SDave Chinner { 58033479e05SDave Chinner struct xfs_inode *ip; 58133479e05SDave Chinner int error; 58233479e05SDave Chinner xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino); 58333479e05SDave Chinner int iflags; 58433479e05SDave Chinner 58533479e05SDave Chinner ip = xfs_inode_alloc(mp, ino); 58633479e05SDave Chinner if (!ip) 5872451337dSDave Chinner return -ENOMEM; 58833479e05SDave Chinner 589498f0adbSDave Chinner error = xfs_imap(pag, tp, ip->i_ino, &ip->i_imap, flags); 59033479e05SDave Chinner if (error) 59133479e05SDave Chinner goto out_destroy; 59233479e05SDave Chinner 593bb8a66afSChristoph Hellwig /* 594bb8a66afSChristoph Hellwig * For version 5 superblocks, if we are initialising a new inode and we 5950560f31aSDave Chinner * are not utilising the XFS_FEAT_IKEEP inode cluster mode, we can 596bb8a66afSChristoph Hellwig * simply build the new inode core with a random generation number. 597bb8a66afSChristoph Hellwig * 598bb8a66afSChristoph Hellwig * For version 4 (and older) superblocks, log recovery is dependent on 599965e0a1aSChristoph Hellwig * the i_flushiter field being initialised from the current on-disk 600bb8a66afSChristoph Hellwig * value and hence we must also read the inode off disk even when 601bb8a66afSChristoph Hellwig * initializing new inodes. 602bb8a66afSChristoph Hellwig */ 60338c26bfdSDave Chinner if (xfs_has_v3inodes(mp) && 6040560f31aSDave Chinner (flags & XFS_IGET_CREATE) && !xfs_has_ikeep(mp)) { 605a251c17aSJason A. Donenfeld VFS_I(ip)->i_generation = get_random_u32(); 606bb8a66afSChristoph Hellwig } else { 607bb8a66afSChristoph Hellwig struct xfs_buf *bp; 608bb8a66afSChristoph Hellwig 609af9dcddeSChristoph Hellwig error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp); 610bb8a66afSChristoph Hellwig if (error) 611bb8a66afSChristoph Hellwig goto out_destroy; 612bb8a66afSChristoph Hellwig 613af9dcddeSChristoph Hellwig error = xfs_inode_from_disk(ip, 614af9dcddeSChristoph Hellwig xfs_buf_offset(bp, ip->i_imap.im_boffset)); 615bb8a66afSChristoph Hellwig if (!error) 616bb8a66afSChristoph Hellwig xfs_buf_set_ref(bp, XFS_INO_REF); 617bb8a66afSChristoph Hellwig xfs_trans_brelse(tp, bp); 618bb8a66afSChristoph Hellwig 619bb8a66afSChristoph Hellwig if (error) 620bb8a66afSChristoph Hellwig goto out_destroy; 621bb8a66afSChristoph Hellwig } 622bb8a66afSChristoph Hellwig 62333479e05SDave Chinner trace_xfs_iget_miss(ip); 62433479e05SDave Chinner 625ee457001SDave Chinner /* 626afca6c5bSDave Chinner * Check the inode free state is valid. This also detects lookup 627afca6c5bSDave Chinner * racing with unlinks. 628ee457001SDave Chinner */ 629afca6c5bSDave Chinner error = xfs_iget_check_free_state(ip, flags); 630afca6c5bSDave Chinner if (error) 631ee457001SDave Chinner goto out_destroy; 63233479e05SDave Chinner 63333479e05SDave Chinner /* 63433479e05SDave Chinner * Preload the radix tree so we can insert safely under the 63533479e05SDave Chinner * write spinlock. Note that we cannot sleep inside the preload 63633479e05SDave Chinner * region. Since we can be called from transaction context, don't 63733479e05SDave Chinner * recurse into the file system. 63833479e05SDave Chinner */ 63933479e05SDave Chinner if (radix_tree_preload(GFP_NOFS)) { 6402451337dSDave Chinner error = -EAGAIN; 64133479e05SDave Chinner goto out_destroy; 64233479e05SDave Chinner } 64333479e05SDave Chinner 64433479e05SDave Chinner /* 64533479e05SDave Chinner * Because the inode hasn't been added to the radix-tree yet it can't 64633479e05SDave Chinner * be found by another thread, so we can do the non-sleeping lock here. 64733479e05SDave Chinner */ 64833479e05SDave Chinner if (lock_flags) { 64933479e05SDave Chinner if (!xfs_ilock_nowait(ip, lock_flags)) 65033479e05SDave Chinner BUG(); 65133479e05SDave Chinner } 65233479e05SDave Chinner 65333479e05SDave Chinner /* 65433479e05SDave Chinner * These values must be set before inserting the inode into the radix 65533479e05SDave Chinner * tree as the moment it is inserted a concurrent lookup (allowed by the 65633479e05SDave Chinner * RCU locking mechanism) can find it and that lookup must see that this 65733479e05SDave Chinner * is an inode currently under construction (i.e. that XFS_INEW is set). 65833479e05SDave Chinner * The ip->i_flags_lock that protects the XFS_INEW flag forms the 65933479e05SDave Chinner * memory barrier that ensures this detection works correctly at lookup 66033479e05SDave Chinner * time. 66133479e05SDave Chinner */ 66233479e05SDave Chinner iflags = XFS_INEW; 66333479e05SDave Chinner if (flags & XFS_IGET_DONTCACHE) 6642c567af4SIra Weiny d_mark_dontcache(VFS_I(ip)); 665113a5683SChandra Seetharaman ip->i_udquot = NULL; 666113a5683SChandra Seetharaman ip->i_gdquot = NULL; 66792f8ff73SChandra Seetharaman ip->i_pdquot = NULL; 66833479e05SDave Chinner xfs_iflags_set(ip, iflags); 66933479e05SDave Chinner 67033479e05SDave Chinner /* insert the new inode */ 67133479e05SDave Chinner spin_lock(&pag->pag_ici_lock); 67233479e05SDave Chinner error = radix_tree_insert(&pag->pag_ici_root, agino, ip); 67333479e05SDave Chinner if (unlikely(error)) { 67433479e05SDave Chinner WARN_ON(error != -EEXIST); 675ff6d6af2SBill O'Donnell XFS_STATS_INC(mp, xs_ig_dup); 6762451337dSDave Chinner error = -EAGAIN; 67733479e05SDave Chinner goto out_preload_end; 67833479e05SDave Chinner } 67933479e05SDave Chinner spin_unlock(&pag->pag_ici_lock); 68033479e05SDave Chinner radix_tree_preload_end(); 68133479e05SDave Chinner 68233479e05SDave Chinner *ipp = ip; 68333479e05SDave Chinner return 0; 68433479e05SDave Chinner 68533479e05SDave Chinner out_preload_end: 68633479e05SDave Chinner spin_unlock(&pag->pag_ici_lock); 68733479e05SDave Chinner radix_tree_preload_end(); 68833479e05SDave Chinner if (lock_flags) 68933479e05SDave Chinner xfs_iunlock(ip, lock_flags); 69033479e05SDave Chinner out_destroy: 69133479e05SDave Chinner __destroy_inode(VFS_I(ip)); 69233479e05SDave Chinner xfs_inode_free(ip); 69333479e05SDave Chinner return error; 69433479e05SDave Chinner } 69533479e05SDave Chinner 69633479e05SDave Chinner /* 69702511a5aSDave Chinner * Look up an inode by number in the given file system. The inode is looked up 69802511a5aSDave Chinner * in the cache held in each AG. If the inode is found in the cache, initialise 69902511a5aSDave Chinner * the vfs inode if necessary. 70033479e05SDave Chinner * 70102511a5aSDave Chinner * If it is not in core, read it in from the file system's device, add it to the 70202511a5aSDave Chinner * cache and initialise the vfs inode. 70333479e05SDave Chinner * 70433479e05SDave Chinner * The inode is locked according to the value of the lock_flags parameter. 70502511a5aSDave Chinner * Inode lookup is only done during metadata operations and not as part of the 70602511a5aSDave Chinner * data IO path. Hence we only allow locking of the XFS_ILOCK during lookup. 70733479e05SDave Chinner */ 70833479e05SDave Chinner int 70933479e05SDave Chinner xfs_iget( 71002511a5aSDave Chinner struct xfs_mount *mp, 71102511a5aSDave Chinner struct xfs_trans *tp, 71233479e05SDave Chinner xfs_ino_t ino, 71333479e05SDave Chinner uint flags, 71433479e05SDave Chinner uint lock_flags, 71502511a5aSDave Chinner struct xfs_inode **ipp) 71633479e05SDave Chinner { 71702511a5aSDave Chinner struct xfs_inode *ip; 71802511a5aSDave Chinner struct xfs_perag *pag; 71933479e05SDave Chinner xfs_agino_t agino; 72002511a5aSDave Chinner int error; 72133479e05SDave Chinner 72233479e05SDave Chinner ASSERT((lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) == 0); 72333479e05SDave Chinner 72433479e05SDave Chinner /* reject inode numbers outside existing AGs */ 72533479e05SDave Chinner if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) 7262451337dSDave Chinner return -EINVAL; 72733479e05SDave Chinner 728ff6d6af2SBill O'Donnell XFS_STATS_INC(mp, xs_ig_attempts); 7298774cf8bSLucas Stach 73033479e05SDave Chinner /* get the perag structure and ensure that it's inode capable */ 73133479e05SDave Chinner pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino)); 73233479e05SDave Chinner agino = XFS_INO_TO_AGINO(mp, ino); 73333479e05SDave Chinner 73433479e05SDave Chinner again: 73533479e05SDave Chinner error = 0; 73633479e05SDave Chinner rcu_read_lock(); 73733479e05SDave Chinner ip = radix_tree_lookup(&pag->pag_ici_root, agino); 73833479e05SDave Chinner 73933479e05SDave Chinner if (ip) { 74033479e05SDave Chinner error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags); 74133479e05SDave Chinner if (error) 74233479e05SDave Chinner goto out_error_or_again; 74333479e05SDave Chinner } else { 74433479e05SDave Chinner rcu_read_unlock(); 745378f681cSDarrick J. Wong if (flags & XFS_IGET_INCORE) { 746ed438b47SDarrick J. Wong error = -ENODATA; 747378f681cSDarrick J. Wong goto out_error_or_again; 748378f681cSDarrick J. Wong } 749ff6d6af2SBill O'Donnell XFS_STATS_INC(mp, xs_ig_missed); 75033479e05SDave Chinner 75133479e05SDave Chinner error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, 75233479e05SDave Chinner flags, lock_flags); 75333479e05SDave Chinner if (error) 75433479e05SDave Chinner goto out_error_or_again; 75533479e05SDave Chinner } 75633479e05SDave Chinner xfs_perag_put(pag); 75733479e05SDave Chinner 75833479e05SDave Chinner *ipp = ip; 75933479e05SDave Chinner 76033479e05SDave Chinner /* 76158c90473SDave Chinner * If we have a real type for an on-disk inode, we can setup the inode 762132c460eSYang Xu * now. If it's a new inode being created, xfs_init_new_inode will 763132c460eSYang Xu * handle it. 76433479e05SDave Chinner */ 765c19b3b05SDave Chinner if (xfs_iflags_test(ip, XFS_INEW) && VFS_I(ip)->i_mode != 0) 76658c90473SDave Chinner xfs_setup_existing_inode(ip); 76733479e05SDave Chinner return 0; 76833479e05SDave Chinner 76933479e05SDave Chinner out_error_or_again: 770302436c2SDarrick J. Wong if (!(flags & (XFS_IGET_INCORE | XFS_IGET_NORETRY)) && 771302436c2SDarrick J. Wong error == -EAGAIN) { 77233479e05SDave Chinner delay(1); 77333479e05SDave Chinner goto again; 77433479e05SDave Chinner } 77533479e05SDave Chinner xfs_perag_put(pag); 77633479e05SDave Chinner return error; 77733479e05SDave Chinner } 77833479e05SDave Chinner 7796d8b79cfSDave Chinner /* 780378f681cSDarrick J. Wong * "Is this a cached inode that's also allocated?" 781378f681cSDarrick J. Wong * 782378f681cSDarrick J. Wong * Look up an inode by number in the given file system. If the inode is 783378f681cSDarrick J. Wong * in cache and isn't in purgatory, return 1 if the inode is allocated 784378f681cSDarrick J. Wong * and 0 if it is not. For all other cases (not in cache, being torn 785378f681cSDarrick J. Wong * down, etc.), return a negative error code. 786378f681cSDarrick J. Wong * 787378f681cSDarrick J. Wong * The caller has to prevent inode allocation and freeing activity, 788378f681cSDarrick J. Wong * presumably by locking the AGI buffer. This is to ensure that an 789378f681cSDarrick J. Wong * inode cannot transition from allocated to freed until the caller is 790378f681cSDarrick J. Wong * ready to allow that. If the inode is in an intermediate state (new, 791378f681cSDarrick J. Wong * reclaimable, or being reclaimed), -EAGAIN will be returned; if the 792378f681cSDarrick J. Wong * inode is not in the cache, -ENOENT will be returned. The caller must 793378f681cSDarrick J. Wong * deal with these scenarios appropriately. 794378f681cSDarrick J. Wong * 795378f681cSDarrick J. Wong * This is a specialized use case for the online scrubber; if you're 796378f681cSDarrick J. Wong * reading this, you probably want xfs_iget. 797378f681cSDarrick J. Wong */ 798378f681cSDarrick J. Wong int 799378f681cSDarrick J. Wong xfs_icache_inode_is_allocated( 800378f681cSDarrick J. Wong struct xfs_mount *mp, 801378f681cSDarrick J. Wong struct xfs_trans *tp, 802378f681cSDarrick J. Wong xfs_ino_t ino, 803378f681cSDarrick J. Wong bool *inuse) 804378f681cSDarrick J. Wong { 805378f681cSDarrick J. Wong struct xfs_inode *ip; 806378f681cSDarrick J. Wong int error; 807378f681cSDarrick J. Wong 808378f681cSDarrick J. Wong error = xfs_iget(mp, tp, ino, XFS_IGET_INCORE, 0, &ip); 809378f681cSDarrick J. Wong if (error) 810378f681cSDarrick J. Wong return error; 811378f681cSDarrick J. Wong 812378f681cSDarrick J. Wong *inuse = !!(VFS_I(ip)->i_mode); 81344a8736bSDarrick J. Wong xfs_irele(ip); 814378f681cSDarrick J. Wong return 0; 815378f681cSDarrick J. Wong } 816378f681cSDarrick J. Wong 817579b62faSBrian Foster /* 8186d8b79cfSDave Chinner * Grab the inode for reclaim exclusively. 81950718b8dSDave Chinner * 82050718b8dSDave Chinner * We have found this inode via a lookup under RCU, so the inode may have 82150718b8dSDave Chinner * already been freed, or it may be in the process of being recycled by 82250718b8dSDave Chinner * xfs_iget(). In both cases, the inode will have XFS_IRECLAIM set. If the inode 82350718b8dSDave Chinner * has been fully recycled by the time we get the i_flags_lock, XFS_IRECLAIMABLE 82450718b8dSDave Chinner * will not be set. Hence we need to check for both these flag conditions to 82550718b8dSDave Chinner * avoid inodes that are no longer reclaim candidates. 82650718b8dSDave Chinner * 82750718b8dSDave Chinner * Note: checking for other state flags here, under the i_flags_lock or not, is 82850718b8dSDave Chinner * racy and should be avoided. Those races should be resolved only after we have 82950718b8dSDave Chinner * ensured that we are able to reclaim this inode and the world can see that we 83050718b8dSDave Chinner * are going to reclaim it. 83150718b8dSDave Chinner * 83250718b8dSDave Chinner * Return true if we grabbed it, false otherwise. 8336d8b79cfSDave Chinner */ 83450718b8dSDave Chinner static bool 835f1bc5c56SDarrick J. Wong xfs_reclaim_igrab( 8369492750aSDarrick J. Wong struct xfs_inode *ip, 837b26b2bf1SDarrick J. Wong struct xfs_icwalk *icw) 8386d8b79cfSDave Chinner { 8396d8b79cfSDave Chinner ASSERT(rcu_read_lock_held()); 8406d8b79cfSDave Chinner 8416d8b79cfSDave Chinner spin_lock(&ip->i_flags_lock); 8426d8b79cfSDave Chinner if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) || 8436d8b79cfSDave Chinner __xfs_iflags_test(ip, XFS_IRECLAIM)) { 8446d8b79cfSDave Chinner /* not a reclaim candidate. */ 8456d8b79cfSDave Chinner spin_unlock(&ip->i_flags_lock); 84650718b8dSDave Chinner return false; 8476d8b79cfSDave Chinner } 8489492750aSDarrick J. Wong 8499492750aSDarrick J. Wong /* Don't reclaim a sick inode unless the caller asked for it. */ 8509492750aSDarrick J. Wong if (ip->i_sick && 851b26b2bf1SDarrick J. Wong (!icw || !(icw->icw_flags & XFS_ICWALK_FLAG_RECLAIM_SICK))) { 8529492750aSDarrick J. Wong spin_unlock(&ip->i_flags_lock); 8539492750aSDarrick J. Wong return false; 8549492750aSDarrick J. Wong } 8559492750aSDarrick J. Wong 8566d8b79cfSDave Chinner __xfs_iflags_set(ip, XFS_IRECLAIM); 8576d8b79cfSDave Chinner spin_unlock(&ip->i_flags_lock); 85850718b8dSDave Chinner return true; 8596d8b79cfSDave Chinner } 8606d8b79cfSDave Chinner 8616d8b79cfSDave Chinner /* 86202511a5aSDave Chinner * Inode reclaim is non-blocking, so the default action if progress cannot be 86302511a5aSDave Chinner * made is to "requeue" the inode for reclaim by unlocking it and clearing the 86402511a5aSDave Chinner * XFS_IRECLAIM flag. If we are in a shutdown state, we don't care about 86502511a5aSDave Chinner * blocking anymore and hence we can wait for the inode to be able to reclaim 86602511a5aSDave Chinner * it. 8676d8b79cfSDave Chinner * 86802511a5aSDave Chinner * We do no IO here - if callers require inodes to be cleaned they must push the 86902511a5aSDave Chinner * AIL first to trigger writeback of dirty inodes. This enables writeback to be 87002511a5aSDave Chinner * done in the background in a non-blocking manner, and enables memory reclaim 87102511a5aSDave Chinner * to make progress without blocking. 8726d8b79cfSDave Chinner */ 8734d0bab3aSDave Chinner static void 8746d8b79cfSDave Chinner xfs_reclaim_inode( 8756d8b79cfSDave Chinner struct xfs_inode *ip, 87650718b8dSDave Chinner struct xfs_perag *pag) 8776d8b79cfSDave Chinner { 8788a17d7ddSDave Chinner xfs_ino_t ino = ip->i_ino; /* for radix_tree_delete */ 8796d8b79cfSDave Chinner 8809552e14dSDave Chinner if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) 8816d8b79cfSDave Chinner goto out; 882718ecc50SDave Chinner if (xfs_iflags_test_and_set(ip, XFS_IFLUSHING)) 8839552e14dSDave Chinner goto out_iunlock; 8846d8b79cfSDave Chinner 88501728b44SDave Chinner /* 88601728b44SDave Chinner * Check for log shutdown because aborting the inode can move the log 88701728b44SDave Chinner * tail and corrupt in memory state. This is fine if the log is shut 88801728b44SDave Chinner * down, but if the log is still active and only the mount is shut down 88901728b44SDave Chinner * then the in-memory log tail movement caused by the abort can be 89001728b44SDave Chinner * incorrectly propagated to disk. 89101728b44SDave Chinner */ 89201728b44SDave Chinner if (xlog_is_shutdown(ip->i_mount->m_log)) { 8936d8b79cfSDave Chinner xfs_iunpin_wait(ip); 894d2d7c047SDave Chinner xfs_iflush_shutdown_abort(ip); 8956d8b79cfSDave Chinner goto reclaim; 8966d8b79cfSDave Chinner } 897617825feSDave Chinner if (xfs_ipincount(ip)) 898718ecc50SDave Chinner goto out_clear_flush; 899617825feSDave Chinner if (!xfs_inode_clean(ip)) 900718ecc50SDave Chinner goto out_clear_flush; 901617825feSDave Chinner 902718ecc50SDave Chinner xfs_iflags_clear(ip, XFS_IFLUSHING); 9036d8b79cfSDave Chinner reclaim: 904ab23a776SDave Chinner trace_xfs_inode_reclaiming(ip); 90598efe8afSBrian Foster 9068a17d7ddSDave Chinner /* 9078a17d7ddSDave Chinner * Because we use RCU freeing we need to ensure the inode always appears 9088a17d7ddSDave Chinner * to be reclaimed with an invalid inode number when in the free state. 90998efe8afSBrian Foster * We do this as early as possible under the ILOCK so that 910f2e9ad21SOmar Sandoval * xfs_iflush_cluster() and xfs_ifree_cluster() can be guaranteed to 911f2e9ad21SOmar Sandoval * detect races with us here. By doing this, we guarantee that once 912f2e9ad21SOmar Sandoval * xfs_iflush_cluster() or xfs_ifree_cluster() has locked XFS_ILOCK that 913f2e9ad21SOmar Sandoval * it will see either a valid inode that will serialise correctly, or it 914f2e9ad21SOmar Sandoval * will see an invalid inode that it can skip. 9158a17d7ddSDave Chinner */ 9168a17d7ddSDave Chinner spin_lock(&ip->i_flags_lock); 9178a17d7ddSDave Chinner ip->i_flags = XFS_IRECLAIM; 9188a17d7ddSDave Chinner ip->i_ino = 0; 919255794c7SDarrick J. Wong ip->i_sick = 0; 920255794c7SDarrick J. Wong ip->i_checked = 0; 9218a17d7ddSDave Chinner spin_unlock(&ip->i_flags_lock); 9228a17d7ddSDave Chinner 923fad743d7SDave Chinner ASSERT(!ip->i_itemp || ip->i_itemp->ili_item.li_buf == NULL); 9246d8b79cfSDave Chinner xfs_iunlock(ip, XFS_ILOCK_EXCL); 9256d8b79cfSDave Chinner 926ff6d6af2SBill O'Donnell XFS_STATS_INC(ip->i_mount, xs_ig_reclaims); 9276d8b79cfSDave Chinner /* 9286d8b79cfSDave Chinner * Remove the inode from the per-AG radix tree. 9296d8b79cfSDave Chinner * 9306d8b79cfSDave Chinner * Because radix_tree_delete won't complain even if the item was never 9316d8b79cfSDave Chinner * added to the tree assert that it's been there before to catch 9326d8b79cfSDave Chinner * problems with the inode life time early on. 9336d8b79cfSDave Chinner */ 9346d8b79cfSDave Chinner spin_lock(&pag->pag_ici_lock); 9356d8b79cfSDave Chinner if (!radix_tree_delete(&pag->pag_ici_root, 9368a17d7ddSDave Chinner XFS_INO_TO_AGINO(ip->i_mount, ino))) 9376d8b79cfSDave Chinner ASSERT(0); 938c076ae7aSDarrick J. Wong xfs_perag_clear_inode_tag(pag, NULLAGINO, XFS_ICI_RECLAIM_TAG); 9396d8b79cfSDave Chinner spin_unlock(&pag->pag_ici_lock); 9406d8b79cfSDave Chinner 9416d8b79cfSDave Chinner /* 9426d8b79cfSDave Chinner * Here we do an (almost) spurious inode lock in order to coordinate 9436d8b79cfSDave Chinner * with inode cache radix tree lookups. This is because the lookup 9446d8b79cfSDave Chinner * can reference the inodes in the cache without taking references. 9456d8b79cfSDave Chinner * 9466d8b79cfSDave Chinner * We make that OK here by ensuring that we wait until the inode is 9476d8b79cfSDave Chinner * unlocked after the lookup before we go ahead and free it. 9486d8b79cfSDave Chinner */ 9496d8b79cfSDave Chinner xfs_ilock(ip, XFS_ILOCK_EXCL); 9503ea06d73SDarrick J. Wong ASSERT(!ip->i_udquot && !ip->i_gdquot && !ip->i_pdquot); 9516d8b79cfSDave Chinner xfs_iunlock(ip, XFS_ILOCK_EXCL); 95296355d5aSDave Chinner ASSERT(xfs_inode_clean(ip)); 9536d8b79cfSDave Chinner 9548a17d7ddSDave Chinner __xfs_inode_free(ip); 9554d0bab3aSDave Chinner return; 9566d8b79cfSDave Chinner 957718ecc50SDave Chinner out_clear_flush: 958718ecc50SDave Chinner xfs_iflags_clear(ip, XFS_IFLUSHING); 9599552e14dSDave Chinner out_iunlock: 9606d8b79cfSDave Chinner xfs_iunlock(ip, XFS_ILOCK_EXCL); 9619552e14dSDave Chinner out: 962617825feSDave Chinner xfs_iflags_clear(ip, XFS_IRECLAIM); 9636d8b79cfSDave Chinner } 9646d8b79cfSDave Chinner 9659492750aSDarrick J. Wong /* Reclaim sick inodes if we're unmounting or the fs went down. */ 9669492750aSDarrick J. Wong static inline bool 9679492750aSDarrick J. Wong xfs_want_reclaim_sick( 9689492750aSDarrick J. Wong struct xfs_mount *mp) 9699492750aSDarrick J. Wong { 9702e973b2cSDave Chinner return xfs_is_unmounting(mp) || xfs_has_norecovery(mp) || 97175c8c50fSDave Chinner xfs_is_shutdown(mp); 9729492750aSDarrick J. Wong } 9739492750aSDarrick J. Wong 9744d0bab3aSDave Chinner void 9756d8b79cfSDave Chinner xfs_reclaim_inodes( 9764d0bab3aSDave Chinner struct xfs_mount *mp) 9776d8b79cfSDave Chinner { 978b26b2bf1SDarrick J. Wong struct xfs_icwalk icw = { 979b26b2bf1SDarrick J. Wong .icw_flags = 0, 9809492750aSDarrick J. Wong }; 9819492750aSDarrick J. Wong 9829492750aSDarrick J. Wong if (xfs_want_reclaim_sick(mp)) 983b26b2bf1SDarrick J. Wong icw.icw_flags |= XFS_ICWALK_FLAG_RECLAIM_SICK; 9849492750aSDarrick J. Wong 9854d0bab3aSDave Chinner while (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { 986617825feSDave Chinner xfs_ail_push_all_sync(mp->m_ail); 987b26b2bf1SDarrick J. Wong xfs_icwalk(mp, XFS_ICWALK_RECLAIM, &icw); 9880f4ec0f1SZheng Bin } 9896d8b79cfSDave Chinner } 9906d8b79cfSDave Chinner 9916d8b79cfSDave Chinner /* 99202511a5aSDave Chinner * The shrinker infrastructure determines how many inodes we should scan for 99302511a5aSDave Chinner * reclaim. We want as many clean inodes ready to reclaim as possible, so we 99402511a5aSDave Chinner * push the AIL here. We also want to proactively free up memory if we can to 99502511a5aSDave Chinner * minimise the amount of work memory reclaim has to do so we kick the 99602511a5aSDave Chinner * background reclaim if it isn't already scheduled. 9976d8b79cfSDave Chinner */ 9980a234c6dSDave Chinner long 9996d8b79cfSDave Chinner xfs_reclaim_inodes_nr( 10006d8b79cfSDave Chinner struct xfs_mount *mp, 100110be350bSDarrick J. Wong unsigned long nr_to_scan) 10026d8b79cfSDave Chinner { 1003b26b2bf1SDarrick J. Wong struct xfs_icwalk icw = { 1004b26b2bf1SDarrick J. Wong .icw_flags = XFS_ICWALK_FLAG_SCAN_LIMIT, 100510be350bSDarrick J. Wong .icw_scan_limit = min_t(unsigned long, LONG_MAX, nr_to_scan), 1006f1bc5c56SDarrick J. Wong }; 1007f1bc5c56SDarrick J. Wong 10089492750aSDarrick J. Wong if (xfs_want_reclaim_sick(mp)) 1009b26b2bf1SDarrick J. Wong icw.icw_flags |= XFS_ICWALK_FLAG_RECLAIM_SICK; 10109492750aSDarrick J. Wong 10116d8b79cfSDave Chinner /* kick background reclaimer and push the AIL */ 10126d8b79cfSDave Chinner xfs_reclaim_work_queue(mp); 10136d8b79cfSDave Chinner xfs_ail_push_all(mp->m_ail); 10146d8b79cfSDave Chinner 1015b26b2bf1SDarrick J. Wong xfs_icwalk(mp, XFS_ICWALK_RECLAIM, &icw); 1016617825feSDave Chinner return 0; 10176d8b79cfSDave Chinner } 10186d8b79cfSDave Chinner 10196d8b79cfSDave Chinner /* 10206d8b79cfSDave Chinner * Return the number of reclaimable inodes in the filesystem for 10216d8b79cfSDave Chinner * the shrinker to determine how much to reclaim. 10226d8b79cfSDave Chinner */ 102310be350bSDarrick J. Wong long 10246d8b79cfSDave Chinner xfs_reclaim_inodes_count( 10256d8b79cfSDave Chinner struct xfs_mount *mp) 10266d8b79cfSDave Chinner { 10276d8b79cfSDave Chinner struct xfs_perag *pag; 10286d8b79cfSDave Chinner xfs_agnumber_t ag = 0; 102910be350bSDarrick J. Wong long reclaimable = 0; 10306d8b79cfSDave Chinner 10316d8b79cfSDave Chinner while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { 10326d8b79cfSDave Chinner ag = pag->pag_agno + 1; 10336d8b79cfSDave Chinner reclaimable += pag->pag_ici_reclaimable; 10346d8b79cfSDave Chinner xfs_perag_put(pag); 10356d8b79cfSDave Chinner } 10366d8b79cfSDave Chinner return reclaimable; 10376d8b79cfSDave Chinner } 10386d8b79cfSDave Chinner 103939b1cfd7SDarrick J. Wong STATIC bool 1040b26b2bf1SDarrick J. Wong xfs_icwalk_match_id( 10413e3f9f58SBrian Foster struct xfs_inode *ip, 1042b26b2bf1SDarrick J. Wong struct xfs_icwalk *icw) 10433e3f9f58SBrian Foster { 1044b26b2bf1SDarrick J. Wong if ((icw->icw_flags & XFS_ICWALK_FLAG_UID) && 1045b26b2bf1SDarrick J. Wong !uid_eq(VFS_I(ip)->i_uid, icw->icw_uid)) 104639b1cfd7SDarrick J. Wong return false; 10471b556048SBrian Foster 1048b26b2bf1SDarrick J. Wong if ((icw->icw_flags & XFS_ICWALK_FLAG_GID) && 1049b26b2bf1SDarrick J. Wong !gid_eq(VFS_I(ip)->i_gid, icw->icw_gid)) 105039b1cfd7SDarrick J. Wong return false; 10511b556048SBrian Foster 1052b26b2bf1SDarrick J. Wong if ((icw->icw_flags & XFS_ICWALK_FLAG_PRID) && 1053b26b2bf1SDarrick J. Wong ip->i_projid != icw->icw_prid) 105439b1cfd7SDarrick J. Wong return false; 10551b556048SBrian Foster 105639b1cfd7SDarrick J. Wong return true; 10573e3f9f58SBrian Foster } 10583e3f9f58SBrian Foster 1059f4526397SBrian Foster /* 1060f4526397SBrian Foster * A union-based inode filtering algorithm. Process the inode if any of the 1061f4526397SBrian Foster * criteria match. This is for global/internal scans only. 1062f4526397SBrian Foster */ 106339b1cfd7SDarrick J. Wong STATIC bool 1064b26b2bf1SDarrick J. Wong xfs_icwalk_match_id_union( 1065f4526397SBrian Foster struct xfs_inode *ip, 1066b26b2bf1SDarrick J. Wong struct xfs_icwalk *icw) 1067f4526397SBrian Foster { 1068b26b2bf1SDarrick J. Wong if ((icw->icw_flags & XFS_ICWALK_FLAG_UID) && 1069b26b2bf1SDarrick J. Wong uid_eq(VFS_I(ip)->i_uid, icw->icw_uid)) 107039b1cfd7SDarrick J. Wong return true; 1071f4526397SBrian Foster 1072b26b2bf1SDarrick J. Wong if ((icw->icw_flags & XFS_ICWALK_FLAG_GID) && 1073b26b2bf1SDarrick J. Wong gid_eq(VFS_I(ip)->i_gid, icw->icw_gid)) 107439b1cfd7SDarrick J. Wong return true; 1075f4526397SBrian Foster 1076b26b2bf1SDarrick J. Wong if ((icw->icw_flags & XFS_ICWALK_FLAG_PRID) && 1077b26b2bf1SDarrick J. Wong ip->i_projid == icw->icw_prid) 107839b1cfd7SDarrick J. Wong return true; 1079f4526397SBrian Foster 108039b1cfd7SDarrick J. Wong return false; 1081f4526397SBrian Foster } 1082f4526397SBrian Foster 1083a91bf992SDarrick J. Wong /* 1084a91bf992SDarrick J. Wong * Is this inode @ip eligible for eof/cow block reclamation, given some 1085b26b2bf1SDarrick J. Wong * filtering parameters @icw? The inode is eligible if @icw is null or 1086a91bf992SDarrick J. Wong * if the predicate functions match. 1087a91bf992SDarrick J. Wong */ 1088a91bf992SDarrick J. Wong static bool 1089b26b2bf1SDarrick J. Wong xfs_icwalk_match( 1090a91bf992SDarrick J. Wong struct xfs_inode *ip, 1091b26b2bf1SDarrick J. Wong struct xfs_icwalk *icw) 1092a91bf992SDarrick J. Wong { 109339b1cfd7SDarrick J. Wong bool match; 1094a91bf992SDarrick J. Wong 1095b26b2bf1SDarrick J. Wong if (!icw) 1096a91bf992SDarrick J. Wong return true; 1097a91bf992SDarrick J. Wong 1098b26b2bf1SDarrick J. Wong if (icw->icw_flags & XFS_ICWALK_FLAG_UNION) 1099b26b2bf1SDarrick J. Wong match = xfs_icwalk_match_id_union(ip, icw); 1100a91bf992SDarrick J. Wong else 1101b26b2bf1SDarrick J. Wong match = xfs_icwalk_match_id(ip, icw); 1102a91bf992SDarrick J. Wong if (!match) 1103a91bf992SDarrick J. Wong return false; 1104a91bf992SDarrick J. Wong 1105a91bf992SDarrick J. Wong /* skip the inode if the file size is too small */ 1106b26b2bf1SDarrick J. Wong if ((icw->icw_flags & XFS_ICWALK_FLAG_MINFILESIZE) && 1107b26b2bf1SDarrick J. Wong XFS_ISIZE(ip) < icw->icw_min_file_size) 1108a91bf992SDarrick J. Wong return false; 1109a91bf992SDarrick J. Wong 1110a91bf992SDarrick J. Wong return true; 1111a91bf992SDarrick J. Wong } 1112a91bf992SDarrick J. Wong 11134d0bab3aSDave Chinner /* 11144d0bab3aSDave Chinner * This is a fast pass over the inode cache to try to get reclaim moving on as 11154d0bab3aSDave Chinner * many inodes as possible in a short period of time. It kicks itself every few 11164d0bab3aSDave Chinner * seconds, as well as being kicked by the inode cache shrinker when memory 111702511a5aSDave Chinner * goes low. 11184d0bab3aSDave Chinner */ 11194d0bab3aSDave Chinner void 11204d0bab3aSDave Chinner xfs_reclaim_worker( 11214d0bab3aSDave Chinner struct work_struct *work) 11224d0bab3aSDave Chinner { 11234d0bab3aSDave Chinner struct xfs_mount *mp = container_of(to_delayed_work(work), 11244d0bab3aSDave Chinner struct xfs_mount, m_reclaim_work); 11254d0bab3aSDave Chinner 1126f1bc5c56SDarrick J. Wong xfs_icwalk(mp, XFS_ICWALK_RECLAIM, NULL); 11274d0bab3aSDave Chinner xfs_reclaim_work_queue(mp); 11284d0bab3aSDave Chinner } 11294d0bab3aSDave Chinner 11303e3f9f58SBrian Foster STATIC int 113141176a68SBrian Foster xfs_inode_free_eofblocks( 113241176a68SBrian Foster struct xfs_inode *ip, 1133b26b2bf1SDarrick J. Wong struct xfs_icwalk *icw, 11340fa4a10aSDarrick J. Wong unsigned int *lockflags) 113541176a68SBrian Foster { 1136390600f8SDarrick J. Wong bool wait; 1137390600f8SDarrick J. Wong 1138b26b2bf1SDarrick J. Wong wait = icw && (icw->icw_flags & XFS_ICWALK_FLAG_SYNC); 11395400da7dSBrian Foster 1140ce2d3bbeSDarrick J. Wong if (!xfs_iflags_test(ip, XFS_IEOFBLOCKS)) 1141ce2d3bbeSDarrick J. Wong return 0; 1142ce2d3bbeSDarrick J. Wong 114341176a68SBrian Foster /* 114441176a68SBrian Foster * If the mapping is dirty the operation can block and wait for some 114541176a68SBrian Foster * time. Unless we are waiting, skip it. 114641176a68SBrian Foster */ 1147390600f8SDarrick J. Wong if (!wait && mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY)) 114841176a68SBrian Foster return 0; 114941176a68SBrian Foster 1150b26b2bf1SDarrick J. Wong if (!xfs_icwalk_match(ip, icw)) 11513e3f9f58SBrian Foster return 0; 11523e3f9f58SBrian Foster 1153a36b9261SBrian Foster /* 1154a36b9261SBrian Foster * If the caller is waiting, return -EAGAIN to keep the background 1155a36b9261SBrian Foster * scanner moving and revisit the inode in a subsequent pass. 1156a36b9261SBrian Foster */ 1157c3155097SBrian Foster if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { 1158390600f8SDarrick J. Wong if (wait) 1159390600f8SDarrick J. Wong return -EAGAIN; 1160390600f8SDarrick J. Wong return 0; 1161a36b9261SBrian Foster } 11620fa4a10aSDarrick J. Wong *lockflags |= XFS_IOLOCK_EXCL; 1163390600f8SDarrick J. Wong 11642b156ff8SDarrick J. Wong if (xfs_can_free_eofblocks(ip, false)) 11650fa4a10aSDarrick J. Wong return xfs_free_eofblocks(ip); 11662b156ff8SDarrick J. Wong 11672b156ff8SDarrick J. Wong /* inode could be preallocated or append-only */ 11682b156ff8SDarrick J. Wong trace_xfs_inode_free_eofblocks_invalid(ip); 11692b156ff8SDarrick J. Wong xfs_inode_clear_eofblocks_tag(ip); 11702b156ff8SDarrick J. Wong return 0; 117141176a68SBrian Foster } 117241176a68SBrian Foster 117383104d44SDarrick J. Wong static void 1174ce2d3bbeSDarrick J. Wong xfs_blockgc_set_iflag( 1175ce2d3bbeSDarrick J. Wong struct xfs_inode *ip, 1176ce2d3bbeSDarrick J. Wong unsigned long iflag) 117727b52867SBrian Foster { 117827b52867SBrian Foster struct xfs_mount *mp = ip->i_mount; 117927b52867SBrian Foster struct xfs_perag *pag; 118027b52867SBrian Foster 1181ce2d3bbeSDarrick J. Wong ASSERT((iflag & ~(XFS_IEOFBLOCKS | XFS_ICOWBLOCKS)) == 0); 1182ce2d3bbeSDarrick J. Wong 118385a6e764SChristoph Hellwig /* 118485a6e764SChristoph Hellwig * Don't bother locking the AG and looking up in the radix trees 118585a6e764SChristoph Hellwig * if we already know that we have the tag set. 118685a6e764SChristoph Hellwig */ 1187ce2d3bbeSDarrick J. Wong if (ip->i_flags & iflag) 118885a6e764SChristoph Hellwig return; 118985a6e764SChristoph Hellwig spin_lock(&ip->i_flags_lock); 1190ce2d3bbeSDarrick J. Wong ip->i_flags |= iflag; 119185a6e764SChristoph Hellwig spin_unlock(&ip->i_flags_lock); 119285a6e764SChristoph Hellwig 119327b52867SBrian Foster pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 119427b52867SBrian Foster spin_lock(&pag->pag_ici_lock); 119527b52867SBrian Foster 1196c076ae7aSDarrick J. Wong xfs_perag_set_inode_tag(pag, XFS_INO_TO_AGINO(mp, ip->i_ino), 1197ce2d3bbeSDarrick J. Wong XFS_ICI_BLOCKGC_TAG); 119827b52867SBrian Foster 119927b52867SBrian Foster spin_unlock(&pag->pag_ici_lock); 120027b52867SBrian Foster xfs_perag_put(pag); 120127b52867SBrian Foster } 120227b52867SBrian Foster 120327b52867SBrian Foster void 120483104d44SDarrick J. Wong xfs_inode_set_eofblocks_tag( 120527b52867SBrian Foster xfs_inode_t *ip) 120627b52867SBrian Foster { 120783104d44SDarrick J. Wong trace_xfs_inode_set_eofblocks_tag(ip); 12089669f51dSDarrick J. Wong return xfs_blockgc_set_iflag(ip, XFS_IEOFBLOCKS); 120983104d44SDarrick J. Wong } 121083104d44SDarrick J. Wong 121183104d44SDarrick J. Wong static void 1212ce2d3bbeSDarrick J. Wong xfs_blockgc_clear_iflag( 1213ce2d3bbeSDarrick J. Wong struct xfs_inode *ip, 1214ce2d3bbeSDarrick J. Wong unsigned long iflag) 121583104d44SDarrick J. Wong { 121627b52867SBrian Foster struct xfs_mount *mp = ip->i_mount; 121727b52867SBrian Foster struct xfs_perag *pag; 1218ce2d3bbeSDarrick J. Wong bool clear_tag; 1219ce2d3bbeSDarrick J. Wong 1220ce2d3bbeSDarrick J. Wong ASSERT((iflag & ~(XFS_IEOFBLOCKS | XFS_ICOWBLOCKS)) == 0); 122127b52867SBrian Foster 122285a6e764SChristoph Hellwig spin_lock(&ip->i_flags_lock); 1223ce2d3bbeSDarrick J. Wong ip->i_flags &= ~iflag; 1224ce2d3bbeSDarrick J. Wong clear_tag = (ip->i_flags & (XFS_IEOFBLOCKS | XFS_ICOWBLOCKS)) == 0; 122585a6e764SChristoph Hellwig spin_unlock(&ip->i_flags_lock); 122685a6e764SChristoph Hellwig 1227ce2d3bbeSDarrick J. Wong if (!clear_tag) 1228ce2d3bbeSDarrick J. Wong return; 1229ce2d3bbeSDarrick J. Wong 123027b52867SBrian Foster pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 123127b52867SBrian Foster spin_lock(&pag->pag_ici_lock); 123227b52867SBrian Foster 1233c076ae7aSDarrick J. Wong xfs_perag_clear_inode_tag(pag, XFS_INO_TO_AGINO(mp, ip->i_ino), 1234ce2d3bbeSDarrick J. Wong XFS_ICI_BLOCKGC_TAG); 123527b52867SBrian Foster 123627b52867SBrian Foster spin_unlock(&pag->pag_ici_lock); 123727b52867SBrian Foster xfs_perag_put(pag); 123827b52867SBrian Foster } 123927b52867SBrian Foster 124083104d44SDarrick J. Wong void 124183104d44SDarrick J. Wong xfs_inode_clear_eofblocks_tag( 124283104d44SDarrick J. Wong xfs_inode_t *ip) 124383104d44SDarrick J. Wong { 124483104d44SDarrick J. Wong trace_xfs_inode_clear_eofblocks_tag(ip); 1245ce2d3bbeSDarrick J. Wong return xfs_blockgc_clear_iflag(ip, XFS_IEOFBLOCKS); 124683104d44SDarrick J. Wong } 124783104d44SDarrick J. Wong 124883104d44SDarrick J. Wong /* 1249be78ff0eSDarrick J. Wong * Set ourselves up to free CoW blocks from this file. If it's already clean 1250be78ff0eSDarrick J. Wong * then we can bail out quickly, but otherwise we must back off if the file 1251be78ff0eSDarrick J. Wong * is undergoing some kind of write. 1252be78ff0eSDarrick J. Wong */ 1253be78ff0eSDarrick J. Wong static bool 1254be78ff0eSDarrick J. Wong xfs_prep_free_cowblocks( 125551d62690SChristoph Hellwig struct xfs_inode *ip) 1256be78ff0eSDarrick J. Wong { 1257be78ff0eSDarrick J. Wong /* 1258be78ff0eSDarrick J. Wong * Just clear the tag if we have an empty cow fork or none at all. It's 1259be78ff0eSDarrick J. Wong * possible the inode was fully unshared since it was originally tagged. 1260be78ff0eSDarrick J. Wong */ 126151d62690SChristoph Hellwig if (!xfs_inode_has_cow_data(ip)) { 1262be78ff0eSDarrick J. Wong trace_xfs_inode_free_cowblocks_invalid(ip); 1263be78ff0eSDarrick J. Wong xfs_inode_clear_cowblocks_tag(ip); 1264be78ff0eSDarrick J. Wong return false; 1265be78ff0eSDarrick J. Wong } 1266be78ff0eSDarrick J. Wong 1267be78ff0eSDarrick J. Wong /* 1268be78ff0eSDarrick J. Wong * If the mapping is dirty or under writeback we cannot touch the 1269be78ff0eSDarrick J. Wong * CoW fork. Leave it alone if we're in the midst of a directio. 1270be78ff0eSDarrick J. Wong */ 1271be78ff0eSDarrick J. Wong if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) || 1272be78ff0eSDarrick J. Wong mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) || 1273be78ff0eSDarrick J. Wong mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) || 1274be78ff0eSDarrick J. Wong atomic_read(&VFS_I(ip)->i_dio_count)) 1275be78ff0eSDarrick J. Wong return false; 1276be78ff0eSDarrick J. Wong 1277be78ff0eSDarrick J. Wong return true; 1278be78ff0eSDarrick J. Wong } 1279be78ff0eSDarrick J. Wong 1280be78ff0eSDarrick J. Wong /* 128183104d44SDarrick J. Wong * Automatic CoW Reservation Freeing 128283104d44SDarrick J. Wong * 128383104d44SDarrick J. Wong * These functions automatically garbage collect leftover CoW reservations 128483104d44SDarrick J. Wong * that were made on behalf of a cowextsize hint when we start to run out 128583104d44SDarrick J. Wong * of quota or when the reservations sit around for too long. If the file 128683104d44SDarrick J. Wong * has dirty pages or is undergoing writeback, its CoW reservations will 128783104d44SDarrick J. Wong * be retained. 128883104d44SDarrick J. Wong * 128983104d44SDarrick J. Wong * The actual garbage collection piggybacks off the same code that runs 129083104d44SDarrick J. Wong * the speculative EOF preallocation garbage collector. 129183104d44SDarrick J. Wong */ 129283104d44SDarrick J. Wong STATIC int 129383104d44SDarrick J. Wong xfs_inode_free_cowblocks( 129483104d44SDarrick J. Wong struct xfs_inode *ip, 1295b26b2bf1SDarrick J. Wong struct xfs_icwalk *icw, 12960fa4a10aSDarrick J. Wong unsigned int *lockflags) 129783104d44SDarrick J. Wong { 1298f41a0716SDarrick J. Wong bool wait; 1299be78ff0eSDarrick J. Wong int ret = 0; 130083104d44SDarrick J. Wong 1301b26b2bf1SDarrick J. Wong wait = icw && (icw->icw_flags & XFS_ICWALK_FLAG_SYNC); 1302f41a0716SDarrick J. Wong 1303ce2d3bbeSDarrick J. Wong if (!xfs_iflags_test(ip, XFS_ICOWBLOCKS)) 1304ce2d3bbeSDarrick J. Wong return 0; 1305ce2d3bbeSDarrick J. Wong 130651d62690SChristoph Hellwig if (!xfs_prep_free_cowblocks(ip)) 130783104d44SDarrick J. Wong return 0; 130883104d44SDarrick J. Wong 1309b26b2bf1SDarrick J. Wong if (!xfs_icwalk_match(ip, icw)) 131083104d44SDarrick J. Wong return 0; 131183104d44SDarrick J. Wong 1312f41a0716SDarrick J. Wong /* 1313f41a0716SDarrick J. Wong * If the caller is waiting, return -EAGAIN to keep the background 1314f41a0716SDarrick J. Wong * scanner moving and revisit the inode in a subsequent pass. 1315f41a0716SDarrick J. Wong */ 13160fa4a10aSDarrick J. Wong if (!(*lockflags & XFS_IOLOCK_EXCL) && 13170fa4a10aSDarrick J. Wong !xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { 1318f41a0716SDarrick J. Wong if (wait) 1319f41a0716SDarrick J. Wong return -EAGAIN; 1320f41a0716SDarrick J. Wong return 0; 1321f41a0716SDarrick J. Wong } 13220fa4a10aSDarrick J. Wong *lockflags |= XFS_IOLOCK_EXCL; 13230fa4a10aSDarrick J. Wong 1324f41a0716SDarrick J. Wong if (!xfs_ilock_nowait(ip, XFS_MMAPLOCK_EXCL)) { 1325f41a0716SDarrick J. Wong if (wait) 13260fa4a10aSDarrick J. Wong return -EAGAIN; 13270fa4a10aSDarrick J. Wong return 0; 1328f41a0716SDarrick J. Wong } 13290fa4a10aSDarrick J. Wong *lockflags |= XFS_MMAPLOCK_EXCL; 133083104d44SDarrick J. Wong 1331be78ff0eSDarrick J. Wong /* 1332be78ff0eSDarrick J. Wong * Check again, nobody else should be able to dirty blocks or change 1333be78ff0eSDarrick J. Wong * the reflink iflag now that we have the first two locks held. 1334be78ff0eSDarrick J. Wong */ 133551d62690SChristoph Hellwig if (xfs_prep_free_cowblocks(ip)) 13363802a345SChristoph Hellwig ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false); 133783104d44SDarrick J. Wong return ret; 133883104d44SDarrick J. Wong } 133983104d44SDarrick J. Wong 134083104d44SDarrick J. Wong void 134183104d44SDarrick J. Wong xfs_inode_set_cowblocks_tag( 134283104d44SDarrick J. Wong xfs_inode_t *ip) 134383104d44SDarrick J. Wong { 13447b7381f0SBrian Foster trace_xfs_inode_set_cowblocks_tag(ip); 13459669f51dSDarrick J. Wong return xfs_blockgc_set_iflag(ip, XFS_ICOWBLOCKS); 134683104d44SDarrick J. Wong } 134783104d44SDarrick J. Wong 134883104d44SDarrick J. Wong void 134983104d44SDarrick J. Wong xfs_inode_clear_cowblocks_tag( 135083104d44SDarrick J. Wong xfs_inode_t *ip) 135183104d44SDarrick J. Wong { 13527b7381f0SBrian Foster trace_xfs_inode_clear_cowblocks_tag(ip); 1353ce2d3bbeSDarrick J. Wong return xfs_blockgc_clear_iflag(ip, XFS_ICOWBLOCKS); 135483104d44SDarrick J. Wong } 1355d6b636ebSDarrick J. Wong 1356d6b636ebSDarrick J. Wong /* Disable post-EOF and CoW block auto-reclamation. */ 1357d6b636ebSDarrick J. Wong void 1358c9a6526fSDarrick J. Wong xfs_blockgc_stop( 1359d6b636ebSDarrick J. Wong struct xfs_mount *mp) 1360d6b636ebSDarrick J. Wong { 1361894ecacfSDarrick J. Wong struct xfs_perag *pag; 1362894ecacfSDarrick J. Wong xfs_agnumber_t agno; 1363894ecacfSDarrick J. Wong 13646f649091SDarrick J. Wong if (!xfs_clear_blockgc_enabled(mp)) 13656f649091SDarrick J. Wong return; 13666f649091SDarrick J. Wong 13676f649091SDarrick J. Wong for_each_perag(mp, agno, pag) 1368894ecacfSDarrick J. Wong cancel_delayed_work_sync(&pag->pag_blockgc_work); 13696f649091SDarrick J. Wong trace_xfs_blockgc_stop(mp, __return_address); 1370d6b636ebSDarrick J. Wong } 1371d6b636ebSDarrick J. Wong 1372d6b636ebSDarrick J. Wong /* Enable post-EOF and CoW block auto-reclamation. */ 1373d6b636ebSDarrick J. Wong void 1374c9a6526fSDarrick J. Wong xfs_blockgc_start( 1375d6b636ebSDarrick J. Wong struct xfs_mount *mp) 1376d6b636ebSDarrick J. Wong { 1377894ecacfSDarrick J. Wong struct xfs_perag *pag; 1378894ecacfSDarrick J. Wong xfs_agnumber_t agno; 1379894ecacfSDarrick J. Wong 13806f649091SDarrick J. Wong if (xfs_set_blockgc_enabled(mp)) 13816f649091SDarrick J. Wong return; 13826f649091SDarrick J. Wong 13836f649091SDarrick J. Wong trace_xfs_blockgc_start(mp, __return_address); 1384894ecacfSDarrick J. Wong for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG) 1385894ecacfSDarrick J. Wong xfs_blockgc_queue(pag); 1386d6b636ebSDarrick J. Wong } 13873d4feec0SDarrick J. Wong 1388d20d5edcSDarrick J. Wong /* Don't try to run block gc on an inode that's in any of these states. */ 1389d20d5edcSDarrick J. Wong #define XFS_BLOCKGC_NOGRAB_IFLAGS (XFS_INEW | \ 1390ab23a776SDave Chinner XFS_NEED_INACTIVE | \ 1391ab23a776SDave Chinner XFS_INACTIVATING | \ 1392d20d5edcSDarrick J. Wong XFS_IRECLAIMABLE | \ 1393d20d5edcSDarrick J. Wong XFS_IRECLAIM) 1394df600197SDarrick J. Wong /* 1395b9baaef4SDarrick J. Wong * Decide if the given @ip is eligible for garbage collection of speculative 1396b9baaef4SDarrick J. Wong * preallocations, and grab it if so. Returns true if it's ready to go or 1397b9baaef4SDarrick J. Wong * false if we should just ignore it. 1398df600197SDarrick J. Wong */ 1399df600197SDarrick J. Wong static bool 1400b9baaef4SDarrick J. Wong xfs_blockgc_igrab( 14017fdff526SDarrick J. Wong struct xfs_inode *ip) 1402df600197SDarrick J. Wong { 1403df600197SDarrick J. Wong struct inode *inode = VFS_I(ip); 1404df600197SDarrick J. Wong 1405df600197SDarrick J. Wong ASSERT(rcu_read_lock_held()); 1406df600197SDarrick J. Wong 1407df600197SDarrick J. Wong /* Check for stale RCU freed inode */ 1408df600197SDarrick J. Wong spin_lock(&ip->i_flags_lock); 1409df600197SDarrick J. Wong if (!ip->i_ino) 1410df600197SDarrick J. Wong goto out_unlock_noent; 1411df600197SDarrick J. Wong 1412d20d5edcSDarrick J. Wong if (ip->i_flags & XFS_BLOCKGC_NOGRAB_IFLAGS) 1413df600197SDarrick J. Wong goto out_unlock_noent; 1414df600197SDarrick J. Wong spin_unlock(&ip->i_flags_lock); 1415df600197SDarrick J. Wong 1416df600197SDarrick J. Wong /* nothing to sync during shutdown */ 141775c8c50fSDave Chinner if (xfs_is_shutdown(ip->i_mount)) 1418df600197SDarrick J. Wong return false; 1419df600197SDarrick J. Wong 1420df600197SDarrick J. Wong /* If we can't grab the inode, it must on it's way to reclaim. */ 1421df600197SDarrick J. Wong if (!igrab(inode)) 1422df600197SDarrick J. Wong return false; 1423df600197SDarrick J. Wong 1424df600197SDarrick J. Wong /* inode is valid */ 1425df600197SDarrick J. Wong return true; 1426df600197SDarrick J. Wong 1427df600197SDarrick J. Wong out_unlock_noent: 1428df600197SDarrick J. Wong spin_unlock(&ip->i_flags_lock); 1429df600197SDarrick J. Wong return false; 1430df600197SDarrick J. Wong } 1431df600197SDarrick J. Wong 143241956753SDarrick J. Wong /* Scan one incore inode for block preallocations that we can remove. */ 143341956753SDarrick J. Wong static int 143441956753SDarrick J. Wong xfs_blockgc_scan_inode( 143541956753SDarrick J. Wong struct xfs_inode *ip, 1436b26b2bf1SDarrick J. Wong struct xfs_icwalk *icw) 143785c5b270SDarrick J. Wong { 14380fa4a10aSDarrick J. Wong unsigned int lockflags = 0; 143985c5b270SDarrick J. Wong int error; 144085c5b270SDarrick J. Wong 1441b26b2bf1SDarrick J. Wong error = xfs_inode_free_eofblocks(ip, icw, &lockflags); 144285c5b270SDarrick J. Wong if (error) 14430fa4a10aSDarrick J. Wong goto unlock; 144485c5b270SDarrick J. Wong 1445b26b2bf1SDarrick J. Wong error = xfs_inode_free_cowblocks(ip, icw, &lockflags); 14460fa4a10aSDarrick J. Wong unlock: 14470fa4a10aSDarrick J. Wong if (lockflags) 14480fa4a10aSDarrick J. Wong xfs_iunlock(ip, lockflags); 1449594ab00bSDarrick J. Wong xfs_irele(ip); 145085c5b270SDarrick J. Wong return error; 145185c5b270SDarrick J. Wong } 145285c5b270SDarrick J. Wong 14539669f51dSDarrick J. Wong /* Background worker that trims preallocated space. */ 14549669f51dSDarrick J. Wong void 14559669f51dSDarrick J. Wong xfs_blockgc_worker( 14569669f51dSDarrick J. Wong struct work_struct *work) 14579669f51dSDarrick J. Wong { 1458894ecacfSDarrick J. Wong struct xfs_perag *pag = container_of(to_delayed_work(work), 1459894ecacfSDarrick J. Wong struct xfs_perag, pag_blockgc_work); 1460894ecacfSDarrick J. Wong struct xfs_mount *mp = pag->pag_mount; 14619669f51dSDarrick J. Wong int error; 14629669f51dSDarrick J. Wong 14636f649091SDarrick J. Wong trace_xfs_blockgc_worker(mp, __return_address); 14646f649091SDarrick J. Wong 1465f427cf5cSDarrick J. Wong error = xfs_icwalk_ag(pag, XFS_ICWALK_BLOCKGC, NULL); 14669669f51dSDarrick J. Wong if (error) 1467894ecacfSDarrick J. Wong xfs_info(mp, "AG %u preallocation gc worker failed, err=%d", 1468894ecacfSDarrick J. Wong pag->pag_agno, error); 1469894ecacfSDarrick J. Wong xfs_blockgc_queue(pag); 14709669f51dSDarrick J. Wong } 14719669f51dSDarrick J. Wong 147285c5b270SDarrick J. Wong /* 14732eb66502SDarrick J. Wong * Try to free space in the filesystem by purging inactive inodes, eofblocks 14742eb66502SDarrick J. Wong * and cowblocks. 147585c5b270SDarrick J. Wong */ 147685c5b270SDarrick J. Wong int 147785c5b270SDarrick J. Wong xfs_blockgc_free_space( 147885c5b270SDarrick J. Wong struct xfs_mount *mp, 1479b26b2bf1SDarrick J. Wong struct xfs_icwalk *icw) 148085c5b270SDarrick J. Wong { 14812eb66502SDarrick J. Wong int error; 14822eb66502SDarrick J. Wong 1483b26b2bf1SDarrick J. Wong trace_xfs_blockgc_free_space(mp, icw, _RET_IP_); 148485c5b270SDarrick J. Wong 14852eb66502SDarrick J. Wong error = xfs_icwalk(mp, XFS_ICWALK_BLOCKGC, icw); 14862eb66502SDarrick J. Wong if (error) 14872eb66502SDarrick J. Wong return error; 14882eb66502SDarrick J. Wong 14892eb66502SDarrick J. Wong xfs_inodegc_flush(mp); 14902eb66502SDarrick J. Wong return 0; 149185c5b270SDarrick J. Wong } 149285c5b270SDarrick J. Wong 14933d4feec0SDarrick J. Wong /* 1494e8d04c2aSDarrick J. Wong * Reclaim all the free space that we can by scheduling the background blockgc 1495e8d04c2aSDarrick J. Wong * and inodegc workers immediately and waiting for them all to clear. 1496e8d04c2aSDarrick J. Wong */ 1497e8d04c2aSDarrick J. Wong void 1498e8d04c2aSDarrick J. Wong xfs_blockgc_flush_all( 1499e8d04c2aSDarrick J. Wong struct xfs_mount *mp) 1500e8d04c2aSDarrick J. Wong { 1501e8d04c2aSDarrick J. Wong struct xfs_perag *pag; 1502e8d04c2aSDarrick J. Wong xfs_agnumber_t agno; 1503e8d04c2aSDarrick J. Wong 1504e8d04c2aSDarrick J. Wong trace_xfs_blockgc_flush_all(mp, __return_address); 1505e8d04c2aSDarrick J. Wong 1506e8d04c2aSDarrick J. Wong /* 1507e8d04c2aSDarrick J. Wong * For each blockgc worker, move its queue time up to now. If it 1508e8d04c2aSDarrick J. Wong * wasn't queued, it will not be requeued. Then flush whatever's 1509e8d04c2aSDarrick J. Wong * left. 1510e8d04c2aSDarrick J. Wong */ 1511e8d04c2aSDarrick J. Wong for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG) 1512e8d04c2aSDarrick J. Wong mod_delayed_work(pag->pag_mount->m_blockgc_wq, 1513e8d04c2aSDarrick J. Wong &pag->pag_blockgc_work, 0); 1514e8d04c2aSDarrick J. Wong 1515e8d04c2aSDarrick J. Wong for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG) 1516e8d04c2aSDarrick J. Wong flush_delayed_work(&pag->pag_blockgc_work); 1517e8d04c2aSDarrick J. Wong 1518e8d04c2aSDarrick J. Wong xfs_inodegc_flush(mp); 1519e8d04c2aSDarrick J. Wong } 1520e8d04c2aSDarrick J. Wong 1521e8d04c2aSDarrick J. Wong /* 1522c237dd7cSDarrick J. Wong * Run cow/eofblocks scans on the supplied dquots. We don't know exactly which 1523c237dd7cSDarrick J. Wong * quota caused an allocation failure, so we make a best effort by including 1524c237dd7cSDarrick J. Wong * each quota under low free space conditions (less than 1% free space) in the 1525c237dd7cSDarrick J. Wong * scan. 1526111068f8SDarrick J. Wong * 1527111068f8SDarrick J. Wong * Callers must not hold any inode's ILOCK. If requesting a synchronous scan 15282d53f66bSDarrick J. Wong * (XFS_ICWALK_FLAG_SYNC), the caller also must not hold any inode's IOLOCK or 1529111068f8SDarrick J. Wong * MMAPLOCK. 15303d4feec0SDarrick J. Wong */ 1531111068f8SDarrick J. Wong int 1532c237dd7cSDarrick J. Wong xfs_blockgc_free_dquots( 1533c237dd7cSDarrick J. Wong struct xfs_mount *mp, 1534c237dd7cSDarrick J. Wong struct xfs_dquot *udqp, 1535c237dd7cSDarrick J. Wong struct xfs_dquot *gdqp, 1536c237dd7cSDarrick J. Wong struct xfs_dquot *pdqp, 15372d53f66bSDarrick J. Wong unsigned int iwalk_flags) 15383d4feec0SDarrick J. Wong { 1539b26b2bf1SDarrick J. Wong struct xfs_icwalk icw = {0}; 15403d4feec0SDarrick J. Wong bool do_work = false; 15413d4feec0SDarrick J. Wong 1542c237dd7cSDarrick J. Wong if (!udqp && !gdqp && !pdqp) 1543c237dd7cSDarrick J. Wong return 0; 1544c237dd7cSDarrick J. Wong 15453d4feec0SDarrick J. Wong /* 1546111068f8SDarrick J. Wong * Run a scan to free blocks using the union filter to cover all 1547111068f8SDarrick J. Wong * applicable quotas in a single scan. 15483d4feec0SDarrick J. Wong */ 1549b26b2bf1SDarrick J. Wong icw.icw_flags = XFS_ICWALK_FLAG_UNION | iwalk_flags; 15503d4feec0SDarrick J. Wong 1551c237dd7cSDarrick J. Wong if (XFS_IS_UQUOTA_ENFORCED(mp) && udqp && xfs_dquot_lowsp(udqp)) { 1552b26b2bf1SDarrick J. Wong icw.icw_uid = make_kuid(mp->m_super->s_user_ns, udqp->q_id); 1553b26b2bf1SDarrick J. Wong icw.icw_flags |= XFS_ICWALK_FLAG_UID; 15543d4feec0SDarrick J. Wong do_work = true; 15553d4feec0SDarrick J. Wong } 15563d4feec0SDarrick J. Wong 1557c237dd7cSDarrick J. Wong if (XFS_IS_UQUOTA_ENFORCED(mp) && gdqp && xfs_dquot_lowsp(gdqp)) { 1558b26b2bf1SDarrick J. Wong icw.icw_gid = make_kgid(mp->m_super->s_user_ns, gdqp->q_id); 1559b26b2bf1SDarrick J. Wong icw.icw_flags |= XFS_ICWALK_FLAG_GID; 15603d4feec0SDarrick J. Wong do_work = true; 15613d4feec0SDarrick J. Wong } 15623d4feec0SDarrick J. Wong 1563c237dd7cSDarrick J. Wong if (XFS_IS_PQUOTA_ENFORCED(mp) && pdqp && xfs_dquot_lowsp(pdqp)) { 1564b26b2bf1SDarrick J. Wong icw.icw_prid = pdqp->q_id; 1565b26b2bf1SDarrick J. Wong icw.icw_flags |= XFS_ICWALK_FLAG_PRID; 15663d4feec0SDarrick J. Wong do_work = true; 15673d4feec0SDarrick J. Wong } 15683d4feec0SDarrick J. Wong 15693d4feec0SDarrick J. Wong if (!do_work) 1570111068f8SDarrick J. Wong return 0; 15713d4feec0SDarrick J. Wong 1572b26b2bf1SDarrick J. Wong return xfs_blockgc_free_space(mp, &icw); 1573c237dd7cSDarrick J. Wong } 1574c237dd7cSDarrick J. Wong 1575c237dd7cSDarrick J. Wong /* Run cow/eofblocks scans on the quotas attached to the inode. */ 1576c237dd7cSDarrick J. Wong int 1577c237dd7cSDarrick J. Wong xfs_blockgc_free_quota( 1578c237dd7cSDarrick J. Wong struct xfs_inode *ip, 15792d53f66bSDarrick J. Wong unsigned int iwalk_flags) 1580c237dd7cSDarrick J. Wong { 1581c237dd7cSDarrick J. Wong return xfs_blockgc_free_dquots(ip->i_mount, 1582c237dd7cSDarrick J. Wong xfs_inode_dquot(ip, XFS_DQTYPE_USER), 1583c237dd7cSDarrick J. Wong xfs_inode_dquot(ip, XFS_DQTYPE_GROUP), 15842d53f66bSDarrick J. Wong xfs_inode_dquot(ip, XFS_DQTYPE_PROJ), iwalk_flags); 15853d4feec0SDarrick J. Wong } 1586df600197SDarrick J. Wong 1587df600197SDarrick J. Wong /* XFS Inode Cache Walking Code */ 1588df600197SDarrick J. Wong 1589df600197SDarrick J. Wong /* 1590f1bc5c56SDarrick J. Wong * The inode lookup is done in batches to keep the amount of lock traffic and 1591f1bc5c56SDarrick J. Wong * radix tree lookups to a minimum. The batch size is a trade off between 1592f1bc5c56SDarrick J. Wong * lookup reduction and stack usage. This is in the reclaim path, so we can't 1593f1bc5c56SDarrick J. Wong * be too greedy. 1594f1bc5c56SDarrick J. Wong */ 1595f1bc5c56SDarrick J. Wong #define XFS_LOOKUP_BATCH 32 1596f1bc5c56SDarrick J. Wong 1597f1bc5c56SDarrick J. Wong 1598f1bc5c56SDarrick J. Wong /* 1599b9baaef4SDarrick J. Wong * Decide if we want to grab this inode in anticipation of doing work towards 1600594ab00bSDarrick J. Wong * the goal. 1601b9baaef4SDarrick J. Wong */ 1602b9baaef4SDarrick J. Wong static inline bool 1603b9baaef4SDarrick J. Wong xfs_icwalk_igrab( 1604b9baaef4SDarrick J. Wong enum xfs_icwalk_goal goal, 16059492750aSDarrick J. Wong struct xfs_inode *ip, 1606b26b2bf1SDarrick J. Wong struct xfs_icwalk *icw) 1607b9baaef4SDarrick J. Wong { 1608b9baaef4SDarrick J. Wong switch (goal) { 1609b9baaef4SDarrick J. Wong case XFS_ICWALK_BLOCKGC: 16107fdff526SDarrick J. Wong return xfs_blockgc_igrab(ip); 1611f1bc5c56SDarrick J. Wong case XFS_ICWALK_RECLAIM: 1612b26b2bf1SDarrick J. Wong return xfs_reclaim_igrab(ip, icw); 1613b9baaef4SDarrick J. Wong default: 1614b9baaef4SDarrick J. Wong return false; 1615b9baaef4SDarrick J. Wong } 1616b9baaef4SDarrick J. Wong } 1617b9baaef4SDarrick J. Wong 1618594ab00bSDarrick J. Wong /* 1619594ab00bSDarrick J. Wong * Process an inode. Each processing function must handle any state changes 1620594ab00bSDarrick J. Wong * made by the icwalk igrab function. Return -EAGAIN to skip an inode. 1621594ab00bSDarrick J. Wong */ 1622f427cf5cSDarrick J. Wong static inline int 1623f427cf5cSDarrick J. Wong xfs_icwalk_process_inode( 1624f427cf5cSDarrick J. Wong enum xfs_icwalk_goal goal, 1625f427cf5cSDarrick J. Wong struct xfs_inode *ip, 1626f1bc5c56SDarrick J. Wong struct xfs_perag *pag, 1627b26b2bf1SDarrick J. Wong struct xfs_icwalk *icw) 1628f427cf5cSDarrick J. Wong { 1629594ab00bSDarrick J. Wong int error = 0; 1630f427cf5cSDarrick J. Wong 1631f427cf5cSDarrick J. Wong switch (goal) { 1632f427cf5cSDarrick J. Wong case XFS_ICWALK_BLOCKGC: 1633b26b2bf1SDarrick J. Wong error = xfs_blockgc_scan_inode(ip, icw); 1634f427cf5cSDarrick J. Wong break; 1635f1bc5c56SDarrick J. Wong case XFS_ICWALK_RECLAIM: 1636f1bc5c56SDarrick J. Wong xfs_reclaim_inode(ip, pag); 1637f1bc5c56SDarrick J. Wong break; 1638f427cf5cSDarrick J. Wong } 1639f427cf5cSDarrick J. Wong return error; 1640f427cf5cSDarrick J. Wong } 1641f427cf5cSDarrick J. Wong 1642b9baaef4SDarrick J. Wong /* 1643f427cf5cSDarrick J. Wong * For a given per-AG structure @pag and a goal, grab qualifying inodes and 1644f427cf5cSDarrick J. Wong * process them in some manner. 1645df600197SDarrick J. Wong */ 1646df600197SDarrick J. Wong static int 1647c1115c0cSDarrick J. Wong xfs_icwalk_ag( 1648df600197SDarrick J. Wong struct xfs_perag *pag, 1649f427cf5cSDarrick J. Wong enum xfs_icwalk_goal goal, 1650b26b2bf1SDarrick J. Wong struct xfs_icwalk *icw) 1651df600197SDarrick J. Wong { 1652df600197SDarrick J. Wong struct xfs_mount *mp = pag->pag_mount; 1653df600197SDarrick J. Wong uint32_t first_index; 1654df600197SDarrick J. Wong int last_error = 0; 1655df600197SDarrick J. Wong int skipped; 1656df600197SDarrick J. Wong bool done; 1657df600197SDarrick J. Wong int nr_found; 1658df600197SDarrick J. Wong 1659df600197SDarrick J. Wong restart: 1660df600197SDarrick J. Wong done = false; 1661df600197SDarrick J. Wong skipped = 0; 1662f1bc5c56SDarrick J. Wong if (goal == XFS_ICWALK_RECLAIM) 1663f1bc5c56SDarrick J. Wong first_index = READ_ONCE(pag->pag_ici_reclaim_cursor); 1664f1bc5c56SDarrick J. Wong else 1665df600197SDarrick J. Wong first_index = 0; 1666df600197SDarrick J. Wong nr_found = 0; 1667df600197SDarrick J. Wong do { 1668df600197SDarrick J. Wong struct xfs_inode *batch[XFS_LOOKUP_BATCH]; 1669df600197SDarrick J. Wong int error = 0; 1670df600197SDarrick J. Wong int i; 1671df600197SDarrick J. Wong 1672df600197SDarrick J. Wong rcu_read_lock(); 1673df600197SDarrick J. Wong 1674a437b9b4SChristoph Hellwig nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root, 1675df600197SDarrick J. Wong (void **) batch, first_index, 1676a437b9b4SChristoph Hellwig XFS_LOOKUP_BATCH, goal); 1677df600197SDarrick J. Wong if (!nr_found) { 1678f1bc5c56SDarrick J. Wong done = true; 1679df600197SDarrick J. Wong rcu_read_unlock(); 1680df600197SDarrick J. Wong break; 1681df600197SDarrick J. Wong } 1682df600197SDarrick J. Wong 1683df600197SDarrick J. Wong /* 1684df600197SDarrick J. Wong * Grab the inodes before we drop the lock. if we found 1685df600197SDarrick J. Wong * nothing, nr == 0 and the loop will be skipped. 1686df600197SDarrick J. Wong */ 1687df600197SDarrick J. Wong for (i = 0; i < nr_found; i++) { 1688df600197SDarrick J. Wong struct xfs_inode *ip = batch[i]; 1689df600197SDarrick J. Wong 1690b26b2bf1SDarrick J. Wong if (done || !xfs_icwalk_igrab(goal, ip, icw)) 1691df600197SDarrick J. Wong batch[i] = NULL; 1692df600197SDarrick J. Wong 1693df600197SDarrick J. Wong /* 1694df600197SDarrick J. Wong * Update the index for the next lookup. Catch 1695df600197SDarrick J. Wong * overflows into the next AG range which can occur if 1696df600197SDarrick J. Wong * we have inodes in the last block of the AG and we 1697df600197SDarrick J. Wong * are currently pointing to the last inode. 1698df600197SDarrick J. Wong * 1699df600197SDarrick J. Wong * Because we may see inodes that are from the wrong AG 1700df600197SDarrick J. Wong * due to RCU freeing and reallocation, only update the 1701df600197SDarrick J. Wong * index if it lies in this AG. It was a race that lead 1702df600197SDarrick J. Wong * us to see this inode, so another lookup from the 1703df600197SDarrick J. Wong * same index will not find it again. 1704df600197SDarrick J. Wong */ 1705df600197SDarrick J. Wong if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno) 1706df600197SDarrick J. Wong continue; 1707df600197SDarrick J. Wong first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); 1708df600197SDarrick J. Wong if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) 1709df600197SDarrick J. Wong done = true; 1710df600197SDarrick J. Wong } 1711df600197SDarrick J. Wong 1712df600197SDarrick J. Wong /* unlock now we've grabbed the inodes. */ 1713df600197SDarrick J. Wong rcu_read_unlock(); 1714df600197SDarrick J. Wong 1715df600197SDarrick J. Wong for (i = 0; i < nr_found; i++) { 1716df600197SDarrick J. Wong if (!batch[i]) 1717df600197SDarrick J. Wong continue; 1718f1bc5c56SDarrick J. Wong error = xfs_icwalk_process_inode(goal, batch[i], pag, 1719b26b2bf1SDarrick J. Wong icw); 1720df600197SDarrick J. Wong if (error == -EAGAIN) { 1721df600197SDarrick J. Wong skipped++; 1722df600197SDarrick J. Wong continue; 1723df600197SDarrick J. Wong } 1724df600197SDarrick J. Wong if (error && last_error != -EFSCORRUPTED) 1725df600197SDarrick J. Wong last_error = error; 1726df600197SDarrick J. Wong } 1727df600197SDarrick J. Wong 1728df600197SDarrick J. Wong /* bail out if the filesystem is corrupted. */ 1729df600197SDarrick J. Wong if (error == -EFSCORRUPTED) 1730df600197SDarrick J. Wong break; 1731df600197SDarrick J. Wong 1732df600197SDarrick J. Wong cond_resched(); 1733df600197SDarrick J. Wong 1734b26b2bf1SDarrick J. Wong if (icw && (icw->icw_flags & XFS_ICWALK_FLAG_SCAN_LIMIT)) { 1735b26b2bf1SDarrick J. Wong icw->icw_scan_limit -= XFS_LOOKUP_BATCH; 1736b26b2bf1SDarrick J. Wong if (icw->icw_scan_limit <= 0) 1737f1bc5c56SDarrick J. Wong break; 1738f1bc5c56SDarrick J. Wong } 1739df600197SDarrick J. Wong } while (nr_found && !done); 1740df600197SDarrick J. Wong 1741f1bc5c56SDarrick J. Wong if (goal == XFS_ICWALK_RECLAIM) { 1742f1bc5c56SDarrick J. Wong if (done) 1743f1bc5c56SDarrick J. Wong first_index = 0; 1744f1bc5c56SDarrick J. Wong WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index); 1745f1bc5c56SDarrick J. Wong } 1746f1bc5c56SDarrick J. Wong 1747df600197SDarrick J. Wong if (skipped) { 1748df600197SDarrick J. Wong delay(1); 1749df600197SDarrick J. Wong goto restart; 1750df600197SDarrick J. Wong } 1751df600197SDarrick J. Wong return last_error; 1752df600197SDarrick J. Wong } 1753df600197SDarrick J. Wong 1754f427cf5cSDarrick J. Wong /* Walk all incore inodes to achieve a given goal. */ 1755df600197SDarrick J. Wong static int 1756c1115c0cSDarrick J. Wong xfs_icwalk( 1757df600197SDarrick J. Wong struct xfs_mount *mp, 1758f427cf5cSDarrick J. Wong enum xfs_icwalk_goal goal, 1759b26b2bf1SDarrick J. Wong struct xfs_icwalk *icw) 1760df600197SDarrick J. Wong { 1761df600197SDarrick J. Wong struct xfs_perag *pag; 1762df600197SDarrick J. Wong int error = 0; 1763df600197SDarrick J. Wong int last_error = 0; 1764a437b9b4SChristoph Hellwig xfs_agnumber_t agno; 1765df600197SDarrick J. Wong 1766a437b9b4SChristoph Hellwig for_each_perag_tag(mp, agno, pag, goal) { 1767b26b2bf1SDarrick J. Wong error = xfs_icwalk_ag(pag, goal, icw); 1768df600197SDarrick J. Wong if (error) { 1769df600197SDarrick J. Wong last_error = error; 1770a437b9b4SChristoph Hellwig if (error == -EFSCORRUPTED) { 1771c4d5660aSDave Chinner xfs_perag_rele(pag); 1772df600197SDarrick J. Wong break; 1773df600197SDarrick J. Wong } 1774df600197SDarrick J. Wong } 1775a437b9b4SChristoph Hellwig } 1776df600197SDarrick J. Wong return last_error; 17772d53f66bSDarrick J. Wong BUILD_BUG_ON(XFS_ICWALK_PRIVATE_FLAGS & XFS_ICWALK_FLAGS_VALID); 1778df600197SDarrick J. Wong } 1779c6c2066dSDarrick J. Wong 1780c6c2066dSDarrick J. Wong #ifdef DEBUG 1781c6c2066dSDarrick J. Wong static void 1782c6c2066dSDarrick J. Wong xfs_check_delalloc( 1783c6c2066dSDarrick J. Wong struct xfs_inode *ip, 1784c6c2066dSDarrick J. Wong int whichfork) 1785c6c2066dSDarrick J. Wong { 1786732436efSDarrick J. Wong struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); 1787c6c2066dSDarrick J. Wong struct xfs_bmbt_irec got; 1788c6c2066dSDarrick J. Wong struct xfs_iext_cursor icur; 1789c6c2066dSDarrick J. Wong 1790c6c2066dSDarrick J. Wong if (!ifp || !xfs_iext_lookup_extent(ip, ifp, 0, &icur, &got)) 1791c6c2066dSDarrick J. Wong return; 1792c6c2066dSDarrick J. Wong do { 1793c6c2066dSDarrick J. Wong if (isnullstartblock(got.br_startblock)) { 1794c6c2066dSDarrick J. Wong xfs_warn(ip->i_mount, 1795c6c2066dSDarrick J. Wong "ino %llx %s fork has delalloc extent at [0x%llx:0x%llx]", 1796c6c2066dSDarrick J. Wong ip->i_ino, 1797c6c2066dSDarrick J. Wong whichfork == XFS_DATA_FORK ? "data" : "cow", 1798c6c2066dSDarrick J. Wong got.br_startoff, got.br_blockcount); 1799c6c2066dSDarrick J. Wong } 1800c6c2066dSDarrick J. Wong } while (xfs_iext_next_extent(ifp, &icur, &got)); 1801c6c2066dSDarrick J. Wong } 1802c6c2066dSDarrick J. Wong #else 1803c6c2066dSDarrick J. Wong #define xfs_check_delalloc(ip, whichfork) do { } while (0) 1804c6c2066dSDarrick J. Wong #endif 1805c6c2066dSDarrick J. Wong 1806ab23a776SDave Chinner /* Schedule the inode for reclaim. */ 1807ab23a776SDave Chinner static void 1808ab23a776SDave Chinner xfs_inodegc_set_reclaimable( 1809c6c2066dSDarrick J. Wong struct xfs_inode *ip) 1810c6c2066dSDarrick J. Wong { 1811c6c2066dSDarrick J. Wong struct xfs_mount *mp = ip->i_mount; 1812c6c2066dSDarrick J. Wong struct xfs_perag *pag; 1813c6c2066dSDarrick J. Wong 181475c8c50fSDave Chinner if (!xfs_is_shutdown(mp) && ip->i_delayed_blks) { 1815c6c2066dSDarrick J. Wong xfs_check_delalloc(ip, XFS_DATA_FORK); 1816c6c2066dSDarrick J. Wong xfs_check_delalloc(ip, XFS_COW_FORK); 1817c6c2066dSDarrick J. Wong ASSERT(0); 1818c6c2066dSDarrick J. Wong } 1819c6c2066dSDarrick J. Wong 1820c6c2066dSDarrick J. Wong pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 1821c6c2066dSDarrick J. Wong spin_lock(&pag->pag_ici_lock); 1822c6c2066dSDarrick J. Wong spin_lock(&ip->i_flags_lock); 1823c6c2066dSDarrick J. Wong 1824ab23a776SDave Chinner trace_xfs_inode_set_reclaimable(ip); 1825ab23a776SDave Chinner ip->i_flags &= ~(XFS_NEED_INACTIVE | XFS_INACTIVATING); 1826ab23a776SDave Chinner ip->i_flags |= XFS_IRECLAIMABLE; 1827c6c2066dSDarrick J. Wong xfs_perag_set_inode_tag(pag, XFS_INO_TO_AGINO(mp, ip->i_ino), 1828c6c2066dSDarrick J. Wong XFS_ICI_RECLAIM_TAG); 1829c6c2066dSDarrick J. Wong 1830c6c2066dSDarrick J. Wong spin_unlock(&ip->i_flags_lock); 1831c6c2066dSDarrick J. Wong spin_unlock(&pag->pag_ici_lock); 1832c6c2066dSDarrick J. Wong xfs_perag_put(pag); 1833c6c2066dSDarrick J. Wong } 1834ab23a776SDave Chinner 1835ab23a776SDave Chinner /* 1836ab23a776SDave Chinner * Free all speculative preallocations and possibly even the inode itself. 1837ab23a776SDave Chinner * This is the last chance to make changes to an otherwise unreferenced file 1838ab23a776SDave Chinner * before incore reclamation happens. 1839ab23a776SDave Chinner */ 1840ab23a776SDave Chinner static void 1841ab23a776SDave Chinner xfs_inodegc_inactivate( 1842ab23a776SDave Chinner struct xfs_inode *ip) 1843ab23a776SDave Chinner { 1844ab23a776SDave Chinner trace_xfs_inode_inactivating(ip); 1845ab23a776SDave Chinner xfs_inactive(ip); 1846ab23a776SDave Chinner xfs_inodegc_set_reclaimable(ip); 1847ab23a776SDave Chinner } 1848ab23a776SDave Chinner 1849ab23a776SDave Chinner void 1850ab23a776SDave Chinner xfs_inodegc_worker( 1851ab23a776SDave Chinner struct work_struct *work) 1852ab23a776SDave Chinner { 18537cf2b0f9SDave Chinner struct xfs_inodegc *gc = container_of(to_delayed_work(work), 18547cf2b0f9SDave Chinner struct xfs_inodegc, work); 1855ab23a776SDave Chinner struct llist_node *node = llist_del_all(&gc->list); 1856ab23a776SDave Chinner struct xfs_inode *ip, *n; 18574da11251SWu Guanghao unsigned int nofs_flag; 1858ab23a776SDave Chinner 1859*b37c4c83SDarrick J. Wong ASSERT(gc->cpu == smp_processor_id()); 1860*b37c4c83SDarrick J. Wong 1861ab23a776SDave Chinner WRITE_ONCE(gc->items, 0); 1862ab23a776SDave Chinner 1863ab23a776SDave Chinner if (!node) 1864ab23a776SDave Chinner return; 1865ab23a776SDave Chinner 18664da11251SWu Guanghao /* 18674da11251SWu Guanghao * We can allocate memory here while doing writeback on behalf of 18684da11251SWu Guanghao * memory reclaim. To avoid memory allocation deadlocks set the 18694da11251SWu Guanghao * task-wide nofs context for the following operations. 18704da11251SWu Guanghao */ 18714da11251SWu Guanghao nofs_flag = memalloc_nofs_save(); 18724da11251SWu Guanghao 1873ab23a776SDave Chinner ip = llist_entry(node, struct xfs_inode, i_gclist); 187440b1de00SDarrick J. Wong trace_xfs_inodegc_worker(ip->i_mount, READ_ONCE(gc->shrinker_hits)); 1875ab23a776SDave Chinner 187640b1de00SDarrick J. Wong WRITE_ONCE(gc->shrinker_hits, 0); 1877ab23a776SDave Chinner llist_for_each_entry_safe(ip, n, node, i_gclist) { 1878ab23a776SDave Chinner xfs_iflags_set(ip, XFS_INACTIVATING); 1879ab23a776SDave Chinner xfs_inodegc_inactivate(ip); 1880ab23a776SDave Chinner } 18814da11251SWu Guanghao 18824da11251SWu Guanghao memalloc_nofs_restore(nofs_flag); 1883ab23a776SDave Chinner } 1884ab23a776SDave Chinner 1885ab23a776SDave Chinner /* 18865e672cd6SDave Chinner * Expedite all pending inodegc work to run immediately. This does not wait for 18875e672cd6SDave Chinner * completion of the work. 18885e672cd6SDave Chinner */ 18895e672cd6SDave Chinner void 18905e672cd6SDave Chinner xfs_inodegc_push( 18915e672cd6SDave Chinner struct xfs_mount *mp) 18925e672cd6SDave Chinner { 18935e672cd6SDave Chinner if (!xfs_is_inodegc_enabled(mp)) 18945e672cd6SDave Chinner return; 18955e672cd6SDave Chinner trace_xfs_inodegc_push(mp, __return_address); 18965e672cd6SDave Chinner xfs_inodegc_queue_all(mp); 18975e672cd6SDave Chinner } 18985e672cd6SDave Chinner 18995e672cd6SDave Chinner /* 19006191cf3aSBrian Foster * Force all currently queued inode inactivation work to run immediately and 19016191cf3aSBrian Foster * wait for the work to finish. 1902ab23a776SDave Chinner */ 1903ab23a776SDave Chinner void 1904ab23a776SDave Chinner xfs_inodegc_flush( 1905ab23a776SDave Chinner struct xfs_mount *mp) 1906ab23a776SDave Chinner { 19075e672cd6SDave Chinner xfs_inodegc_push(mp); 1908ab23a776SDave Chinner trace_xfs_inodegc_flush(mp, __return_address); 19096191cf3aSBrian Foster flush_workqueue(mp->m_inodegc_wq); 1910ab23a776SDave Chinner } 1911ab23a776SDave Chinner 1912ab23a776SDave Chinner /* 1913ab23a776SDave Chinner * Flush all the pending work and then disable the inode inactivation background 1914ab23a776SDave Chinner * workers and wait for them to stop. 1915ab23a776SDave Chinner */ 1916ab23a776SDave Chinner void 1917ab23a776SDave Chinner xfs_inodegc_stop( 1918ab23a776SDave Chinner struct xfs_mount *mp) 1919ab23a776SDave Chinner { 1920ab23a776SDave Chinner if (!xfs_clear_inodegc_enabled(mp)) 1921ab23a776SDave Chinner return; 1922ab23a776SDave Chinner 1923ab23a776SDave Chinner xfs_inodegc_queue_all(mp); 19246191cf3aSBrian Foster drain_workqueue(mp->m_inodegc_wq); 1925ab23a776SDave Chinner 1926ab23a776SDave Chinner trace_xfs_inodegc_stop(mp, __return_address); 1927ab23a776SDave Chinner } 1928ab23a776SDave Chinner 1929ab23a776SDave Chinner /* 1930ab23a776SDave Chinner * Enable the inode inactivation background workers and schedule deferred inode 1931ab23a776SDave Chinner * inactivation work if there is any. 1932ab23a776SDave Chinner */ 1933ab23a776SDave Chinner void 1934ab23a776SDave Chinner xfs_inodegc_start( 1935ab23a776SDave Chinner struct xfs_mount *mp) 1936ab23a776SDave Chinner { 1937ab23a776SDave Chinner if (xfs_set_inodegc_enabled(mp)) 1938ab23a776SDave Chinner return; 1939ab23a776SDave Chinner 1940ab23a776SDave Chinner trace_xfs_inodegc_start(mp, __return_address); 1941ab23a776SDave Chinner xfs_inodegc_queue_all(mp); 1942ab23a776SDave Chinner } 1943ab23a776SDave Chinner 194465f03d86SDarrick J. Wong #ifdef CONFIG_XFS_RT 194565f03d86SDarrick J. Wong static inline bool 194665f03d86SDarrick J. Wong xfs_inodegc_want_queue_rt_file( 194765f03d86SDarrick J. Wong struct xfs_inode *ip) 194865f03d86SDarrick J. Wong { 194965f03d86SDarrick J. Wong struct xfs_mount *mp = ip->i_mount; 195065f03d86SDarrick J. Wong 195165f03d86SDarrick J. Wong if (!XFS_IS_REALTIME_INODE(ip)) 195265f03d86SDarrick J. Wong return false; 195365f03d86SDarrick J. Wong 19542229276cSDarrick J. Wong if (__percpu_counter_compare(&mp->m_frextents, 19552229276cSDarrick J. Wong mp->m_low_rtexts[XFS_LOWSP_5_PCNT], 19562229276cSDarrick J. Wong XFS_FDBLOCKS_BATCH) < 0) 19572229276cSDarrick J. Wong return true; 19582229276cSDarrick J. Wong 19592229276cSDarrick J. Wong return false; 196065f03d86SDarrick J. Wong } 196165f03d86SDarrick J. Wong #else 196265f03d86SDarrick J. Wong # define xfs_inodegc_want_queue_rt_file(ip) (false) 196365f03d86SDarrick J. Wong #endif /* CONFIG_XFS_RT */ 196465f03d86SDarrick J. Wong 1965ab23a776SDave Chinner /* 1966ab23a776SDave Chinner * Schedule the inactivation worker when: 1967ab23a776SDave Chinner * 1968ab23a776SDave Chinner * - We've accumulated more than one inode cluster buffer's worth of inodes. 19697d6f07d2SDarrick J. Wong * - There is less than 5% free space left. 1970108523b8SDarrick J. Wong * - Any of the quotas for this inode are near an enforcement limit. 1971ab23a776SDave Chinner */ 1972ab23a776SDave Chinner static inline bool 1973ab23a776SDave Chinner xfs_inodegc_want_queue_work( 1974ab23a776SDave Chinner struct xfs_inode *ip, 1975ab23a776SDave Chinner unsigned int items) 1976ab23a776SDave Chinner { 1977ab23a776SDave Chinner struct xfs_mount *mp = ip->i_mount; 1978ab23a776SDave Chinner 1979ab23a776SDave Chinner if (items > mp->m_ino_geo.inodes_per_cluster) 1980ab23a776SDave Chinner return true; 1981ab23a776SDave Chinner 19827d6f07d2SDarrick J. Wong if (__percpu_counter_compare(&mp->m_fdblocks, 19837d6f07d2SDarrick J. Wong mp->m_low_space[XFS_LOWSP_5_PCNT], 19847d6f07d2SDarrick J. Wong XFS_FDBLOCKS_BATCH) < 0) 19857d6f07d2SDarrick J. Wong return true; 19867d6f07d2SDarrick J. Wong 198765f03d86SDarrick J. Wong if (xfs_inodegc_want_queue_rt_file(ip)) 198865f03d86SDarrick J. Wong return true; 198965f03d86SDarrick J. Wong 1990108523b8SDarrick J. Wong if (xfs_inode_near_dquot_enforcement(ip, XFS_DQTYPE_USER)) 1991108523b8SDarrick J. Wong return true; 1992108523b8SDarrick J. Wong 1993108523b8SDarrick J. Wong if (xfs_inode_near_dquot_enforcement(ip, XFS_DQTYPE_GROUP)) 1994108523b8SDarrick J. Wong return true; 1995108523b8SDarrick J. Wong 1996108523b8SDarrick J. Wong if (xfs_inode_near_dquot_enforcement(ip, XFS_DQTYPE_PROJ)) 1997108523b8SDarrick J. Wong return true; 1998108523b8SDarrick J. Wong 1999ab23a776SDave Chinner return false; 2000ab23a776SDave Chinner } 2001ab23a776SDave Chinner 2002ab23a776SDave Chinner /* 2003ab23a776SDave Chinner * Upper bound on the number of inodes in each AG that can be queued for 2004ab23a776SDave Chinner * inactivation at any given time, to avoid monopolizing the workqueue. 2005ab23a776SDave Chinner */ 2006ab23a776SDave Chinner #define XFS_INODEGC_MAX_BACKLOG (4 * XFS_INODES_PER_CHUNK) 2007ab23a776SDave Chinner 2008ab23a776SDave Chinner /* 2009ab23a776SDave Chinner * Make the frontend wait for inactivations when: 2010ab23a776SDave Chinner * 201140b1de00SDarrick J. Wong * - Memory shrinkers queued the inactivation worker and it hasn't finished. 2012ab23a776SDave Chinner * - The queue depth exceeds the maximum allowable percpu backlog. 2013ab23a776SDave Chinner * 2014ab23a776SDave Chinner * Note: If the current thread is running a transaction, we don't ever want to 2015ab23a776SDave Chinner * wait for other transactions because that could introduce a deadlock. 2016ab23a776SDave Chinner */ 2017ab23a776SDave Chinner static inline bool 2018ab23a776SDave Chinner xfs_inodegc_want_flush_work( 2019ab23a776SDave Chinner struct xfs_inode *ip, 202040b1de00SDarrick J. Wong unsigned int items, 202140b1de00SDarrick J. Wong unsigned int shrinker_hits) 2022ab23a776SDave Chinner { 2023ab23a776SDave Chinner if (current->journal_info) 2024ab23a776SDave Chinner return false; 2025ab23a776SDave Chinner 202640b1de00SDarrick J. Wong if (shrinker_hits > 0) 202740b1de00SDarrick J. Wong return true; 202840b1de00SDarrick J. Wong 2029ab23a776SDave Chinner if (items > XFS_INODEGC_MAX_BACKLOG) 2030ab23a776SDave Chinner return true; 2031ab23a776SDave Chinner 2032ab23a776SDave Chinner return false; 2033ab23a776SDave Chinner } 2034ab23a776SDave Chinner 2035ab23a776SDave Chinner /* 2036ab23a776SDave Chinner * Queue a background inactivation worker if there are inodes that need to be 2037ab23a776SDave Chinner * inactivated and higher level xfs code hasn't disabled the background 2038ab23a776SDave Chinner * workers. 2039ab23a776SDave Chinner */ 2040ab23a776SDave Chinner static void 2041ab23a776SDave Chinner xfs_inodegc_queue( 2042ab23a776SDave Chinner struct xfs_inode *ip) 2043ab23a776SDave Chinner { 2044ab23a776SDave Chinner struct xfs_mount *mp = ip->i_mount; 2045ab23a776SDave Chinner struct xfs_inodegc *gc; 2046ab23a776SDave Chinner int items; 204740b1de00SDarrick J. Wong unsigned int shrinker_hits; 20487cf2b0f9SDave Chinner unsigned long queue_delay = 1; 2049ab23a776SDave Chinner 2050ab23a776SDave Chinner trace_xfs_inode_set_need_inactive(ip); 2051ab23a776SDave Chinner spin_lock(&ip->i_flags_lock); 2052ab23a776SDave Chinner ip->i_flags |= XFS_NEED_INACTIVE; 2053ab23a776SDave Chinner spin_unlock(&ip->i_flags_lock); 2054ab23a776SDave Chinner 2055ab23a776SDave Chinner gc = get_cpu_ptr(mp->m_inodegc); 2056ab23a776SDave Chinner llist_add(&ip->i_gclist, &gc->list); 2057ab23a776SDave Chinner items = READ_ONCE(gc->items); 2058ab23a776SDave Chinner WRITE_ONCE(gc->items, items + 1); 205940b1de00SDarrick J. Wong shrinker_hits = READ_ONCE(gc->shrinker_hits); 20607cf2b0f9SDave Chinner 20617cf2b0f9SDave Chinner /* 20627cf2b0f9SDave Chinner * We queue the work while holding the current CPU so that the work 20637cf2b0f9SDave Chinner * is scheduled to run on this CPU. 20647cf2b0f9SDave Chinner */ 20657cf2b0f9SDave Chinner if (!xfs_is_inodegc_enabled(mp)) { 2066ab23a776SDave Chinner put_cpu_ptr(gc); 2067ab23a776SDave Chinner return; 2068ab23a776SDave Chinner } 2069ab23a776SDave Chinner 20707cf2b0f9SDave Chinner if (xfs_inodegc_want_queue_work(ip, items)) 20717cf2b0f9SDave Chinner queue_delay = 0; 20727cf2b0f9SDave Chinner 20737cf2b0f9SDave Chinner trace_xfs_inodegc_queue(mp, __return_address); 207403e0add8SDarrick J. Wong mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work, 207503e0add8SDarrick J. Wong queue_delay); 20767cf2b0f9SDave Chinner put_cpu_ptr(gc); 20777cf2b0f9SDave Chinner 207840b1de00SDarrick J. Wong if (xfs_inodegc_want_flush_work(ip, items, shrinker_hits)) { 2079ab23a776SDave Chinner trace_xfs_inodegc_throttle(mp, __return_address); 20807cf2b0f9SDave Chinner flush_delayed_work(&gc->work); 2081ab23a776SDave Chinner } 2082ab23a776SDave Chinner } 2083ab23a776SDave Chinner 2084ab23a776SDave Chinner /* 2085ab23a776SDave Chinner * Fold the dead CPU inodegc queue into the current CPUs queue. 2086ab23a776SDave Chinner */ 2087ab23a776SDave Chinner void 2088ab23a776SDave Chinner xfs_inodegc_cpu_dead( 2089ab23a776SDave Chinner struct xfs_mount *mp, 2090ab23a776SDave Chinner unsigned int dead_cpu) 2091ab23a776SDave Chinner { 2092ab23a776SDave Chinner struct xfs_inodegc *dead_gc, *gc; 2093ab23a776SDave Chinner struct llist_node *first, *last; 2094ab23a776SDave Chinner unsigned int count = 0; 2095ab23a776SDave Chinner 2096ab23a776SDave Chinner dead_gc = per_cpu_ptr(mp->m_inodegc, dead_cpu); 20977cf2b0f9SDave Chinner cancel_delayed_work_sync(&dead_gc->work); 2098ab23a776SDave Chinner 2099ab23a776SDave Chinner if (llist_empty(&dead_gc->list)) 2100ab23a776SDave Chinner return; 2101ab23a776SDave Chinner 2102ab23a776SDave Chinner first = dead_gc->list.first; 2103ab23a776SDave Chinner last = first; 2104ab23a776SDave Chinner while (last->next) { 2105ab23a776SDave Chinner last = last->next; 2106ab23a776SDave Chinner count++; 2107ab23a776SDave Chinner } 2108ab23a776SDave Chinner dead_gc->list.first = NULL; 2109ab23a776SDave Chinner dead_gc->items = 0; 2110ab23a776SDave Chinner 2111ab23a776SDave Chinner /* Add pending work to current CPU */ 2112ab23a776SDave Chinner gc = get_cpu_ptr(mp->m_inodegc); 2113ab23a776SDave Chinner llist_add_batch(first, last, &gc->list); 2114ab23a776SDave Chinner count += READ_ONCE(gc->items); 2115ab23a776SDave Chinner WRITE_ONCE(gc->items, count); 2116ab23a776SDave Chinner 2117ab23a776SDave Chinner if (xfs_is_inodegc_enabled(mp)) { 2118ab23a776SDave Chinner trace_xfs_inodegc_queue(mp, __return_address); 211903e0add8SDarrick J. Wong mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work, 212003e0add8SDarrick J. Wong 0); 2121ab23a776SDave Chinner } 21227cf2b0f9SDave Chinner put_cpu_ptr(gc); 2123ab23a776SDave Chinner } 2124ab23a776SDave Chinner 2125ab23a776SDave Chinner /* 2126ab23a776SDave Chinner * We set the inode flag atomically with the radix tree tag. Once we get tag 2127ab23a776SDave Chinner * lookups on the radix tree, this inode flag can go away. 2128ab23a776SDave Chinner * 2129ab23a776SDave Chinner * We always use background reclaim here because even if the inode is clean, it 2130ab23a776SDave Chinner * still may be under IO and hence we have wait for IO completion to occur 2131ab23a776SDave Chinner * before we can reclaim the inode. The background reclaim path handles this 2132ab23a776SDave Chinner * more efficiently than we can here, so simply let background reclaim tear down 2133ab23a776SDave Chinner * all inodes. 2134ab23a776SDave Chinner */ 2135ab23a776SDave Chinner void 2136ab23a776SDave Chinner xfs_inode_mark_reclaimable( 2137ab23a776SDave Chinner struct xfs_inode *ip) 2138ab23a776SDave Chinner { 2139ab23a776SDave Chinner struct xfs_mount *mp = ip->i_mount; 2140ab23a776SDave Chinner bool need_inactive; 2141ab23a776SDave Chinner 2142ab23a776SDave Chinner XFS_STATS_INC(mp, vn_reclaim); 2143ab23a776SDave Chinner 2144ab23a776SDave Chinner /* 2145ab23a776SDave Chinner * We should never get here with any of the reclaim flags already set. 2146ab23a776SDave Chinner */ 2147ab23a776SDave Chinner ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_ALL_IRECLAIM_FLAGS)); 2148ab23a776SDave Chinner 2149ab23a776SDave Chinner need_inactive = xfs_inode_needs_inactive(ip); 2150ab23a776SDave Chinner if (need_inactive) { 2151ab23a776SDave Chinner xfs_inodegc_queue(ip); 2152ab23a776SDave Chinner return; 2153ab23a776SDave Chinner } 2154ab23a776SDave Chinner 2155ab23a776SDave Chinner /* Going straight to reclaim, so drop the dquots. */ 2156ab23a776SDave Chinner xfs_qm_dqdetach(ip); 2157ab23a776SDave Chinner xfs_inodegc_set_reclaimable(ip); 2158ab23a776SDave Chinner } 215940b1de00SDarrick J. Wong 216040b1de00SDarrick J. Wong /* 216140b1de00SDarrick J. Wong * Register a phony shrinker so that we can run background inodegc sooner when 216240b1de00SDarrick J. Wong * there's memory pressure. Inactivation does not itself free any memory but 216340b1de00SDarrick J. Wong * it does make inodes reclaimable, which eventually frees memory. 216440b1de00SDarrick J. Wong * 216540b1de00SDarrick J. Wong * The count function, seek value, and batch value are crafted to trigger the 216640b1de00SDarrick J. Wong * scan function during the second round of scanning. Hopefully this means 216740b1de00SDarrick J. Wong * that we reclaimed enough memory that initiating metadata transactions won't 216840b1de00SDarrick J. Wong * make things worse. 216940b1de00SDarrick J. Wong */ 217040b1de00SDarrick J. Wong #define XFS_INODEGC_SHRINKER_COUNT (1UL << DEF_PRIORITY) 217140b1de00SDarrick J. Wong #define XFS_INODEGC_SHRINKER_BATCH ((XFS_INODEGC_SHRINKER_COUNT / 2) + 1) 217240b1de00SDarrick J. Wong 217340b1de00SDarrick J. Wong static unsigned long 217440b1de00SDarrick J. Wong xfs_inodegc_shrinker_count( 217540b1de00SDarrick J. Wong struct shrinker *shrink, 217640b1de00SDarrick J. Wong struct shrink_control *sc) 217740b1de00SDarrick J. Wong { 217840b1de00SDarrick J. Wong struct xfs_mount *mp = container_of(shrink, struct xfs_mount, 217940b1de00SDarrick J. Wong m_inodegc_shrinker); 218040b1de00SDarrick J. Wong struct xfs_inodegc *gc; 218140b1de00SDarrick J. Wong int cpu; 218240b1de00SDarrick J. Wong 218340b1de00SDarrick J. Wong if (!xfs_is_inodegc_enabled(mp)) 218440b1de00SDarrick J. Wong return 0; 218540b1de00SDarrick J. Wong 218640b1de00SDarrick J. Wong for_each_online_cpu(cpu) { 218740b1de00SDarrick J. Wong gc = per_cpu_ptr(mp->m_inodegc, cpu); 218840b1de00SDarrick J. Wong if (!llist_empty(&gc->list)) 218940b1de00SDarrick J. Wong return XFS_INODEGC_SHRINKER_COUNT; 219040b1de00SDarrick J. Wong } 219140b1de00SDarrick J. Wong 219240b1de00SDarrick J. Wong return 0; 219340b1de00SDarrick J. Wong } 219440b1de00SDarrick J. Wong 219540b1de00SDarrick J. Wong static unsigned long 219640b1de00SDarrick J. Wong xfs_inodegc_shrinker_scan( 219740b1de00SDarrick J. Wong struct shrinker *shrink, 219840b1de00SDarrick J. Wong struct shrink_control *sc) 219940b1de00SDarrick J. Wong { 220040b1de00SDarrick J. Wong struct xfs_mount *mp = container_of(shrink, struct xfs_mount, 220140b1de00SDarrick J. Wong m_inodegc_shrinker); 220240b1de00SDarrick J. Wong struct xfs_inodegc *gc; 220340b1de00SDarrick J. Wong int cpu; 220440b1de00SDarrick J. Wong bool no_items = true; 220540b1de00SDarrick J. Wong 220640b1de00SDarrick J. Wong if (!xfs_is_inodegc_enabled(mp)) 220740b1de00SDarrick J. Wong return SHRINK_STOP; 220840b1de00SDarrick J. Wong 220940b1de00SDarrick J. Wong trace_xfs_inodegc_shrinker_scan(mp, sc, __return_address); 221040b1de00SDarrick J. Wong 221140b1de00SDarrick J. Wong for_each_online_cpu(cpu) { 221240b1de00SDarrick J. Wong gc = per_cpu_ptr(mp->m_inodegc, cpu); 221340b1de00SDarrick J. Wong if (!llist_empty(&gc->list)) { 221440b1de00SDarrick J. Wong unsigned int h = READ_ONCE(gc->shrinker_hits); 221540b1de00SDarrick J. Wong 221640b1de00SDarrick J. Wong WRITE_ONCE(gc->shrinker_hits, h + 1); 22177cf2b0f9SDave Chinner mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0); 221840b1de00SDarrick J. Wong no_items = false; 221940b1de00SDarrick J. Wong } 222040b1de00SDarrick J. Wong } 222140b1de00SDarrick J. Wong 222240b1de00SDarrick J. Wong /* 222340b1de00SDarrick J. Wong * If there are no inodes to inactivate, we don't want the shrinker 222440b1de00SDarrick J. Wong * to think there's deferred work to call us back about. 222540b1de00SDarrick J. Wong */ 222640b1de00SDarrick J. Wong if (no_items) 222740b1de00SDarrick J. Wong return LONG_MAX; 222840b1de00SDarrick J. Wong 222940b1de00SDarrick J. Wong return SHRINK_STOP; 223040b1de00SDarrick J. Wong } 223140b1de00SDarrick J. Wong 223240b1de00SDarrick J. Wong /* Register a shrinker so we can accelerate inodegc and throttle queuing. */ 223340b1de00SDarrick J. Wong int 223440b1de00SDarrick J. Wong xfs_inodegc_register_shrinker( 223540b1de00SDarrick J. Wong struct xfs_mount *mp) 223640b1de00SDarrick J. Wong { 223740b1de00SDarrick J. Wong struct shrinker *shrink = &mp->m_inodegc_shrinker; 223840b1de00SDarrick J. Wong 223940b1de00SDarrick J. Wong shrink->count_objects = xfs_inodegc_shrinker_count; 224040b1de00SDarrick J. Wong shrink->scan_objects = xfs_inodegc_shrinker_scan; 224140b1de00SDarrick J. Wong shrink->seeks = 0; 224240b1de00SDarrick J. Wong shrink->flags = SHRINKER_NONSLAB; 224340b1de00SDarrick J. Wong shrink->batch = XFS_INODEGC_SHRINKER_BATCH; 224440b1de00SDarrick J. Wong 2245e33c267aSRoman Gushchin return register_shrinker(shrink, "xfs-inodegc:%s", mp->m_super->s_id); 224640b1de00SDarrick J. Wong } 2247