xfs_icache.c (6d8b79cfca39399ef9115fb65dde85993455c9a3) | xfs_icache.c (33479e0542df066fb0b47df18780e93bfe6e0dc5) |
---|---|
1/* 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 3 * All Rights Reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * --- 27 unchanged lines hidden (view full) --- 36#include "xfs_quota.h" 37#include "xfs_trace.h" 38#include "xfs_fsops.h" 39#include "xfs_icache.h" 40 41#include <linux/kthread.h> 42#include <linux/freezer.h> 43 | 1/* 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 3 * All Rights Reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * --- 27 unchanged lines hidden (view full) --- 36#include "xfs_quota.h" 37#include "xfs_trace.h" 38#include "xfs_fsops.h" 39#include "xfs_icache.h" 40 41#include <linux/kthread.h> 42#include <linux/freezer.h> 43 |
 | 44STATIC void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, 45 struct xfs_perag *pag, struct xfs_inode *ip); 46 |
44/* | 47/* |
48 * Allocate and initialise an xfs_inode. 49 */ 50STATIC struct xfs_inode * 51xfs_inode_alloc( 52 struct xfs_mount *mp, 53 xfs_ino_t ino) 54{ 55 struct xfs_inode *ip; 56 57 /* 58 * if this didn't occur in transactions, we could use 59 * KM_MAYFAIL and return NULL here on ENOMEM. Set the 60 * code up to do this anyway. 61 */ 62 ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); 63 if (!ip) 64 return NULL; 65 if (inode_init_always(mp->m_super, VFS_I(ip))) { 66 kmem_zone_free(xfs_inode_zone, ip); 67 return NULL; 68 } 69 70 ASSERT(atomic_read(&ip->i_pincount) == 0); 71 ASSERT(!spin_is_locked(&ip->i_flags_lock)); 72 ASSERT(!xfs_isiflocked(ip)); 73 ASSERT(ip->i_ino == 0); 74 75 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); 76 77 /* initialise the xfs inode */ 78 ip->i_ino = ino; 79 ip->i_mount = mp; 80 memset(&ip->i_imap, 0, sizeof(struct xfs_imap)); 81 ip->i_afp = NULL; 82 memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); 83 ip->i_flags = 0; 84 ip->i_delayed_blks = 0; 85 memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); 86 87 return ip; 88} 89 90STATIC void 91xfs_inode_free_callback( 92 struct rcu_head *head) 93{ 94 struct inode *inode = container_of(head, struct inode, i_rcu); 95 struct xfs_inode *ip = XFS_I(inode); 96 97 kmem_zone_free(xfs_inode_zone, ip); 98} 99 100STATIC void 101xfs_inode_free( 102 struct xfs_inode *ip) 103{ 104 switch (ip->i_d.di_mode & S_IFMT) { 105 case S_IFREG: 106 case S_IFDIR: 107 case S_IFLNK: 108 xfs_idestroy_fork(ip, XFS_DATA_FORK); 109 break; 110 } 111 112 if (ip->i_afp) 113 xfs_idestroy_fork(ip, XFS_ATTR_FORK); 114 115 if (ip->i_itemp) { 116 ASSERT(!(ip->i_itemp->ili_item.li_flags & XFS_LI_IN_AIL)); 117 xfs_inode_item_destroy(ip); 118 ip->i_itemp = NULL; 119 } 120 121 /* asserts to verify all state is correct here */ 122 ASSERT(atomic_read(&ip->i_pincount) == 0); 123 ASSERT(!spin_is_locked(&ip->i_flags_lock)); 124 ASSERT(!xfs_isiflocked(ip)); 125 126 /* 127 * Because we use RCU freeing we need to ensure the inode always 128 * appears to be reclaimed with an invalid inode number when in the 129 * free state. The ip->i_flags_lock provides the barrier against lookup 130 * races. 131 */ 132 spin_lock(&ip->i_flags_lock); 133 ip->i_flags = XFS_IRECLAIM; 134 ip->i_ino = 0; 135 spin_unlock(&ip->i_flags_lock); 136 137 call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); 138} 139 140/* 141 * Check the validity of the inode we just found it the cache 142 */ 143static int 144xfs_iget_cache_hit( 145 struct xfs_perag *pag, 146 struct xfs_inode *ip, 147 xfs_ino_t ino, 148 int flags, 149 int lock_flags) __releases(RCU) 150{ 151 struct inode *inode = VFS_I(ip); 152 struct xfs_mount *mp = ip->i_mount; 153 int error; 154 155 /* 156 * check for re-use of an inode within an RCU grace period due to the 157 * radix tree nodes not being updated yet. We monitor for this by 158 * setting the inode number to zero before freeing the inode structure. 159 * If the inode has been reallocated and set up, then the inode number 160 * will not match, so check for that, too. 161 */ 162 spin_lock(&ip->i_flags_lock); 163 if (ip->i_ino != ino) { 164 trace_xfs_iget_skip(ip); 165 XFS_STATS_INC(xs_ig_frecycle); 166 error = EAGAIN; 167 goto out_error; 168 } 169 170 171 /* 172 * If we are racing with another cache hit that is currently 173 * instantiating this inode or currently recycling it out of 174 * reclaimabe state, wait for the initialisation to complete 175 * before continuing. 
176 * 177 * XXX(hch): eventually we should do something equivalent to 178 * wait_on_inode to wait for these flags to be cleared 179 * instead of polling for it. 180 */ 181 if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { 182 trace_xfs_iget_skip(ip); 183 XFS_STATS_INC(xs_ig_frecycle); 184 error = EAGAIN; 185 goto out_error; 186 } 187 188 /* 189 * If lookup is racing with unlink return an error immediately. 190 */ 191 if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { 192 error = ENOENT; 193 goto out_error; 194 } 195 196 /* 197 * If IRECLAIMABLE is set, we've torn down the VFS inode already. 198 * Need to carefully get it back into useable state. 199 */ 200 if (ip->i_flags & XFS_IRECLAIMABLE) { 201 trace_xfs_iget_reclaim(ip); 202 203 /* 204 * We need to set XFS_IRECLAIM to prevent xfs_reclaim_inode 205 * from stomping over us while we recycle the inode. We can't 206 * clear the radix tree reclaimable tag yet as it requires 207 * pag_ici_lock to be held exclusive. 208 */ 209 ip->i_flags |= XFS_IRECLAIM; 210 211 spin_unlock(&ip->i_flags_lock); 212 rcu_read_unlock(); 213 214 error = -inode_init_always(mp->m_super, inode); 215 if (error) { 216 /* 217 * Re-initializing the inode failed, and we are in deep 218 * trouble. Try to re-add it to the reclaim list. 219 */ 220 rcu_read_lock(); 221 spin_lock(&ip->i_flags_lock); 222 223 ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM); 224 ASSERT(ip->i_flags & XFS_IRECLAIMABLE); 225 trace_xfs_iget_reclaim_fail(ip); 226 goto out_error; 227 } 228 229 spin_lock(&pag->pag_ici_lock); 230 spin_lock(&ip->i_flags_lock); 231 232 /* 233 * Clear the per-lifetime state in the inode as we are now 234 * effectively a new inode and need to return to the initial 235 * state before reuse occurs. 236 */ 237 ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS; 238 ip->i_flags |= XFS_INEW; 239 __xfs_inode_clear_reclaim_tag(mp, pag, ip); 240 inode->i_state = I_NEW; 241 242 ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); 243 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); 244 245 spin_unlock(&ip->i_flags_lock); 246 spin_unlock(&pag->pag_ici_lock); 247 } else { 248 /* If the VFS inode is being torn down, pause and try again. */ 249 if (!igrab(inode)) { 250 trace_xfs_iget_skip(ip); 251 error = EAGAIN; 252 goto out_error; 253 } 254 255 /* We've got a live one. */ 256 spin_unlock(&ip->i_flags_lock); 257 rcu_read_unlock(); 258 trace_xfs_iget_hit(ip); 259 } 260 261 if (lock_flags != 0) 262 xfs_ilock(ip, lock_flags); 263 264 xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE); 265 XFS_STATS_INC(xs_ig_found); 266 267 return 0; 268 269out_error: 270 spin_unlock(&ip->i_flags_lock); 271 rcu_read_unlock(); 272 return error; 273} 274 275 276static int 277xfs_iget_cache_miss( 278 struct xfs_mount *mp, 279 struct xfs_perag *pag, 280 xfs_trans_t *tp, 281 xfs_ino_t ino, 282 struct xfs_inode **ipp, 283 int flags, 284 int lock_flags) 285{ 286 struct xfs_inode *ip; 287 int error; 288 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino); 289 int iflags; 290 291 ip = xfs_inode_alloc(mp, ino); 292 if (!ip) 293 return ENOMEM; 294 295 error = xfs_iread(mp, tp, ip, flags); 296 if (error) 297 goto out_destroy; 298 299 trace_xfs_iget_miss(ip); 300 301 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { 302 error = ENOENT; 303 goto out_destroy; 304 } 305 306 /* 307 * Preload the radix tree so we can insert safely under the 308 * write spinlock. Note that we cannot sleep inside the preload 309 * region. Since we can be called from transaction context, don't 310 * recurse into the file system. 
311 */ 312 if (radix_tree_preload(GFP_NOFS)) { 313 error = EAGAIN; 314 goto out_destroy; 315 } 316 317 /* 318 * Because the inode hasn't been added to the radix-tree yet it can't 319 * be found by another thread, so we can do the non-sleeping lock here. 320 */ 321 if (lock_flags) { 322 if (!xfs_ilock_nowait(ip, lock_flags)) 323 BUG(); 324 } 325 326 /* 327 * These values must be set before inserting the inode into the radix 328 * tree as the moment it is inserted a concurrent lookup (allowed by the 329 * RCU locking mechanism) can find it and that lookup must see that this 330 * is an inode currently under construction (i.e. that XFS_INEW is set). 331 * The ip->i_flags_lock that protects the XFS_INEW flag forms the 332 * memory barrier that ensures this detection works correctly at lookup 333 * time. 334 */ 335 iflags = XFS_INEW; 336 if (flags & XFS_IGET_DONTCACHE) 337 iflags |= XFS_IDONTCACHE; 338 ip->i_udquot = ip->i_gdquot = NULL; 339 xfs_iflags_set(ip, iflags); 340 341 /* insert the new inode */ 342 spin_lock(&pag->pag_ici_lock); 343 error = radix_tree_insert(&pag->pag_ici_root, agino, ip); 344 if (unlikely(error)) { 345 WARN_ON(error != -EEXIST); 346 XFS_STATS_INC(xs_ig_dup); 347 error = EAGAIN; 348 goto out_preload_end; 349 } 350 spin_unlock(&pag->pag_ici_lock); 351 radix_tree_preload_end(); 352 353 *ipp = ip; 354 return 0; 355 356out_preload_end: 357 spin_unlock(&pag->pag_ici_lock); 358 radix_tree_preload_end(); 359 if (lock_flags) 360 xfs_iunlock(ip, lock_flags); 361out_destroy: 362 __destroy_inode(VFS_I(ip)); 363 xfs_inode_free(ip); 364 return error; 365} 366 367/* 368 * Look up an inode by number in the given file system. 369 * The inode is looked up in the cache held in each AG. 370 * If the inode is found in the cache, initialise the vfs inode 371 * if necessary. 372 * 373 * If it is not in core, read it in from the file system's device, 374 * add it to the cache and initialise the vfs inode. 375 * 376 * The inode is locked according to the value of the lock_flags parameter. 377 * This flag parameter indicates how and if the inode's IO lock and inode lock 378 * should be taken. 379 * 380 * mp -- the mount point structure for the current file system. It points 381 * to the inode hash table. 382 * tp -- a pointer to the current transaction if there is one. This is 383 * simply passed through to the xfs_iread() call. 384 * ino -- the number of the inode desired. This is the unique identifier 385 * within the file system for the inode being requested. 386 * lock_flags -- flags indicating how to lock the inode. See the comment 387 * for xfs_ilock() for a list of valid values. 388 */ 389int 390xfs_iget( 391 xfs_mount_t *mp, 392 xfs_trans_t *tp, 393 xfs_ino_t ino, 394 uint flags, 395 uint lock_flags, 396 xfs_inode_t **ipp) 397{ 398 xfs_inode_t *ip; 399 int error; 400 xfs_perag_t *pag; 401 xfs_agino_t agino; 402 403 /* 404 * xfs_reclaim_inode() uses the ILOCK to ensure an inode 405 * doesn't get freed while it's being referenced during a 406 * radix tree traversal here. It assumes this function 407 * aqcuires only the ILOCK (and therefore it has no need to 408 * involve the IOLOCK in this synchronization). 
409 */ 410 ASSERT((lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) == 0); 411 412 /* reject inode numbers outside existing AGs */ 413 if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) 414 return EINVAL; 415 416 /* get the perag structure and ensure that it's inode capable */ 417 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino)); 418 agino = XFS_INO_TO_AGINO(mp, ino); 419 420again: 421 error = 0; 422 rcu_read_lock(); 423 ip = radix_tree_lookup(&pag->pag_ici_root, agino); 424 425 if (ip) { 426 error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags); 427 if (error) 428 goto out_error_or_again; 429 } else { 430 rcu_read_unlock(); 431 XFS_STATS_INC(xs_ig_missed); 432 433 error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, 434 flags, lock_flags); 435 if (error) 436 goto out_error_or_again; 437 } 438 xfs_perag_put(pag); 439 440 *ipp = ip; 441 442 /* 443 * If we have a real type for an on-disk inode, we can set ops(&unlock) 444 * now. If it's a new inode being created, xfs_ialloc will handle it. 445 */ 446 if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0) 447 xfs_setup_inode(ip); 448 return 0; 449 450out_error_or_again: 451 if (error == EAGAIN) { 452 delay(1); 453 goto again; 454 } 455 xfs_perag_put(pag); 456 return error; 457} 458 459/* |
|
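
The block added above (lines 44-459 of the newer file) is the former xfs_iget.c machinery moved into xfs_icache.c: inode allocation, RCU-deferred freeing, and the cache-hit/cache-miss halves of xfs_iget(). The part worth calling out is the reuse check: xfs_inode_free() sets XFS_IRECLAIM and zeroes ip->i_ino under ip->i_flags_lock before call_rcu(), so a lookup that races with the free and still finds the stale radix-tree entry re-reads the inode number under the same lock and backs off with EAGAIN. Below is a minimal userspace sketch of that validate-under-lock handshake only; the struct and function names are invented for illustration, and a pthread mutex stands in for the spinlock and the RCU grace period.

```c
/*
 * Userspace sketch (not kernel code) of the reuse-detection pattern used by
 * xfs_inode_free()/xfs_iget_cache_hit() above: the free side zeroes the inode
 * number under i_flags_lock before the RCU-deferred free, and the lookup side
 * revalidates the number under the same lock, retrying on a mismatch.
 * Names ("cache_ino", "lookup_validate") are invented for illustration.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define IRECLAIM 0x1

struct cache_ino {
	pthread_mutex_t	flags_lock;	/* stands in for ip->i_flags_lock */
	unsigned long	ino;		/* 0 means "being freed/recycled" */
	unsigned int	flags;
};

/* Free side: mark the object invalid before it is handed to deferred freeing. */
static void cache_ino_mark_freed(struct cache_ino *ip)
{
	pthread_mutex_lock(&ip->flags_lock);
	ip->flags = IRECLAIM;
	ip->ino = 0;		/* lookups now see a number mismatch */
	pthread_mutex_unlock(&ip->flags_lock);
	/* the real code issues call_rcu() here; readers may still hold a pointer */
}

/* Lookup side: object found in a lockless structure, revalidate before use. */
static bool lookup_validate(struct cache_ino *ip, unsigned long want_ino)
{
	bool ok;

	pthread_mutex_lock(&ip->flags_lock);
	ok = (ip->ino == want_ino) && !(ip->flags & IRECLAIM);
	pthread_mutex_unlock(&ip->flags_lock);
	return ok;		/* caller retries (EAGAIN) when this fails */
}

int main(void)
{
	struct cache_ino ip = {
		.flags_lock = PTHREAD_MUTEX_INITIALIZER,
		.ino = 129,
	};

	printf("valid before free: %d\n", lookup_validate(&ip, 129));
	cache_ino_mark_freed(&ip);
	printf("valid after free:  %d\n", lookup_validate(&ip, 129));
	return 0;
}
```

In the diffed code the retry lives in xfs_iget(): on EAGAIN it calls delay(1) and jumps back to the radix-tree lookup.
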
45 * The inode lookup is done in batches to keep the amount of lock traffic and 46 * radix tree lookups to a minimum. The batch size is a trade off between 47 * lookup reduction and stack usage. This is in the reclaim path, so we can't 48 * be too greedy. 49 */ 50#define XFS_LOOKUP_BATCH 32 51 52STATIC int --- 195 unchanged lines hidden (view full) --- 248{ 249 struct xfs_mount *mp = container_of(to_delayed_work(work), 250 struct xfs_mount, m_reclaim_work); 251 252 xfs_reclaim_inodes(mp, SYNC_TRYLOCK); 253 xfs_reclaim_work_queue(mp); 254} 255 | 460 * The inode lookup is done in batches to keep the amount of lock traffic and 461 * radix tree lookups to a minimum. The batch size is a trade off between 462 * lookup reduction and stack usage. This is in the reclaim path, so we can't 463 * be too greedy. 464 */ 465#define XFS_LOOKUP_BATCH 32 466 467STATIC int --- 195 unchanged lines hidden (view full) --- 663{ 664 struct xfs_mount *mp = container_of(to_delayed_work(work), 665 struct xfs_mount, m_reclaim_work); 666 667 xfs_reclaim_inodes(mp, SYNC_TRYLOCK); 668 xfs_reclaim_work_queue(mp); 669} 670 |
256void | 671static void |
257__xfs_inode_set_reclaim_tag( 258 struct xfs_perag *pag, 259 struct xfs_inode *ip) 260{ 261 radix_tree_tag_set(&pag->pag_ici_root, 262 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), 263 XFS_ICI_RECLAIM_TAG); 264 --- 49 unchanged lines hidden (view full) --- 314 XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), 315 XFS_ICI_RECLAIM_TAG); 316 spin_unlock(&ip->i_mount->m_perag_lock); 317 trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno, 318 -1, _RET_IP_); 319 } 320} 321 | 672__xfs_inode_set_reclaim_tag( 673 struct xfs_perag *pag, 674 struct xfs_inode *ip) 675{ 676 radix_tree_tag_set(&pag->pag_ici_root, 677 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), 678 XFS_ICI_RECLAIM_TAG); 679 --- 49 unchanged lines hidden (view full) --- 729 XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), 730 XFS_ICI_RECLAIM_TAG); 731 spin_unlock(&ip->i_mount->m_perag_lock); 732 trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno, 733 -1, _RET_IP_); 734 } 735} 736 |
322void | 737STATIC void |
323__xfs_inode_clear_reclaim_tag( 324 xfs_mount_t *mp, 325 xfs_perag_t *pag, 326 xfs_inode_t *ip) 327{ 328 radix_tree_tag_clear(&pag->pag_ici_root, 329 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); 330 __xfs_inode_clear_reclaim(pag, ip); --- 206 unchanged lines hidden (view full) --- 537} 538 539/* 540 * Walk the AGs and reclaim the inodes in them. Even if the filesystem is 541 * corrupted, we still want to try to reclaim all the inodes. If we don't, 542 * then a shut down during filesystem unmount reclaim walk leak all the 543 * unreclaimed inodes. 544 */ | 738__xfs_inode_clear_reclaim_tag( 739 xfs_mount_t *mp, 740 xfs_perag_t *pag, 741 xfs_inode_t *ip) 742{ 743 radix_tree_tag_clear(&pag->pag_ici_root, 744 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); 745 __xfs_inode_clear_reclaim(pag, ip); --- 206 unchanged lines hidden (view full) --- 952} 953 954/* 955 * Walk the AGs and reclaim the inodes in them. Even if the filesystem is 956 * corrupted, we still want to try to reclaim all the inodes. If we don't, 957 * then a shut down during filesystem unmount reclaim walk leak all the 958 * unreclaimed inodes. 959 */ |
545int | 960STATIC int |
546xfs_reclaim_inodes_ag( 547 struct xfs_mount *mp, 548 int flags, 549 int *nr_to_scan) 550{ 551 struct xfs_perag *pag; 552 int error = 0; 553 int last_error = 0; --- 162 unchanged lines hidden --- | 961xfs_reclaim_inodes_ag( 962 struct xfs_mount *mp, 963 int flags, 964 int *nr_to_scan) 965{ 966 struct xfs_perag *pag; 967 int error = 0; 968 int last_error = 0; --- 162 unchanged lines hidden --- |
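
The remaining hunks only change linkage: with the lookup code now in the same file, __xfs_inode_set_reclaim_tag(), __xfs_inode_clear_reclaim_tag() and xfs_reclaim_inodes_ag() no longer need external visibility and become static/STATIC (hence the forward declaration added near the top). The reclaim walk they drive is batched, as the XFS_LOOKUP_BATCH comment explains: each lookup pass gathers up to 32 reclaim-tagged inodes and resumes from a cursor, so lock traffic and radix-tree lookups scale with batches rather than with individual inodes. The sketch below is a plain userspace model of that batch-and-cursor pattern, assuming an array stands in for the per-AG radix tree and an invented gang_lookup_tagged() helper stands in for radix_tree_gang_lookup_tag().

```c
/*
 * Userspace model (not the kernel implementation) of the batched, tag-driven
 * walk that xfs_reclaim_inodes_ag() performs over each per-AG inode tree:
 * collect up to XFS_LOOKUP_BATCH tagged entries per lookup, process them,
 * then resume from a cursor. Array and helper names are invented stand-ins.
 */
#include <stdbool.h>
#include <stdio.h>

#define LOOKUP_BATCH	32
#define NR_ITEMS	100

struct item {
	unsigned long	index;
	bool		reclaim_tagged;
};

static struct item tree[NR_ITEMS];

/* Stand-in for radix_tree_gang_lookup_tag(): collect tagged items >= *first. */
static int gang_lookup_tagged(unsigned long *first, struct item **batch, int max)
{
	int found = 0;

	for (unsigned long i = *first; i < NR_ITEMS && found < max; i++) {
		if (tree[i].reclaim_tagged)
			batch[found++] = &tree[i];
	}
	if (found)
		*first = batch[found - 1]->index + 1;	/* advance the cursor */
	return found;
}

int main(void)
{
	struct item *batch[LOOKUP_BATCH];
	unsigned long first_index = 0;
	int reclaimed = 0;

	for (unsigned long i = 0; i < NR_ITEMS; i++) {
		tree[i].index = i;
		tree[i].reclaim_tagged = (i % 3 == 0);	/* every third item needs reclaim */
	}

	for (;;) {
		/* one lookup pass per batch keeps lock traffic down, per the comment */
		int nr = gang_lookup_tagged(&first_index, batch, LOOKUP_BATCH);
		if (!nr)
			break;
		for (int i = 0; i < nr; i++) {
			batch[i]->reclaim_tagged = false;	/* "reclaim" the item */
			reclaimed++;
		}
	}
	printf("reclaimed %d items in batches of up to %d\n", reclaimed, LOOKUP_BATCH);
	return 0;
}
```

Keeping the batch at 32 is the trade-off the comment names: larger batches mean fewer lookups but more stack consumed by the pointer array, and this runs in the reclaim path where stack is tight.
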