/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_inode_item.h"
#include "xfs_quota.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_bmap_util.h"
#include "xfs_dquot_item.h"
#include "xfs_dquot.h"

#include <linux/kthread.h>
#include <linux/freezer.h>

STATIC void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp,
				struct xfs_perag *pag, struct xfs_inode *ip);

/*
 * Allocate and initialise an xfs_inode.
 */
struct xfs_inode *
xfs_inode_alloc(
	struct xfs_mount	*mp,
	xfs_ino_t		ino)
{
	struct xfs_inode	*ip;

	/*
	 * if this didn't occur in transactions, we could use
	 * KM_MAYFAIL and return NULL here on ENOMEM. Set the
	 * code up to do this anyway.
	 */
	ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP);
	if (!ip)
		return NULL;
	if (inode_init_always(mp->m_super, VFS_I(ip))) {
		kmem_zone_free(xfs_inode_zone, ip);
		return NULL;
	}

	XFS_STATS_INC(mp, vn_active);
	ASSERT(atomic_read(&ip->i_pincount) == 0);
	ASSERT(!spin_is_locked(&ip->i_flags_lock));
	ASSERT(!xfs_isiflocked(ip));
	ASSERT(ip->i_ino == 0);

	mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);

	/* initialise the xfs inode */
	ip->i_ino = ino;
	ip->i_mount = mp;
	memset(&ip->i_imap, 0, sizeof(struct xfs_imap));
	ip->i_afp = NULL;
	memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
	ip->i_flags = 0;
	ip->i_delayed_blks = 0;
	memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));

	return ip;
}

STATIC void
xfs_inode_free_callback(
	struct rcu_head		*head)
{
	struct inode		*inode = container_of(head, struct inode, i_rcu);
	struct xfs_inode	*ip = XFS_I(inode);

	kmem_zone_free(xfs_inode_zone, ip);
}

void
xfs_inode_free(
	struct xfs_inode	*ip)
{
	switch (ip->i_d.di_mode & S_IFMT) {
	case S_IFREG:
	case S_IFDIR:
	case S_IFLNK:
		xfs_idestroy_fork(ip, XFS_DATA_FORK);
		break;
	}

	if (ip->i_afp)
		xfs_idestroy_fork(ip, XFS_ATTR_FORK);

	if (ip->i_itemp) {
		ASSERT(!(ip->i_itemp->ili_item.li_flags & XFS_LI_IN_AIL));
		xfs_inode_item_destroy(ip);
		ip->i_itemp = NULL;
	}

	/*
	 * Because we use RCU freeing we need to ensure the inode always
	 * appears to be reclaimed with an invalid inode number when in the
	 * free state. The ip->i_flags_lock provides the barrier against lookup
	 * races.
	 */
	spin_lock(&ip->i_flags_lock);
	ip->i_flags = XFS_IRECLAIM;
	ip->i_ino = 0;
	spin_unlock(&ip->i_flags_lock);

	/* asserts to verify all state is correct here */
	ASSERT(atomic_read(&ip->i_pincount) == 0);
	ASSERT(!xfs_isiflocked(ip));
	XFS_STATS_DEC(ip->i_mount, vn_active);

	call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
}

/*
 * Check the validity of the inode we just found in the cache.
 */
static int
xfs_iget_cache_hit(
	struct xfs_perag	*pag,
	struct xfs_inode	*ip,
	xfs_ino_t		ino,
	int			flags,
	int			lock_flags) __releases(RCU)
{
	struct inode		*inode = VFS_I(ip);
	struct xfs_mount	*mp = ip->i_mount;
	int			error;

	/*
	 * check for re-use of an inode within an RCU grace period due to the
	 * radix tree nodes not being updated yet. We monitor for this by
	 * setting the inode number to zero before freeing the inode structure.
	 * If the inode has been reallocated and set up, then the inode number
	 * will not match, so check for that, too.
	 */
	spin_lock(&ip->i_flags_lock);
	if (ip->i_ino != ino) {
		trace_xfs_iget_skip(ip);
		XFS_STATS_INC(mp, xs_ig_frecycle);
		error = -EAGAIN;
		goto out_error;
	}

	/*
	 * If we are racing with another cache hit that is currently
	 * instantiating this inode or currently recycling it out of
	 * reclaimable state, wait for the initialisation to complete
	 * before continuing.
	 *
	 * XXX(hch): eventually we should do something equivalent to
	 *	     wait_on_inode to wait for these flags to be cleared
	 *	     instead of polling for it.
	 */
	if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) {
		trace_xfs_iget_skip(ip);
		XFS_STATS_INC(mp, xs_ig_frecycle);
		error = -EAGAIN;
		goto out_error;
	}

	/*
	 * If lookup is racing with unlink return an error immediately.
	 */
	if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
		error = -ENOENT;
		goto out_error;
	}

	/*
	 * If IRECLAIMABLE is set, we've torn down the VFS inode already.
	 * Need to carefully get it back into usable state.
	 */
	if (ip->i_flags & XFS_IRECLAIMABLE) {
		trace_xfs_iget_reclaim(ip);

		/*
		 * We need to set XFS_IRECLAIM to prevent xfs_reclaim_inode
		 * from stomping over us while we recycle the inode. We can't
		 * clear the radix tree reclaimable tag yet as it requires
		 * pag_ici_lock to be held exclusive.
		 */
		ip->i_flags |= XFS_IRECLAIM;

		spin_unlock(&ip->i_flags_lock);
		rcu_read_unlock();

		error = inode_init_always(mp->m_super, inode);
		if (error) {
			/*
			 * Re-initializing the inode failed, and we are in deep
			 * trouble.  Try to re-add it to the reclaim list.
			 */
			rcu_read_lock();
			spin_lock(&ip->i_flags_lock);

			ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM);
			ASSERT(ip->i_flags & XFS_IRECLAIMABLE);
			trace_xfs_iget_reclaim_fail(ip);
			goto out_error;
		}

		spin_lock(&pag->pag_ici_lock);
		spin_lock(&ip->i_flags_lock);

		/*
		 * Clear the per-lifetime state in the inode as we are now
		 * effectively a new inode and need to return to the initial
		 * state before reuse occurs.
		 */
		ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS;
		ip->i_flags |= XFS_INEW;
		__xfs_inode_clear_reclaim_tag(mp, pag, ip);
		inode->i_state = I_NEW;

		ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
		mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);

		spin_unlock(&ip->i_flags_lock);
		spin_unlock(&pag->pag_ici_lock);
	} else {
		/* If the VFS inode is being torn down, pause and try again. */
		if (!igrab(inode)) {
			trace_xfs_iget_skip(ip);
			error = -EAGAIN;
			goto out_error;
		}

		/* We've got a live one. */
		spin_unlock(&ip->i_flags_lock);
		rcu_read_unlock();
		trace_xfs_iget_hit(ip);
	}

	if (lock_flags != 0)
		xfs_ilock(ip, lock_flags);

	xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE);
	XFS_STATS_INC(mp, xs_ig_found);

	return 0;

out_error:
	spin_unlock(&ip->i_flags_lock);
	rcu_read_unlock();
	return error;
}

static int
xfs_iget_cache_miss(
	struct xfs_mount	*mp,
	struct xfs_perag	*pag,
	xfs_trans_t		*tp,
	xfs_ino_t		ino,
	struct xfs_inode	**ipp,
	int			flags,
	int			lock_flags)
{
	struct xfs_inode	*ip;
	int			error;
	xfs_agino_t		agino = XFS_INO_TO_AGINO(mp, ino);
	int			iflags;

	ip = xfs_inode_alloc(mp, ino);
	if (!ip)
		return -ENOMEM;

	error = xfs_iread(mp, tp, ip, flags);
	if (error)
		goto out_destroy;

	trace_xfs_iget_miss(ip);

	if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
		error = -ENOENT;
		goto out_destroy;
	}

	/*
	 * Preload the radix tree so we can insert safely under the
	 * write spinlock. Note that we cannot sleep inside the preload
	 * region. Since we can be called from transaction context, don't
	 * recurse into the file system.
	 */
	if (radix_tree_preload(GFP_NOFS)) {
		error = -EAGAIN;
		goto out_destroy;
	}

	/*
	 * Because the inode hasn't been added to the radix-tree yet it can't
	 * be found by another thread, so we can do the non-sleeping lock here.
	 */
	if (lock_flags) {
		if (!xfs_ilock_nowait(ip, lock_flags))
			BUG();
	}

	/*
	 * These values must be set before inserting the inode into the radix
	 * tree as the moment it is inserted a concurrent lookup (allowed by the
	 * RCU locking mechanism) can find it and that lookup must see that this
	 * is an inode currently under construction (i.e. that XFS_INEW is set).
	 * The ip->i_flags_lock that protects the XFS_INEW flag forms the
	 * memory barrier that ensures this detection works correctly at lookup
	 * time.
	 */
	iflags = XFS_INEW;
	if (flags & XFS_IGET_DONTCACHE)
		iflags |= XFS_IDONTCACHE;
	ip->i_udquot = NULL;
	ip->i_gdquot = NULL;
	ip->i_pdquot = NULL;
	xfs_iflags_set(ip, iflags);

	/* insert the new inode */
	spin_lock(&pag->pag_ici_lock);
	error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
	if (unlikely(error)) {
		WARN_ON(error != -EEXIST);
		XFS_STATS_INC(mp, xs_ig_dup);
		error = -EAGAIN;
		goto out_preload_end;
	}
	spin_unlock(&pag->pag_ici_lock);
	radix_tree_preload_end();

	*ipp = ip;
	return 0;

out_preload_end:
	spin_unlock(&pag->pag_ici_lock);
	radix_tree_preload_end();
	if (lock_flags)
		xfs_iunlock(ip, lock_flags);
out_destroy:
	__destroy_inode(VFS_I(ip));
	xfs_inode_free(ip);
	return error;
}

/*
 * Look up an inode by number in the given file system.
 * The inode is looked up in the cache held in each AG.
 * If the inode is found in the cache, initialise the vfs inode
 * if necessary.
 *
 * If it is not in core, read it in from the file system's device,
 * add it to the cache and initialise the vfs inode.
 *
 * The inode is locked according to the value of the lock_flags parameter.
 * This flag parameter indicates how and if the inode's IO lock and inode lock
 * should be taken.
 *
 * mp -- the mount point structure for the current file system.  It points
 *       to the inode hash table.
 * tp -- a pointer to the current transaction if there is one.  This is
 *       simply passed through to the xfs_iread() call.
 * ino -- the number of the inode desired.  This is the unique identifier
 *        within the file system for the inode being requested.
 * lock_flags -- flags indicating how to lock the inode.  See the comment
 *		 for xfs_ilock() for a list of valid values.
 */
int
xfs_iget(
	xfs_mount_t	*mp,
	xfs_trans_t	*tp,
	xfs_ino_t	ino,
	uint		flags,
	uint		lock_flags,
	xfs_inode_t	**ipp)
{
	xfs_inode_t	*ip;
	int		error;
	xfs_perag_t	*pag;
	xfs_agino_t	agino;

	/*
	 * xfs_reclaim_inode() uses the ILOCK to ensure an inode
	 * doesn't get freed while it's being referenced during a
	 * radix tree traversal here.  It assumes this function
	 * acquires only the ILOCK (and therefore it has no need to
	 * involve the IOLOCK in this synchronization).
	 */
	ASSERT((lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) == 0);

	/* reject inode numbers outside existing AGs */
	if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
		return -EINVAL;

	XFS_STATS_INC(mp, xs_ig_attempts);

	/* get the perag structure and ensure that it's inode capable */
	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
	agino = XFS_INO_TO_AGINO(mp, ino);

again:
	error = 0;
	rcu_read_lock();
	ip = radix_tree_lookup(&pag->pag_ici_root, agino);

	if (ip) {
		error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags);
		if (error)
			goto out_error_or_again;
	} else {
		rcu_read_unlock();
		XFS_STATS_INC(mp, xs_ig_missed);

		error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
							flags, lock_flags);
		if (error)
			goto out_error_or_again;
	}
	xfs_perag_put(pag);

	*ipp = ip;

	/*
	 * If we have a real type for an on-disk inode, we can setup the inode
	 * now.  If it's a new inode being created, xfs_ialloc will handle it.
	 */
	if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0)
		xfs_setup_existing_inode(ip);
	return 0;

out_error_or_again:
	if (error == -EAGAIN) {
		delay(1);
		goto again;
	}
	xfs_perag_put(pag);
	return error;
}
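
/*
 * Illustrative sketch (added commentary, not part of the original source):
 * a typical non-transactional caller of xfs_iget() passes no iget flags,
 * asks for the lock state it needs and drops both the lock and the inode
 * reference when done.  Error handling is trimmed and the surrounding
 * context (mp, ino) is assumed:
 *
 *	struct xfs_inode	*ip;
 *	int			error;
 *
 *	error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip);
 *	if (error)
 *		return error;
 *	... examine ip under XFS_ILOCK_SHARED ...
 *	xfs_iunlock(ip, XFS_ILOCK_SHARED);
 *	IRELE(ip);
 */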

/*
 * The inode lookup is done in batches to keep the amount of lock traffic and
 * radix tree lookups to a minimum. The batch size is a trade off between
 * lookup reduction and stack usage. This is in the reclaim path, so we can't
 * be too greedy.
 */
#define XFS_LOOKUP_BATCH 32

STATIC int
xfs_inode_ag_walk_grab(
	struct xfs_inode	*ip)
{
	struct inode		*inode = VFS_I(ip);

	ASSERT(rcu_read_lock_held());

	/*
	 * check for stale RCU freed inode
	 *
	 * If the inode has been reallocated, it doesn't matter if it's not in
	 * the AG we are walking - we are walking for writeback, so if it
	 * passes all the "valid inode" checks and is dirty, then we'll write
	 * it back anyway.  If it has been reallocated and is still being
	 * initialised, the XFS_INEW check below will catch it.
	 */
	spin_lock(&ip->i_flags_lock);
	if (!ip->i_ino)
		goto out_unlock_noent;

	/* avoid new or reclaimable inodes. Leave for reclaim code to flush */
	if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
		goto out_unlock_noent;
	spin_unlock(&ip->i_flags_lock);

	/* nothing to sync during shutdown */
	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EFSCORRUPTED;

	/* If we can't grab the inode, it must be on its way to reclaim. */
	if (!igrab(inode))
		return -ENOENT;

	/* inode is valid */
	return 0;

out_unlock_noent:
	spin_unlock(&ip->i_flags_lock);
	return -ENOENT;
}

STATIC int
xfs_inode_ag_walk(
	struct xfs_mount	*mp,
	struct xfs_perag	*pag,
	int			(*execute)(struct xfs_inode *ip, int flags,
					   void *args),
	int			flags,
	void			*args,
	int			tag)
{
	uint32_t		first_index;
	int			last_error = 0;
	int			skipped;
	int			done;
	int			nr_found;

restart:
	done = 0;
	skipped = 0;
	first_index = 0;
	nr_found = 0;
	do {
		struct xfs_inode *batch[XFS_LOOKUP_BATCH];
		int		error = 0;
		int		i;

		rcu_read_lock();

		if (tag == -1)
			nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
					(void **)batch, first_index,
					XFS_LOOKUP_BATCH);
		else
			nr_found = radix_tree_gang_lookup_tag(
					&pag->pag_ici_root,
					(void **) batch, first_index,
					XFS_LOOKUP_BATCH, tag);

		if (!nr_found) {
			rcu_read_unlock();
			break;
		}

		/*
		 * Grab the inodes before we drop the lock. If we found
		 * nothing, nr_found == 0 and the loop will be skipped.
		 */
		for (i = 0; i < nr_found; i++) {
			struct xfs_inode *ip = batch[i];

			if (done || xfs_inode_ag_walk_grab(ip))
				batch[i] = NULL;

			/*
			 * Update the index for the next lookup. Catch
			 * overflows into the next AG range which can occur if
			 * we have inodes in the last block of the AG and we
			 * are currently pointing to the last inode.
			 *
			 * Because we may see inodes that are from the wrong AG
			 * due to RCU freeing and reallocation, only update the
			 * index if it lies in this AG. It was a race that led
			 * us to see this inode, so another lookup from the
			 * same index will not find it again.
			 */
			if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
				continue;
			first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
			if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
				done = 1;
		}

		/* unlock now we've grabbed the inodes. */
		rcu_read_unlock();

		for (i = 0; i < nr_found; i++) {
			if (!batch[i])
				continue;
			error = execute(batch[i], flags, args);
			IRELE(batch[i]);
			if (error == -EAGAIN) {
				skipped++;
				continue;
			}
			if (error && last_error != -EFSCORRUPTED)
				last_error = error;
		}

		/* bail out if the filesystem is corrupted. */
		if (error == -EFSCORRUPTED)
			break;

		cond_resched();

	} while (nr_found && !done);

	if (skipped) {
		delay(1);
		goto restart;
	}
	return last_error;
}

/*
 * Background scanning to trim post-EOF preallocated space. This is queued
 * based on the 'speculative_prealloc_lifetime' tunable (5m by default).
 */
STATIC void
xfs_queue_eofblocks(
	struct xfs_mount *mp)
{
	rcu_read_lock();
	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_EOFBLOCKS_TAG))
		queue_delayed_work(mp->m_eofblocks_workqueue,
				   &mp->m_eofblocks_work,
				   msecs_to_jiffies(xfs_eofb_secs * 1000));
	rcu_read_unlock();
}

void
xfs_eofblocks_worker(
	struct work_struct *work)
{
	struct xfs_mount *mp = container_of(to_delayed_work(work),
				struct xfs_mount, m_eofblocks_work);
	xfs_icache_free_eofblocks(mp, NULL);
	xfs_queue_eofblocks(mp);
}

int
xfs_inode_ag_iterator(
	struct xfs_mount	*mp,
	int			(*execute)(struct xfs_inode *ip, int flags,
					   void *args),
	int			flags,
	void			*args)
{
	struct xfs_perag	*pag;
	int			error = 0;
	int			last_error = 0;
	xfs_agnumber_t		ag;

	ag = 0;
	while ((pag = xfs_perag_get(mp, ag))) {
		ag = pag->pag_agno + 1;
		error = xfs_inode_ag_walk(mp, pag, execute, flags, args, -1);
		xfs_perag_put(pag);
		if (error) {
			last_error = error;
			if (error == -EFSCORRUPTED)
				break;
		}
	}
	return last_error;
}

int
xfs_inode_ag_iterator_tag(
	struct xfs_mount	*mp,
	int			(*execute)(struct xfs_inode *ip, int flags,
					   void *args),
	int			flags,
	void			*args,
	int			tag)
{
	struct xfs_perag	*pag;
	int			error = 0;
	int			last_error = 0;
	xfs_agnumber_t		ag;

	ag = 0;
	while ((pag = xfs_perag_get_tag(mp, ag, tag))) {
		ag = pag->pag_agno + 1;
		error = xfs_inode_ag_walk(mp, pag, execute, flags, args, tag);
		xfs_perag_put(pag);
		if (error) {
			last_error = error;
			if (error == -EFSCORRUPTED)
				break;
		}
	}
	return last_error;
}
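
/*
 * Illustrative sketch (added commentary, not part of the original source):
 * an execute callback passed to the AG iterators above has this shape. The
 * walk grabs a reference to each inode and releases it afterwards; returning
 * -EAGAIN makes the whole walk restart after a short delay, and -EFSCORRUPTED
 * aborts it. The name example_execute and its body are hypothetical:
 *
 *	STATIC int
 *	example_execute(
 *		struct xfs_inode	*ip,
 *		int			flags,
 *		void			*args)
 *	{
 *		if (!(flags & SYNC_WAIT) && <inode is busy>)
 *			return -EAGAIN;
 *		<do per-inode work>
 *		return 0;
 *	}
 *
 *	error = xfs_inode_ag_iterator(mp, example_execute, SYNC_TRYLOCK, NULL);
 */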

/*
 * Queue a new inode reclaim pass if there are reclaimable inodes and there
 * isn't a reclaim pass already in progress. By default it runs every 5s based
 * on the xfs periodic sync default of 30s. Perhaps this should have its own
 * tunable, but that can be done if this method proves to be ineffective or too
 * aggressive.
 */
static void
xfs_reclaim_work_queue(
	struct xfs_mount        *mp)
{
	rcu_read_lock();
	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
		queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work,
			msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
	}
	rcu_read_unlock();
}

/*
 * This is a fast pass over the inode cache to try to get reclaim moving on as
 * many inodes as possible in a short period of time. It kicks itself every few
 * seconds, as well as being kicked by the inode cache shrinker when memory
 * goes low. It scans as quickly as possible avoiding locked inodes or those
 * already being flushed, and once done schedules a future pass.
 */
void
xfs_reclaim_worker(
	struct work_struct *work)
{
	struct xfs_mount *mp = container_of(to_delayed_work(work),
					struct xfs_mount, m_reclaim_work);

	xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
	xfs_reclaim_work_queue(mp);
}

static void
__xfs_inode_set_reclaim_tag(
	struct xfs_perag	*pag,
	struct xfs_inode	*ip)
{
	radix_tree_tag_set(&pag->pag_ici_root,
			   XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
			   XFS_ICI_RECLAIM_TAG);

	if (!pag->pag_ici_reclaimable) {
		/* propagate the reclaim tag up into the perag radix tree */
		spin_lock(&ip->i_mount->m_perag_lock);
		radix_tree_tag_set(&ip->i_mount->m_perag_tree,
				XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
				XFS_ICI_RECLAIM_TAG);
		spin_unlock(&ip->i_mount->m_perag_lock);

		/* schedule periodic background inode reclaim */
		xfs_reclaim_work_queue(ip->i_mount);

		trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
							-1, _RET_IP_);
	}
	pag->pag_ici_reclaimable++;
}

/*
 * We set the inode flag atomically with the radix tree tag.
 * Once we get tag lookups on the radix tree, this inode flag
 * can go away.
 */
void
xfs_inode_set_reclaim_tag(
	xfs_inode_t	*ip)
{
	struct xfs_mount *mp = ip->i_mount;
	struct xfs_perag *pag;

	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
	spin_lock(&pag->pag_ici_lock);
	spin_lock(&ip->i_flags_lock);
	__xfs_inode_set_reclaim_tag(pag, ip);
	__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
	spin_unlock(&ip->i_flags_lock);
	spin_unlock(&pag->pag_ici_lock);
	xfs_perag_put(pag);
}

STATIC void
__xfs_inode_clear_reclaim(
	xfs_perag_t	*pag,
	xfs_inode_t	*ip)
{
	pag->pag_ici_reclaimable--;
	if (!pag->pag_ici_reclaimable) {
		/* clear the reclaim tag from the perag radix tree */
		spin_lock(&ip->i_mount->m_perag_lock);
		radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
				XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
				XFS_ICI_RECLAIM_TAG);
		spin_unlock(&ip->i_mount->m_perag_lock);
		trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
							-1, _RET_IP_);
	}
}

STATIC void
__xfs_inode_clear_reclaim_tag(
	xfs_mount_t	*mp,
	xfs_perag_t	*pag,
	xfs_inode_t	*ip)
{
	radix_tree_tag_clear(&pag->pag_ici_root,
			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
	__xfs_inode_clear_reclaim(pag, ip);
}

/*
 * Grab the inode for reclaim exclusively.
 * Return 0 if we grabbed it, non-zero otherwise.
 */
STATIC int
xfs_reclaim_inode_grab(
	struct xfs_inode	*ip,
	int			flags)
{
	ASSERT(rcu_read_lock_held());

	/* quick check for stale RCU freed inode */
	if (!ip->i_ino)
		return 1;

	/*
	 * If we are asked for non-blocking operation, do unlocked checks to
	 * see if the inode is already being flushed or in reclaim to avoid
	 * lock traffic.
	 */
	if ((flags & SYNC_TRYLOCK) &&
	    __xfs_iflags_test(ip, XFS_IFLOCK | XFS_IRECLAIM))
		return 1;

	/*
	 * The radix tree lock here protects a thread in xfs_iget from racing
	 * with us starting reclaim on the inode.  Once we have the
	 * XFS_IRECLAIM flag set it will not touch us.
	 *
	 * Due to RCU lookup, we may find inodes that have been freed and only
	 * have XFS_IRECLAIM set.  Indeed, we may see reallocated inodes that
	 * aren't candidates for reclaim at all, so we must check that
	 * XFS_IRECLAIMABLE is set before proceeding to reclaim.
	 */
	spin_lock(&ip->i_flags_lock);
	if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) ||
	    __xfs_iflags_test(ip, XFS_IRECLAIM)) {
		/* not a reclaim candidate. */
		spin_unlock(&ip->i_flags_lock);
		return 1;
	}
	__xfs_iflags_set(ip, XFS_IRECLAIM);
	spin_unlock(&ip->i_flags_lock);
	return 0;
}

/*
 * Inodes in different states need to be treated differently. The following
 * table lists the inode states and the reclaim actions necessary:
 *
 *	inode state	     iflush ret		required action
 *      ---------------      ----------         ---------------
 *	bad			-		reclaim
 *	shutdown		EIO		unpin and reclaim
 *	clean, unpinned		0		reclaim
 *	stale, unpinned		0		reclaim
 *	clean, pinned(*)	0		requeue
 *	stale, pinned		EAGAIN		requeue
 *	dirty, async		-		requeue
 *	dirty, sync		0		reclaim
 *
 * (*) dgc: I don't think the clean, pinned state is possible but it gets
 * handled anyway given the order of checks implemented.
 *
 * Also, because we get the flush lock first, we know that any inode that has
 * been flushed delwri has had the flush completed by the time we check that
 * the inode is clean.
 *
 * Note that because the inode is flushed delayed write by AIL pushing, the
 * flush lock may already be held here and waiting on it can result in very
 * long latencies.  Hence for sync reclaims, where we wait on the flush lock,
 * the caller should push the AIL first before trying to reclaim inodes to
 * minimise the amount of time spent waiting.  For background reclaim, we only
 * bother to reclaim clean inodes anyway.
 *
 * Hence the order of actions after gaining the locks should be:
 *	bad		=> reclaim
 *	shutdown	=> unpin and reclaim
 *	pinned, async	=> requeue
 *	pinned, sync	=> unpin
 *	stale		=> reclaim
 *	clean		=> reclaim
 *	dirty, async	=> requeue
 *	dirty, sync	=> flush, wait and reclaim
 */
STATIC int
xfs_reclaim_inode(
	struct xfs_inode	*ip,
	struct xfs_perag	*pag,
	int			sync_mode)
{
	struct xfs_buf		*bp = NULL;
	int			error;

restart:
	error = 0;
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	if (!xfs_iflock_nowait(ip)) {
		if (!(sync_mode & SYNC_WAIT))
			goto out;
		xfs_iflock(ip);
	}

	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
		xfs_iunpin_wait(ip);
		xfs_iflush_abort(ip, false);
		goto reclaim;
	}
	if (xfs_ipincount(ip)) {
		if (!(sync_mode & SYNC_WAIT))
			goto out_ifunlock;
		xfs_iunpin_wait(ip);
	}
	if (xfs_iflags_test(ip, XFS_ISTALE))
		goto reclaim;
	if (xfs_inode_clean(ip))
		goto reclaim;

	/*
	 * Never flush out dirty data during non-blocking reclaim, as it would
	 * just contend with AIL pushing trying to do the same job.
	 */
	if (!(sync_mode & SYNC_WAIT))
		goto out_ifunlock;

	/*
	 * Now we have an inode that needs flushing.
	 *
	 * Note that xfs_iflush will never block on the inode buffer lock, as
	 * xfs_ifree_cluster() can lock the inode buffer before it locks the
	 * ip->i_lock, and we are doing the exact opposite here.  As a result,
	 * doing a blocking xfs_imap_to_bp() to get the cluster buffer would
	 * result in an ABBA deadlock with xfs_ifree_cluster().
	 *
	 * As xfs_ifree_cluster() must gather all inodes that are active in the
	 * cache to mark them stale, if we hit this case we don't actually want
	 * to do IO here - we want the inode marked stale so we can simply
	 * reclaim it.  Hence if we get an EAGAIN error here, just unlock the
	 * inode, back off and try again.  Hopefully the next pass through will
	 * see the stale flag set on the inode.
	 */
	error = xfs_iflush(ip, &bp);
	if (error == -EAGAIN) {
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		/* backoff longer than in xfs_ifree_cluster */
		delay(2);
		goto restart;
	}

	if (!error) {
		error = xfs_bwrite(bp);
		xfs_buf_relse(bp);
	}

	xfs_iflock(ip);
reclaim:
	xfs_ifunlock(ip);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	XFS_STATS_INC(ip->i_mount, xs_ig_reclaims);
	/*
	 * Remove the inode from the per-AG radix tree.
	 *
	 * Because radix_tree_delete won't complain even if the item was never
	 * added to the tree, assert that it's been there before to catch
	 * problems with the inode life time early on.
	 */
	spin_lock(&pag->pag_ici_lock);
	if (!radix_tree_delete(&pag->pag_ici_root,
				XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
		ASSERT(0);
	__xfs_inode_clear_reclaim(pag, ip);
	spin_unlock(&pag->pag_ici_lock);

	/*
	 * Here we do an (almost) spurious inode lock in order to coordinate
	 * with inode cache radix tree lookups.  This is because the lookup
	 * can reference the inodes in the cache without taking references.
	 *
	 * We make that OK here by ensuring that we wait until the inode is
	 * unlocked after the lookup before we go ahead and free it.
	 */
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_qm_dqdetach(ip);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	xfs_inode_free(ip);
	return error;

out_ifunlock:
	xfs_ifunlock(ip);
out:
	xfs_iflags_clear(ip, XFS_IRECLAIM);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	/*
	 * We could return -EAGAIN here to make reclaim rescan the inode tree in
	 * a short while. However, this just burns CPU time scanning the tree
	 * waiting for IO to complete and the reclaim work never goes back to
	 * the idle state. Instead, return 0 to let the next scheduled
	 * background reclaim attempt to reclaim the inode again.
	 */
	return 0;
}

/*
 * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
 * corrupted, we still want to try to reclaim all the inodes. If we don't,
 * then a shutdown during the filesystem unmount reclaim walk will leak all
 * the unreclaimed inodes.
 */
STATIC int
xfs_reclaim_inodes_ag(
	struct xfs_mount	*mp,
	int			flags,
	int			*nr_to_scan)
{
	struct xfs_perag	*pag;
	int			error = 0;
	int			last_error = 0;
	xfs_agnumber_t		ag;
	int			trylock = flags & SYNC_TRYLOCK;
	int			skipped;

restart:
	ag = 0;
	skipped = 0;
	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
		unsigned long	first_index = 0;
		int		done = 0;
		int		nr_found = 0;

		ag = pag->pag_agno + 1;

		if (trylock) {
			if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
				skipped++;
				xfs_perag_put(pag);
				continue;
			}
			first_index = pag->pag_ici_reclaim_cursor;
		} else
			mutex_lock(&pag->pag_ici_reclaim_lock);

		do {
			struct xfs_inode *batch[XFS_LOOKUP_BATCH];
			int	i;

			rcu_read_lock();
			nr_found = radix_tree_gang_lookup_tag(
					&pag->pag_ici_root,
					(void **)batch, first_index,
					XFS_LOOKUP_BATCH,
					XFS_ICI_RECLAIM_TAG);
			if (!nr_found) {
				done = 1;
				rcu_read_unlock();
				break;
			}

			/*
			 * Grab the inodes before we drop the lock. If we found
			 * nothing, nr_found == 0 and the loop will be skipped.
			 */
			for (i = 0; i < nr_found; i++) {
				struct xfs_inode *ip = batch[i];

				if (done || xfs_reclaim_inode_grab(ip, flags))
					batch[i] = NULL;

				/*
				 * Update the index for the next lookup. Catch
				 * overflows into the next AG range which can
				 * occur if we have inodes in the last block of
				 * the AG and we are currently pointing to the
				 * last inode.
				 *
				 * Because we may see inodes that are from the
				 * wrong AG due to RCU freeing and
				 * reallocation, only update the index if it
				 * lies in this AG. It was a race that led us
				 * to see this inode, so another lookup from
				 * the same index will not find it again.
				 */
				if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
								pag->pag_agno)
					continue;
				first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
				if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
					done = 1;
			}

			/* unlock now we've grabbed the inodes. */
			rcu_read_unlock();

			for (i = 0; i < nr_found; i++) {
				if (!batch[i])
					continue;
				error = xfs_reclaim_inode(batch[i], pag, flags);
				if (error && last_error != -EFSCORRUPTED)
					last_error = error;
			}

			*nr_to_scan -= XFS_LOOKUP_BATCH;

			cond_resched();

		} while (nr_found && !done && *nr_to_scan > 0);

		if (trylock && !done)
			pag->pag_ici_reclaim_cursor = first_index;
		else
			pag->pag_ici_reclaim_cursor = 0;
		mutex_unlock(&pag->pag_ici_reclaim_lock);
		xfs_perag_put(pag);
	}

	/*
	 * If we skipped any AG, and we still have scan count remaining, do
	 * another pass this time using blocking reclaim semantics (i.e.
	 * waiting on the reclaim locks and ignoring the reclaim cursors). This
	 * ensures that when we get more reclaimers than AGs we block rather
	 * than spin trying to execute reclaim.
	 */
	if (skipped && (flags & SYNC_WAIT) && *nr_to_scan > 0) {
		trylock = 0;
		goto restart;
	}
	return last_error;
}

int
xfs_reclaim_inodes(
	xfs_mount_t	*mp,
	int		mode)
{
	int		nr_to_scan = INT_MAX;

	return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
}

/*
 * Scan a certain number of inodes for reclaim.
 *
 * When called we make sure that there is a background (fast) inode reclaim in
 * progress, while we will throttle the speed of reclaim via doing synchronous
 * reclaim of inodes. That means if we come across dirty inodes, we wait for
 * them to be cleaned, which we hope will not be very long due to the
 * background walker having already kicked the IO off on those dirty inodes.
 */
long
xfs_reclaim_inodes_nr(
	struct xfs_mount	*mp,
	int			nr_to_scan)
{
	/* kick background reclaimer and push the AIL */
	xfs_reclaim_work_queue(mp);
	xfs_ail_push_all(mp->m_ail);

	return xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan);
}
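
/*
 * Note (added commentary, not in the original source): xfs_reclaim_inodes_nr()
 * above and xfs_reclaim_inodes_count() below are intended to back the
 * per-mount superblock shrinker, i.e. the ->free_cached_objects and
 * ->nr_cached_objects callbacks wired up in xfs_super.c.
 */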

/*
 * Return the number of reclaimable inodes in the filesystem for
 * the shrinker to determine how much to reclaim.
 */
int
xfs_reclaim_inodes_count(
	struct xfs_mount	*mp)
{
	struct xfs_perag	*pag;
	xfs_agnumber_t		ag = 0;
	int			reclaimable = 0;

	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
		ag = pag->pag_agno + 1;
		reclaimable += pag->pag_ici_reclaimable;
		xfs_perag_put(pag);
	}
	return reclaimable;
}

STATIC int
xfs_inode_match_id(
	struct xfs_inode	*ip,
	struct xfs_eofblocks	*eofb)
{
	if ((eofb->eof_flags & XFS_EOF_FLAGS_UID) &&
	    !uid_eq(VFS_I(ip)->i_uid, eofb->eof_uid))
		return 0;

	if ((eofb->eof_flags & XFS_EOF_FLAGS_GID) &&
	    !gid_eq(VFS_I(ip)->i_gid, eofb->eof_gid))
		return 0;

	if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) &&
	    xfs_get_projid(ip) != eofb->eof_prid)
		return 0;

	return 1;
}

/*
 * A union-based inode filtering algorithm. Process the inode if any of the
 * criteria match. This is for global/internal scans only.
 */
STATIC int
xfs_inode_match_id_union(
	struct xfs_inode	*ip,
	struct xfs_eofblocks	*eofb)
{
	if ((eofb->eof_flags & XFS_EOF_FLAGS_UID) &&
	    uid_eq(VFS_I(ip)->i_uid, eofb->eof_uid))
		return 1;

	if ((eofb->eof_flags & XFS_EOF_FLAGS_GID) &&
	    gid_eq(VFS_I(ip)->i_gid, eofb->eof_gid))
		return 1;

	if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) &&
	    xfs_get_projid(ip) == eofb->eof_prid)
		return 1;

	return 0;
}
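
/*
 * Illustrative example (added commentary, not in the original source): with
 * eof_flags = XFS_EOF_FLAGS_UID | XFS_EOF_FLAGS_GID, xfs_inode_match_id()
 * matches only inodes owned by both the given uid and gid, while
 * xfs_inode_match_id_union() matches inodes owned by either of them.
 */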

STATIC int
xfs_inode_free_eofblocks(
	struct xfs_inode	*ip,
	int			flags,
	void			*args)
{
	int			ret;
	struct xfs_eofblocks	*eofb = args;
	bool			need_iolock = true;
	int			match;

	ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0));

	if (!xfs_can_free_eofblocks(ip, false)) {
		/* inode could be preallocated or append-only */
		trace_xfs_inode_free_eofblocks_invalid(ip);
		xfs_inode_clear_eofblocks_tag(ip);
		return 0;
	}

	/*
	 * If the mapping is dirty the operation can block and wait for some
	 * time. Unless we are waiting, skip it.
	 */
	if (!(flags & SYNC_WAIT) &&
	    mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY))
		return 0;

	if (eofb) {
		if (eofb->eof_flags & XFS_EOF_FLAGS_UNION)
			match = xfs_inode_match_id_union(ip, eofb);
		else
			match = xfs_inode_match_id(ip, eofb);
		if (!match)
			return 0;

		/* skip the inode if the file size is too small */
		if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
		    XFS_ISIZE(ip) < eofb->eof_min_file_size)
			return 0;

		/*
		 * A scan owner implies we already hold the iolock. Skip it in
		 * xfs_free_eofblocks() to avoid deadlock. This also eliminates
		 * the possibility of EAGAIN being returned.
		 */
		if (eofb->eof_scan_owner == ip->i_ino)
			need_iolock = false;
	}

	ret = xfs_free_eofblocks(ip->i_mount, ip, need_iolock);

	/* don't revisit the inode if we're not waiting */
	if (ret == -EAGAIN && !(flags & SYNC_WAIT))
		ret = 0;

	return ret;
}

int
xfs_icache_free_eofblocks(
	struct xfs_mount	*mp,
	struct xfs_eofblocks	*eofb)
{
	int flags = SYNC_TRYLOCK;

	if (eofb && (eofb->eof_flags & XFS_EOF_FLAGS_SYNC))
		flags = SYNC_WAIT;

	return xfs_inode_ag_iterator_tag(mp, xfs_inode_free_eofblocks, flags,
					 eofb, XFS_ICI_EOFBLOCKS_TAG);
}
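
/*
 * Illustrative sketch (added commentary, not part of the original source):
 * a caller-built filter for xfs_icache_free_eofblocks() that synchronously
 * trims post-EOF blocks for one uid on files of at least a minimum size;
 * the kuid and min_size values are assumed to come from the caller:
 *
 *	struct xfs_eofblocks	eofb = { 0 };
 *
 *	eofb.eof_flags = XFS_EOF_FLAGS_SYNC | XFS_EOF_FLAGS_UID |
 *			 XFS_EOF_FLAGS_MINFILESIZE;
 *	eofb.eof_uid = kuid;
 *	eofb.eof_min_file_size = min_size;
 *	error = xfs_icache_free_eofblocks(mp, &eofb);
 */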

/*
 * Run eofblocks scans on the quotas applicable to the inode. For inodes with
 * multiple quotas, we don't know exactly which quota caused an allocation
 * failure. We make a best effort by including each quota under low free space
 * conditions (less than 1% free space) in the scan.
 */
int
xfs_inode_free_quota_eofblocks(
	struct xfs_inode *ip)
{
	int scan = 0;
	struct xfs_eofblocks eofb = {0};
	struct xfs_dquot *dq;

	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));

	/*
	 * Set the scan owner to avoid a potential livelock. Otherwise, the scan
	 * can repeatedly trylock on the inode we're currently processing. We
	 * run a sync scan to increase effectiveness and use the union filter to
	 * cover all applicable quotas in a single scan.
	 */
	eofb.eof_scan_owner = ip->i_ino;
	eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC;

	if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) {
		dq = xfs_inode_dquot(ip, XFS_DQ_USER);
		if (dq && xfs_dquot_lowsp(dq)) {
			eofb.eof_uid = VFS_I(ip)->i_uid;
			eofb.eof_flags |= XFS_EOF_FLAGS_UID;
			scan = 1;
		}
	}

	if (XFS_IS_GQUOTA_ENFORCED(ip->i_mount)) {
		dq = xfs_inode_dquot(ip, XFS_DQ_GROUP);
		if (dq && xfs_dquot_lowsp(dq)) {
			eofb.eof_gid = VFS_I(ip)->i_gid;
			eofb.eof_flags |= XFS_EOF_FLAGS_GID;
			scan = 1;
		}
	}

	if (scan)
		xfs_icache_free_eofblocks(ip->i_mount, &eofb);

	return scan;
}

void
xfs_inode_set_eofblocks_tag(
	xfs_inode_t	*ip)
{
	struct xfs_mount *mp = ip->i_mount;
	struct xfs_perag *pag;
	int tagged;

	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
	spin_lock(&pag->pag_ici_lock);
	trace_xfs_inode_set_eofblocks_tag(ip);

	tagged = radix_tree_tagged(&pag->pag_ici_root,
				   XFS_ICI_EOFBLOCKS_TAG);
	radix_tree_tag_set(&pag->pag_ici_root,
			   XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
			   XFS_ICI_EOFBLOCKS_TAG);
	if (!tagged) {
		/* propagate the eofblocks tag up into the perag radix tree */
		spin_lock(&ip->i_mount->m_perag_lock);
		radix_tree_tag_set(&ip->i_mount->m_perag_tree,
				   XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
				   XFS_ICI_EOFBLOCKS_TAG);
		spin_unlock(&ip->i_mount->m_perag_lock);

		/* kick off background trimming */
		xfs_queue_eofblocks(ip->i_mount);

		trace_xfs_perag_set_eofblocks(ip->i_mount, pag->pag_agno,
					      -1, _RET_IP_);
	}

	spin_unlock(&pag->pag_ici_lock);
	xfs_perag_put(pag);
}

void
xfs_inode_clear_eofblocks_tag(
	xfs_inode_t	*ip)
{
	struct xfs_mount *mp = ip->i_mount;
	struct xfs_perag *pag;

	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
	spin_lock(&pag->pag_ici_lock);
	trace_xfs_inode_clear_eofblocks_tag(ip);

	radix_tree_tag_clear(&pag->pag_ici_root,
			     XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
			     XFS_ICI_EOFBLOCKS_TAG);
	if (!radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_EOFBLOCKS_TAG)) {
		/* clear the eofblocks tag from the perag radix tree */
		spin_lock(&ip->i_mount->m_perag_lock);
		radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
				     XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
				     XFS_ICI_EOFBLOCKS_TAG);
		spin_unlock(&ip->i_mount->m_perag_lock);
		trace_xfs_perag_clear_eofblocks(ip->i_mount, pag->pag_agno,
					       -1, _RET_IP_);
	}

	spin_unlock(&pag->pag_ici_lock);
	xfs_perag_put(pag);
}