/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_inode_item.h"
#include "xfs_quota.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_bmap_util.h"
#include "xfs_dquot_item.h"
#include "xfs_dquot.h"

#include <linux/kthread.h>
#include <linux/freezer.h>

STATIC void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp,
				struct xfs_perag *pag, struct xfs_inode *ip);

/*
 * Allocate and initialise an xfs_inode.
 */
struct xfs_inode *
xfs_inode_alloc(
	struct xfs_mount	*mp,
	xfs_ino_t		ino)
{
	struct xfs_inode	*ip;

	/*
	 * if this didn't occur in transactions, we could use
	 * KM_MAYFAIL and return NULL here on ENOMEM. Set the
	 * code up to do this anyway.
	 */
	ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP);
	if (!ip)
		return NULL;
	if (inode_init_always(mp->m_super, VFS_I(ip))) {
		kmem_zone_free(xfs_inode_zone, ip);
		return NULL;
	}

	/* VFS doesn't initialise i_mode! */
	VFS_I(ip)->i_mode = 0;

	XFS_STATS_INC(mp, vn_active);
	ASSERT(atomic_read(&ip->i_pincount) == 0);
	ASSERT(!spin_is_locked(&ip->i_flags_lock));
	ASSERT(!xfs_isiflocked(ip));
	ASSERT(ip->i_ino == 0);

	mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);

	/* initialise the xfs inode */
	ip->i_ino = ino;
	ip->i_mount = mp;
	memset(&ip->i_imap, 0, sizeof(struct xfs_imap));
	ip->i_afp = NULL;
	memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
	ip->i_flags = 0;
	ip->i_delayed_blks = 0;
	memset(&ip->i_d, 0, sizeof(ip->i_d));

	return ip;
}

STATIC void
xfs_inode_free_callback(
	struct rcu_head		*head)
{
	struct inode		*inode = container_of(head, struct inode, i_rcu);
	struct xfs_inode	*ip = XFS_I(inode);

	kmem_zone_free(xfs_inode_zone, ip);
}

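/*
 * Free an inode that is leaving the in-core cache: tear down the data and
 * attr forks and the inode log item, mark the inode reclaimed with a zeroed
 * inode number under the i_flags_lock so that racing RCU lookups skip it,
 * and then hand the VFS inode to RCU for the final free.
 */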
void
xfs_inode_free(
	struct xfs_inode	*ip)
{
	switch (VFS_I(ip)->i_mode & S_IFMT) {
	case S_IFREG:
	case S_IFDIR:
	case S_IFLNK:
		xfs_idestroy_fork(ip, XFS_DATA_FORK);
		break;
	}

	if (ip->i_afp)
		xfs_idestroy_fork(ip, XFS_ATTR_FORK);

	if (ip->i_itemp) {
		ASSERT(!(ip->i_itemp->ili_item.li_flags & XFS_LI_IN_AIL));
		xfs_inode_item_destroy(ip);
		ip->i_itemp = NULL;
	}

	/*
	 * Because we use RCU freeing we need to ensure the inode always
	 * appears to be reclaimed with an invalid inode number when in the
	 * free state. The ip->i_flags_lock provides the barrier against lookup
	 * races.
	 */
	spin_lock(&ip->i_flags_lock);
	ip->i_flags = XFS_IRECLAIM;
	ip->i_ino = 0;
	spin_unlock(&ip->i_flags_lock);

	/* asserts to verify all state is correct here */
	ASSERT(atomic_read(&ip->i_pincount) == 0);
	ASSERT(!xfs_isiflocked(ip));
	XFS_STATS_DEC(ip->i_mount, vn_active);

	call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
}

/*
 * When we recycle a reclaimable inode, we need to re-initialise the VFS inode
 * part of the structure. This is made more complex by the fact we store
 * information about the on-disk values in the VFS inode and so we can't just
 * overwrite the values unconditionally. Hence we save the parameters we
 * need to retain across reinitialisation, and rewrite them into the VFS inode
 * after reinitialisation even if it fails.
 */
static int
xfs_reinit_inode(
	struct xfs_mount	*mp,
	struct inode		*inode)
{
	int		error;
	uint32_t	nlink = inode->i_nlink;
	uint32_t	generation = inode->i_generation;
	uint64_t	version = inode->i_version;
	umode_t		mode = inode->i_mode;

	error = inode_init_always(mp->m_super, inode);

	set_nlink(inode, nlink);
	inode->i_generation = generation;
	inode->i_version = version;
	inode->i_mode = mode;
	return error;
}

/*
 * Check the validity of the inode we just found in the cache.
 */
static int
xfs_iget_cache_hit(
	struct xfs_perag	*pag,
	struct xfs_inode	*ip,
	xfs_ino_t		ino,
	int			flags,
	int			lock_flags) __releases(RCU)
{
	struct inode		*inode = VFS_I(ip);
	struct xfs_mount	*mp = ip->i_mount;
	int			error;

	/*
	 * check for re-use of an inode within an RCU grace period due to the
	 * radix tree nodes not being updated yet. We monitor for this by
	 * setting the inode number to zero before freeing the inode structure.
	 * If the inode has been reallocated and set up, then the inode number
	 * will not match, so check for that, too.
	 */
	spin_lock(&ip->i_flags_lock);
	if (ip->i_ino != ino) {
		trace_xfs_iget_skip(ip);
		XFS_STATS_INC(mp, xs_ig_frecycle);
		error = -EAGAIN;
		goto out_error;
	}


	/*
	 * If we are racing with another cache hit that is currently
	 * instantiating this inode or currently recycling it out of
	 * reclaimable state, wait for the initialisation to complete
	 * before continuing.
	 *
	 * XXX(hch): eventually we should do something equivalent to
	 *	     wait_on_inode to wait for these flags to be cleared
	 *	     instead of polling for it.
	 */
	if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) {
		trace_xfs_iget_skip(ip);
		XFS_STATS_INC(mp, xs_ig_frecycle);
		error = -EAGAIN;
		goto out_error;
	}

	/*
	 * If lookup is racing with unlink return an error immediately.
	 */
	if (VFS_I(ip)->i_mode == 0 && !(flags & XFS_IGET_CREATE)) {
		error = -ENOENT;
		goto out_error;
	}

	/*
	 * If IRECLAIMABLE is set, we've torn down the VFS inode already.
	 * Need to carefully get it back into usable state.
	 */
	if (ip->i_flags & XFS_IRECLAIMABLE) {
		trace_xfs_iget_reclaim(ip);

		/*
		 * We need to set XFS_IRECLAIM to prevent xfs_reclaim_inode
		 * from stomping over us while we recycle the inode. We can't
		 * clear the radix tree reclaimable tag yet as it requires
		 * pag_ici_lock to be held exclusive.
		 */
		ip->i_flags |= XFS_IRECLAIM;

		spin_unlock(&ip->i_flags_lock);
		rcu_read_unlock();

		error = xfs_reinit_inode(mp, inode);
		if (error) {
			/*
			 * Re-initializing the inode failed, and we are in deep
			 * trouble.  Try to re-add it to the reclaim list.
			 */
			rcu_read_lock();
			spin_lock(&ip->i_flags_lock);

			ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM);
			ASSERT(ip->i_flags & XFS_IRECLAIMABLE);
			trace_xfs_iget_reclaim_fail(ip);
			goto out_error;
		}

		spin_lock(&pag->pag_ici_lock);
		spin_lock(&ip->i_flags_lock);

		/*
		 * Clear the per-lifetime state in the inode as we are now
		 * effectively a new inode and need to return to the initial
		 * state before reuse occurs.
		 */
		ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS;
		ip->i_flags |= XFS_INEW;
		__xfs_inode_clear_reclaim_tag(mp, pag, ip);
		inode->i_state = I_NEW;

		ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
		mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);

		spin_unlock(&ip->i_flags_lock);
		spin_unlock(&pag->pag_ici_lock);
	} else {
		/* If the VFS inode is being torn down, pause and try again. */
		if (!igrab(inode)) {
			trace_xfs_iget_skip(ip);
			error = -EAGAIN;
			goto out_error;
		}

		/* We've got a live one. */
		spin_unlock(&ip->i_flags_lock);
		rcu_read_unlock();
		trace_xfs_iget_hit(ip);
	}

	if (lock_flags != 0)
		xfs_ilock(ip, lock_flags);

	xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE);
	XFS_STATS_INC(mp, xs_ig_found);

	return 0;

out_error:
	spin_unlock(&ip->i_flags_lock);
	rcu_read_unlock();
	return error;
}

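/*
 * The inode was not found in the cache: allocate a new in-core inode, read
 * the on-disk inode in, and insert it into the per-AG radix tree so that
 * subsequent lookups will find it.
 */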
static int
xfs_iget_cache_miss(
	struct xfs_mount	*mp,
	struct xfs_perag	*pag,
	xfs_trans_t		*tp,
	xfs_ino_t		ino,
	struct xfs_inode	**ipp,
	int			flags,
	int			lock_flags)
{
	struct xfs_inode	*ip;
	int			error;
	xfs_agino_t		agino = XFS_INO_TO_AGINO(mp, ino);
	int			iflags;

	ip = xfs_inode_alloc(mp, ino);
	if (!ip)
		return -ENOMEM;

	error = xfs_iread(mp, tp, ip, flags);
	if (error)
		goto out_destroy;

	trace_xfs_iget_miss(ip);

	if ((VFS_I(ip)->i_mode == 0) && !(flags & XFS_IGET_CREATE)) {
		error = -ENOENT;
		goto out_destroy;
	}

	/*
	 * Preload the radix tree so we can insert safely under the
	 * write spinlock. Note that we cannot sleep inside the preload
	 * region. Since we can be called from transaction context, don't
	 * recurse into the file system.
	 */
	if (radix_tree_preload(GFP_NOFS)) {
		error = -EAGAIN;
		goto out_destroy;
	}

	/*
	 * Because the inode hasn't been added to the radix-tree yet it can't
	 * be found by another thread, so we can do the non-sleeping lock here.
	 */
	if (lock_flags) {
		if (!xfs_ilock_nowait(ip, lock_flags))
			BUG();
	}

	/*
	 * These values must be set before inserting the inode into the radix
	 * tree as the moment it is inserted a concurrent lookup (allowed by the
	 * RCU locking mechanism) can find it and that lookup must see that this
	 * is an inode currently under construction (i.e. that XFS_INEW is set).
	 * The ip->i_flags_lock that protects the XFS_INEW flag forms the
	 * memory barrier that ensures this detection works correctly at lookup
	 * time.
	 */
	iflags = XFS_INEW;
	if (flags & XFS_IGET_DONTCACHE)
		iflags |= XFS_IDONTCACHE;
	ip->i_udquot = NULL;
	ip->i_gdquot = NULL;
	ip->i_pdquot = NULL;
	xfs_iflags_set(ip, iflags);

	/* insert the new inode */
	spin_lock(&pag->pag_ici_lock);
	error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
	if (unlikely(error)) {
		WARN_ON(error != -EEXIST);
		XFS_STATS_INC(mp, xs_ig_dup);
		error = -EAGAIN;
		goto out_preload_end;
	}
	spin_unlock(&pag->pag_ici_lock);
	radix_tree_preload_end();

	*ipp = ip;
	return 0;

out_preload_end:
	spin_unlock(&pag->pag_ici_lock);
	radix_tree_preload_end();
	if (lock_flags)
		xfs_iunlock(ip, lock_flags);
out_destroy:
	__destroy_inode(VFS_I(ip));
	xfs_inode_free(ip);
	return error;
}

/*
 * Look up an inode by number in the given file system.
 * The inode is looked up in the cache held in each AG.
 * If the inode is found in the cache, initialise the vfs inode
 * if necessary.
 *
 * If it is not in core, read it in from the file system's device,
 * add it to the cache and initialise the vfs inode.
 *
 * The inode is locked according to the value of the lock_flags parameter.
 * This flag parameter indicates how and if the inode's IO lock and inode lock
 * should be taken.
 *
 * mp -- the mount point structure for the current file system.  It points
 *       to the inode hash table.
 * tp -- a pointer to the current transaction if there is one.  This is
 *       simply passed through to the xfs_iread() call.
 * ino -- the number of the inode desired.  This is the unique identifier
 *        within the file system for the inode being requested.
 * lock_flags -- flags indicating how to lock the inode.  See the comment
 *               for xfs_ilock() for a list of valid values.
 */
int
xfs_iget(
	xfs_mount_t	*mp,
	xfs_trans_t	*tp,
	xfs_ino_t	ino,
	uint		flags,
	uint		lock_flags,
	xfs_inode_t	**ipp)
{
	xfs_inode_t	*ip;
	int		error;
	xfs_perag_t	*pag;
	xfs_agino_t	agino;

	/*
	 * xfs_reclaim_inode() uses the ILOCK to ensure an inode
	 * doesn't get freed while it's being referenced during a
	 * radix tree traversal here.  It assumes this function
	 * acquires only the ILOCK (and therefore it has no need to
	 * involve the IOLOCK in this synchronization).
	 */
	ASSERT((lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) == 0);

	/* reject inode numbers outside existing AGs */
	if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
		return -EINVAL;

	XFS_STATS_INC(mp, xs_ig_attempts);

	/* get the perag structure and ensure that it's inode capable */
	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
	agino = XFS_INO_TO_AGINO(mp, ino);

again:
	error = 0;
	rcu_read_lock();
	ip = radix_tree_lookup(&pag->pag_ici_root, agino);

	if (ip) {
		error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags);
		if (error)
			goto out_error_or_again;
	} else {
		rcu_read_unlock();
		XFS_STATS_INC(mp, xs_ig_missed);

		error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
							flags, lock_flags);
		if (error)
			goto out_error_or_again;
	}
	xfs_perag_put(pag);

	*ipp = ip;

	/*
	 * If we have a real type for an on-disk inode, we can set up the inode
	 * now.	 If it's a new inode being created, xfs_ialloc will handle it.
	 */
	if (xfs_iflags_test(ip, XFS_INEW) && VFS_I(ip)->i_mode != 0)
		xfs_setup_existing_inode(ip);
	return 0;

out_error_or_again:
	if (error == -EAGAIN) {
		delay(1);
		goto again;
	}
	xfs_perag_put(pag);
	return error;
}

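/*
 * A sketch of a hypothetical untransactioned caller of xfs_iget() above,
 * e.g. a handle or bulkstat style lookup where the inode number came from
 * userspace and no locks are wanted on return:
 *
 *	struct xfs_inode	*ip;
 *	int			error;
 *
 *	error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED, 0, &ip);
 *	if (error)
 *		return error;
 *	(use the inode, then drop the reference)
 *	IRELE(ip);
 */
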
/*
 * The inode lookup is done in batches to keep the amount of lock traffic and
 * radix tree lookups to a minimum. The batch size is a trade off between
 * lookup reduction and stack usage. This is in the reclaim path, so we can't
 * be too greedy.
 */
#define XFS_LOOKUP_BATCH	32

STATIC int
xfs_inode_ag_walk_grab(
	struct xfs_inode	*ip)
{
	struct inode		*inode = VFS_I(ip);

	ASSERT(rcu_read_lock_held());

	/*
	 * check for stale RCU freed inode
	 *
	 * If the inode has been reallocated, it doesn't matter if it's not in
	 * the AG we are walking - we are walking for writeback, so if it
	 * passes all the "valid inode" checks and is dirty, then we'll write
	 * it back anyway.  If it has been reallocated and is still being
	 * initialised, the XFS_INEW check below will catch it.
	 */
	spin_lock(&ip->i_flags_lock);
	if (!ip->i_ino)
		goto out_unlock_noent;

	/* avoid new or reclaimable inodes. Leave for reclaim code to flush */
	if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
		goto out_unlock_noent;
	spin_unlock(&ip->i_flags_lock);

	/* nothing to sync during shutdown */
	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EFSCORRUPTED;

	/* If we can't grab the inode, it must be on its way to reclaim. */
	if (!igrab(inode))
		return -ENOENT;

	/* inode is valid */
	return 0;

out_unlock_noent:
	spin_unlock(&ip->i_flags_lock);
	return -ENOENT;
}

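/*
 * Walk the in-core inodes of a single AG, calling @execute on every inode we
 * can grab. Lookups are done under RCU in batches of XFS_LOOKUP_BATCH; a
 * @tag of -1 walks all inodes, otherwise only inodes carrying that radix tree
 * tag are visited. Inodes whose callback returns -EAGAIN are counted as
 * skipped and the whole walk is restarted after a short delay.
 */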
STATIC int
xfs_inode_ag_walk(
	struct xfs_mount	*mp,
	struct xfs_perag	*pag,
	int			(*execute)(struct xfs_inode *ip, int flags,
					   void *args),
	int			flags,
	void			*args,
	int			tag)
{
	uint32_t		first_index;
	int			last_error = 0;
	int			skipped;
	int			done;
	int			nr_found;

restart:
	done = 0;
	skipped = 0;
	first_index = 0;
	nr_found = 0;
	do {
		struct xfs_inode *batch[XFS_LOOKUP_BATCH];
		int		error = 0;
		int		i;

		rcu_read_lock();

		if (tag == -1)
			nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
					(void **)batch, first_index,
					XFS_LOOKUP_BATCH);
		else
			nr_found = radix_tree_gang_lookup_tag(
					&pag->pag_ici_root,
					(void **) batch, first_index,
					XFS_LOOKUP_BATCH, tag);

		if (!nr_found) {
			rcu_read_unlock();
			break;
		}

		/*
		 * Grab the inodes before we drop the lock. If we found
		 * nothing, nr == 0 and the loop will be skipped.
		 */
		for (i = 0; i < nr_found; i++) {
			struct xfs_inode *ip = batch[i];

			if (done || xfs_inode_ag_walk_grab(ip))
				batch[i] = NULL;

			/*
			 * Update the index for the next lookup. Catch
			 * overflows into the next AG range which can occur if
			 * we have inodes in the last block of the AG and we
			 * are currently pointing to the last inode.
			 *
			 * Because we may see inodes that are from the wrong AG
			 * due to RCU freeing and reallocation, only update the
			 * index if it lies in this AG. It was a race that led
			 * us to see this inode, so another lookup from the
			 * same index will not find it again.
			 */
			if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
				continue;
			first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
			if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
				done = 1;
		}

		/* unlock now we've grabbed the inodes. */
		rcu_read_unlock();

		for (i = 0; i < nr_found; i++) {
			if (!batch[i])
				continue;
			error = execute(batch[i], flags, args);
			IRELE(batch[i]);
			if (error == -EAGAIN) {
				skipped++;
				continue;
			}
			if (error && last_error != -EFSCORRUPTED)
				last_error = error;
		}

		/* bail out if the filesystem is corrupted.  */
		if (error == -EFSCORRUPTED)
			break;

		cond_resched();

	} while (nr_found && !done);

	if (skipped) {
		delay(1);
		goto restart;
	}
	return last_error;
}

/*
 * Background scanning to trim post-EOF preallocated space. This is queued
 * based on the 'speculative_prealloc_lifetime' tunable (5m by default).
 */
STATIC void
xfs_queue_eofblocks(
	struct xfs_mount *mp)
{
	rcu_read_lock();
	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_EOFBLOCKS_TAG))
		queue_delayed_work(mp->m_eofblocks_workqueue,
				   &mp->m_eofblocks_work,
				   msecs_to_jiffies(xfs_eofb_secs * 1000));
	rcu_read_unlock();
}

void
xfs_eofblocks_worker(
	struct work_struct *work)
{
	struct xfs_mount *mp = container_of(to_delayed_work(work),
				struct xfs_mount, m_eofblocks_work);
	xfs_icache_free_eofblocks(mp, NULL);
	xfs_queue_eofblocks(mp);
}

int
xfs_inode_ag_iterator(
	struct xfs_mount	*mp,
	int			(*execute)(struct xfs_inode *ip, int flags,
					   void *args),
	int			flags,
	void			*args)
{
	struct xfs_perag	*pag;
	int			error = 0;
	int			last_error = 0;
	xfs_agnumber_t		ag;

	ag = 0;
	while ((pag = xfs_perag_get(mp, ag))) {
		ag = pag->pag_agno + 1;
		error = xfs_inode_ag_walk(mp, pag, execute, flags, args, -1);
		xfs_perag_put(pag);
		if (error) {
			last_error = error;
			if (error == -EFSCORRUPTED)
				break;
		}
	}
	return last_error;
}

int
xfs_inode_ag_iterator_tag(
	struct xfs_mount	*mp,
	int			(*execute)(struct xfs_inode *ip, int flags,
					   void *args),
	int			flags,
	void			*args,
	int			tag)
{
	struct xfs_perag	*pag;
	int			error = 0;
	int			last_error = 0;
	xfs_agnumber_t		ag;

	ag = 0;
	while ((pag = xfs_perag_get_tag(mp, ag, tag))) {
		ag = pag->pag_agno + 1;
		error = xfs_inode_ag_walk(mp, pag, execute, flags, args, tag);
		xfs_perag_put(pag);
		if (error) {
			last_error = error;
			if (error == -EFSCORRUPTED)
				break;
		}
	}
	return last_error;
}

/*
 * Queue a new inode reclaim pass if there are reclaimable inodes and there
 * isn't a reclaim pass already in progress. By default it runs every 5s based
 * on the xfs periodic sync default of 30s. Perhaps this should have its own
 * tunable, but that can be done if this method proves to be ineffective or too
 * aggressive.
 */
static void
xfs_reclaim_work_queue(
	struct xfs_mount	*mp)
{

	rcu_read_lock();
	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
		queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work,
			msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
	}
	rcu_read_unlock();
}

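/*
 * With the default xfs_syncd_centisecs of 3000 (30 seconds), the delay
 * computed in xfs_reclaim_work_queue() above works out to
 * 3000 / 6 * 10 = 5000ms, i.e. a background reclaim pass roughly every
 * 5 seconds.
 */
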
/*
 * This is a fast pass over the inode cache to try to get reclaim moving on as
 * many inodes as possible in a short period of time. It kicks itself every few
 * seconds, as well as being kicked by the inode cache shrinker when memory
 * goes low. It scans as quickly as possible avoiding locked inodes or those
 * already being flushed, and once done schedules a future pass.
 */
void
xfs_reclaim_worker(
	struct work_struct *work)
{
	struct xfs_mount *mp = container_of(to_delayed_work(work),
					struct xfs_mount, m_reclaim_work);

	xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
	xfs_reclaim_work_queue(mp);
}

static void
__xfs_inode_set_reclaim_tag(
	struct xfs_perag	*pag,
	struct xfs_inode	*ip)
{
	radix_tree_tag_set(&pag->pag_ici_root,
			   XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
			   XFS_ICI_RECLAIM_TAG);

	if (!pag->pag_ici_reclaimable) {
		/* propagate the reclaim tag up into the perag radix tree */
		spin_lock(&ip->i_mount->m_perag_lock);
		radix_tree_tag_set(&ip->i_mount->m_perag_tree,
				XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
				XFS_ICI_RECLAIM_TAG);
		spin_unlock(&ip->i_mount->m_perag_lock);

		/* schedule periodic background inode reclaim */
		xfs_reclaim_work_queue(ip->i_mount);

		trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
							-1, _RET_IP_);
	}
	pag->pag_ici_reclaimable++;
}

/*
 * We set the inode flag atomically with the radix tree tag.
 * Once we get tag lookups on the radix tree, this inode flag
 * can go away.
 */
void
xfs_inode_set_reclaim_tag(
	xfs_inode_t	*ip)
{
	struct xfs_mount *mp = ip->i_mount;
	struct xfs_perag *pag;

	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
	spin_lock(&pag->pag_ici_lock);
	spin_lock(&ip->i_flags_lock);
	__xfs_inode_set_reclaim_tag(pag, ip);
	__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
	spin_unlock(&ip->i_flags_lock);
	spin_unlock(&pag->pag_ici_lock);
	xfs_perag_put(pag);
}

STATIC void
__xfs_inode_clear_reclaim(
	xfs_perag_t	*pag,
	xfs_inode_t	*ip)
{
	pag->pag_ici_reclaimable--;
	if (!pag->pag_ici_reclaimable) {
		/* clear the reclaim tag from the perag radix tree */
		spin_lock(&ip->i_mount->m_perag_lock);
		radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
				XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
				XFS_ICI_RECLAIM_TAG);
		spin_unlock(&ip->i_mount->m_perag_lock);
		trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
							-1, _RET_IP_);
	}
}

STATIC void
__xfs_inode_clear_reclaim_tag(
	xfs_mount_t	*mp,
	xfs_perag_t	*pag,
	xfs_inode_t	*ip)
{
	radix_tree_tag_clear(&pag->pag_ici_root,
			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
	__xfs_inode_clear_reclaim(pag, ip);
}

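/*
 * Note that the set/clear helpers above keep a second, per-mount level of
 * tagging in sync: the first reclaimable inode in an AG tags that AG in
 * m_perag_tree, and the last one to be reclaimed clears it again. This lets
 * xfs_reclaim_work_queue() and xfs_reclaim_inodes_count() find AGs with
 * reclaimable inodes without walking every AG in the filesystem.
 */
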
/*
 * Grab the inode for reclaim exclusively.
 * Return 0 if we grabbed it, non-zero otherwise.
 */
STATIC int
xfs_reclaim_inode_grab(
	struct xfs_inode	*ip,
	int			flags)
{
	ASSERT(rcu_read_lock_held());

	/* quick check for stale RCU freed inode */
	if (!ip->i_ino)
		return 1;

	/*
	 * If we are asked for non-blocking operation, do unlocked checks to
	 * see if the inode already is being flushed or in reclaim to avoid
	 * lock traffic.
	 */
	if ((flags & SYNC_TRYLOCK) &&
	    __xfs_iflags_test(ip, XFS_IFLOCK | XFS_IRECLAIM))
		return 1;

	/*
	 * The radix tree lock here protects a thread in xfs_iget from racing
	 * with us starting reclaim on the inode.  Once we have the
	 * XFS_IRECLAIM flag set it will not touch us.
	 *
	 * Due to RCU lookup, we may find inodes that have been freed and only
	 * have XFS_IRECLAIM set.  Indeed, we may see reallocated inodes that
	 * aren't candidates for reclaim at all, so we must check that
	 * XFS_IRECLAIMABLE is set first before proceeding to reclaim.
	 */
	spin_lock(&ip->i_flags_lock);
	if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) ||
	    __xfs_iflags_test(ip, XFS_IRECLAIM)) {
		/* not a reclaim candidate. */
		spin_unlock(&ip->i_flags_lock);
		return 1;
	}
	__xfs_iflags_set(ip, XFS_IRECLAIM);
	spin_unlock(&ip->i_flags_lock);
	return 0;
}

/*
 * Inodes in different states need to be treated differently. The following
 * table lists the inode states and the reclaim actions necessary:
 *
 *	inode state	     iflush ret		required action
 *	---------------      ----------		---------------
 *	bad			-		reclaim
 *	shutdown		EIO		unpin and reclaim
 *	clean, unpinned		0		reclaim
 *	stale, unpinned		0		reclaim
 *	clean, pinned(*)	0		requeue
 *	stale, pinned		EAGAIN		requeue
 *	dirty, async		-		requeue
 *	dirty, sync		0		reclaim
 *
 * (*) dgc: I don't think the clean, pinned state is possible but it gets
 * handled anyway given the order of checks implemented.
 *
 * Also, because we get the flush lock first, we know that any inode that has
 * been flushed delwri has had the flush completed by the time we check that
 * the inode is clean.
 *
 * Note that because the inode is flushed delayed write by AIL pushing, the
 * flush lock may already be held here and waiting on it can result in very
 * long latencies.  Hence for sync reclaims, where we wait on the flush lock,
 * the caller should push the AIL first before trying to reclaim inodes to
 * minimise the amount of time spent waiting.  For background reclaim, we only
 * bother to reclaim clean inodes anyway.
 *
 * Hence the order of actions after gaining the locks should be:
 *	bad		=> reclaim
 *	shutdown	=> unpin and reclaim
 *	pinned, async	=> requeue
 *	pinned, sync	=> unpin
 *	stale		=> reclaim
 *	clean		=> reclaim
 *	dirty, async	=> requeue
 *	dirty, sync	=> flush, wait and reclaim
 */
STATIC int
xfs_reclaim_inode(
	struct xfs_inode	*ip,
	struct xfs_perag	*pag,
	int			sync_mode)
{
	struct xfs_buf		*bp = NULL;
	int			error;

restart:
	error = 0;
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	if (!xfs_iflock_nowait(ip)) {
		if (!(sync_mode & SYNC_WAIT))
			goto out;
		xfs_iflock(ip);
	}

	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
		xfs_iunpin_wait(ip);
		xfs_iflush_abort(ip, false);
		goto reclaim;
	}
	if (xfs_ipincount(ip)) {
		if (!(sync_mode & SYNC_WAIT))
			goto out_ifunlock;
		xfs_iunpin_wait(ip);
	}
	if (xfs_iflags_test(ip, XFS_ISTALE))
		goto reclaim;
	if (xfs_inode_clean(ip))
		goto reclaim;

	/*
	 * Never flush out dirty data during non-blocking reclaim, as it would
	 * just contend with AIL pushing trying to do the same job.
	 */
	if (!(sync_mode & SYNC_WAIT))
		goto out_ifunlock;

	/*
	 * Now we have an inode that needs flushing.
	 *
	 * Note that xfs_iflush will never block on the inode buffer lock, as
	 * xfs_ifree_cluster() can lock the inode buffer before it locks the
	 * ip->i_lock, and we are doing the exact opposite here.  As a result,
	 * doing a blocking xfs_imap_to_bp() to get the cluster buffer would
	 * result in an ABBA deadlock with xfs_ifree_cluster().
	 *
	 * As xfs_ifree_cluster() must gather all inodes that are active in the
	 * cache to mark them stale, if we hit this case we don't actually want
	 * to do IO here - we want the inode marked stale so we can simply
	 * reclaim it.  Hence if we get an EAGAIN error here, just unlock the
	 * inode, back off and try again.  Hopefully the next pass through will
	 * see the stale flag set on the inode.
	 */
	error = xfs_iflush(ip, &bp);
	if (error == -EAGAIN) {
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		/* backoff longer than in xfs_ifree_cluster */
		delay(2);
		goto restart;
	}

	if (!error) {
		error = xfs_bwrite(bp);
		xfs_buf_relse(bp);
	}

	xfs_iflock(ip);
reclaim:
	xfs_ifunlock(ip);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	XFS_STATS_INC(ip->i_mount, xs_ig_reclaims);
	/*
	 * Remove the inode from the per-AG radix tree.
	 *
	 * Because radix_tree_delete won't complain even if the item was never
	 * added to the tree, assert that it's been there before to catch
	 * problems with the inode life time early on.
	 */
	spin_lock(&pag->pag_ici_lock);
	if (!radix_tree_delete(&pag->pag_ici_root,
				XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
		ASSERT(0);
	__xfs_inode_clear_reclaim(pag, ip);
	spin_unlock(&pag->pag_ici_lock);

	/*
	 * Here we do an (almost) spurious inode lock in order to coordinate
	 * with inode cache radix tree lookups.  This is because the lookup
	 * can reference the inodes in the cache without taking references.
	 *
	 * We make that OK here by ensuring that we wait until the inode is
	 * unlocked after the lookup before we go ahead and free it.
	 */
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_qm_dqdetach(ip);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	xfs_inode_free(ip);
	return error;

out_ifunlock:
	xfs_ifunlock(ip);
out:
	xfs_iflags_clear(ip, XFS_IRECLAIM);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	/*
	 * We could return -EAGAIN here to make reclaim rescan the inode tree in
	 * a short while. However, this just burns CPU time scanning the tree
	 * waiting for IO to complete and the reclaim work never goes back to
	 * the idle state. Instead, return 0 to let the next scheduled
	 * background reclaim attempt to reclaim the inode again.
	 */
	return 0;
}

/*
 * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
 * corrupted, we still want to try to reclaim all the inodes. If we don't,
 * then a shutdown during the filesystem unmount reclaim walk would leak all
 * the unreclaimed inodes.
 */
STATIC int
xfs_reclaim_inodes_ag(
	struct xfs_mount	*mp,
	int			flags,
	int			*nr_to_scan)
{
	struct xfs_perag	*pag;
	int			error = 0;
	int			last_error = 0;
	xfs_agnumber_t		ag;
	int			trylock = flags & SYNC_TRYLOCK;
	int			skipped;

restart:
	ag = 0;
	skipped = 0;
	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
		unsigned long	first_index = 0;
		int		done = 0;
		int		nr_found = 0;

		ag = pag->pag_agno + 1;

		if (trylock) {
			if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
				skipped++;
				xfs_perag_put(pag);
				continue;
			}
			first_index = pag->pag_ici_reclaim_cursor;
		} else
			mutex_lock(&pag->pag_ici_reclaim_lock);

		do {
			struct xfs_inode *batch[XFS_LOOKUP_BATCH];
			int	i;

			rcu_read_lock();
			nr_found = radix_tree_gang_lookup_tag(
					&pag->pag_ici_root,
					(void **)batch, first_index,
					XFS_LOOKUP_BATCH,
					XFS_ICI_RECLAIM_TAG);
			if (!nr_found) {
				done = 1;
				rcu_read_unlock();
				break;
			}

			/*
			 * Grab the inodes before we drop the lock. If we found
			 * nothing, nr == 0 and the loop will be skipped.
			 */
			for (i = 0; i < nr_found; i++) {
				struct xfs_inode *ip = batch[i];

				if (done || xfs_reclaim_inode_grab(ip, flags))
					batch[i] = NULL;

				/*
				 * Update the index for the next lookup. Catch
				 * overflows into the next AG range which can
				 * occur if we have inodes in the last block of
				 * the AG and we are currently pointing to the
				 * last inode.
				 *
				 * Because we may see inodes that are from the
				 * wrong AG due to RCU freeing and
				 * reallocation, only update the index if it
				 * lies in this AG. It was a race that led us
				 * to see this inode, so another lookup from
				 * the same index will not find it again.
				 */
				if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
								pag->pag_agno)
					continue;
				first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
				if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
					done = 1;
			}

			/* unlock now we've grabbed the inodes. */
			rcu_read_unlock();

			for (i = 0; i < nr_found; i++) {
				if (!batch[i])
					continue;
				error = xfs_reclaim_inode(batch[i], pag, flags);
				if (error && last_error != -EFSCORRUPTED)
					last_error = error;
			}

			*nr_to_scan -= XFS_LOOKUP_BATCH;

			cond_resched();

		} while (nr_found && !done && *nr_to_scan > 0);

		if (trylock && !done)
			pag->pag_ici_reclaim_cursor = first_index;
		else
			pag->pag_ici_reclaim_cursor = 0;
		mutex_unlock(&pag->pag_ici_reclaim_lock);
		xfs_perag_put(pag);
	}

	/*
	 * If we skipped any AG, and we still have scan count remaining, do
	 * another pass this time using blocking reclaim semantics (i.e.
	 * waiting on the reclaim locks and ignoring the reclaim cursors). This
	 * ensures that when we get more reclaimers than AGs we block rather
	 * than spin trying to execute reclaim.
	 */
	if (skipped && (flags & SYNC_WAIT) && *nr_to_scan > 0) {
		trylock = 0;
		goto restart;
	}
	return last_error;
}

int
xfs_reclaim_inodes(
	xfs_mount_t	*mp,
	int		mode)
{
	int		nr_to_scan = INT_MAX;

	return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
}

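/*
 * xfs_reclaim_inodes() above passes INT_MAX as the scan count, so a single
 * call keeps going until every reclaimable inode has been visited. The
 * background worker calls it with SYNC_TRYLOCK for a non-blocking sweep,
 * while unmount-style callers typically pass SYNC_WAIT so that nothing
 * reclaimable is left behind.
 */
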
/*
 * Scan a certain number of inodes for reclaim.
 *
 * When called, we make sure that there is a background (fast) inode reclaim
 * in progress, while we throttle the speed of reclaim by doing synchronous
 * reclaim of inodes. That means if we come across dirty inodes, we wait for
 * them to be cleaned, which we hope will not be very long due to the
 * background walker having already kicked the IO off on those dirty inodes.
 */
long
xfs_reclaim_inodes_nr(
	struct xfs_mount	*mp,
	int			nr_to_scan)
{
	/* kick background reclaimer and push the AIL */
	xfs_reclaim_work_queue(mp);
	xfs_ail_push_all(mp->m_ail);

	return xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan);
}

/*
 * Return the number of reclaimable inodes in the filesystem for
 * the shrinker to determine how much to reclaim.
 */
int
xfs_reclaim_inodes_count(
	struct xfs_mount	*mp)
{
	struct xfs_perag	*pag;
	xfs_agnumber_t		ag = 0;
	int			reclaimable = 0;

	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
		ag = pag->pag_agno + 1;
		reclaimable += pag->pag_ici_reclaimable;
		xfs_perag_put(pag);
	}
	return reclaimable;
}

STATIC int
xfs_inode_match_id(
	struct xfs_inode	*ip,
	struct xfs_eofblocks	*eofb)
{
	if ((eofb->eof_flags & XFS_EOF_FLAGS_UID) &&
	    !uid_eq(VFS_I(ip)->i_uid, eofb->eof_uid))
		return 0;

	if ((eofb->eof_flags & XFS_EOF_FLAGS_GID) &&
	    !gid_eq(VFS_I(ip)->i_gid, eofb->eof_gid))
		return 0;

	if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) &&
	    xfs_get_projid(ip) != eofb->eof_prid)
		return 0;

	return 1;
}

/*
 * A union-based inode filtering algorithm. Process the inode if any of the
 * criteria match. This is for global/internal scans only.
 */
STATIC int
xfs_inode_match_id_union(
	struct xfs_inode	*ip,
	struct xfs_eofblocks	*eofb)
{
	if ((eofb->eof_flags & XFS_EOF_FLAGS_UID) &&
	    uid_eq(VFS_I(ip)->i_uid, eofb->eof_uid))
		return 1;

	if ((eofb->eof_flags & XFS_EOF_FLAGS_GID) &&
	    gid_eq(VFS_I(ip)->i_gid, eofb->eof_gid))
		return 1;

	if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) &&
	    xfs_get_projid(ip) == eofb->eof_prid)
		return 1;

	return 0;
}

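/*
 * Trim speculative preallocation beyond EOF from a single inode. This is the
 * per-inode callback for the eofblocks scans below: the inode is filtered
 * against the optional xfs_eofblocks criteria and then has its post-EOF
 * blocks freed; 0 is returned if the inode was skipped or trimmed cleanly,
 * otherwise a negative errno.
 */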
STATIC int
xfs_inode_free_eofblocks(
	struct xfs_inode	*ip,
	int			flags,
	void			*args)
{
	int ret;
	struct xfs_eofblocks *eofb = args;
	bool need_iolock = true;
	int match;

	ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0));

	if (!xfs_can_free_eofblocks(ip, false)) {
		/* inode could be preallocated or append-only */
		trace_xfs_inode_free_eofblocks_invalid(ip);
		xfs_inode_clear_eofblocks_tag(ip);
		return 0;
	}

	/*
	 * If the mapping is dirty the operation can block and wait for some
	 * time. Unless we are waiting, skip it.
	 */
	if (!(flags & SYNC_WAIT) &&
	    mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY))
		return 0;

	if (eofb) {
		if (eofb->eof_flags & XFS_EOF_FLAGS_UNION)
			match = xfs_inode_match_id_union(ip, eofb);
		else
			match = xfs_inode_match_id(ip, eofb);
		if (!match)
			return 0;

		/* skip the inode if the file size is too small */
		if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
		    XFS_ISIZE(ip) < eofb->eof_min_file_size)
			return 0;

		/*
		 * A scan owner implies we already hold the iolock. Skip it in
		 * xfs_free_eofblocks() to avoid deadlock. This also eliminates
		 * the possibility of EAGAIN being returned.
		 */
		if (eofb->eof_scan_owner == ip->i_ino)
			need_iolock = false;
	}

	ret = xfs_free_eofblocks(ip->i_mount, ip, need_iolock);

	/* don't revisit the inode if we're not waiting */
	if (ret == -EAGAIN && !(flags & SYNC_WAIT))
		ret = 0;

	return ret;
}

int
xfs_icache_free_eofblocks(
	struct xfs_mount	*mp,
	struct xfs_eofblocks	*eofb)
{
	int flags = SYNC_TRYLOCK;

	if (eofb && (eofb->eof_flags & XFS_EOF_FLAGS_SYNC))
		flags = SYNC_WAIT;

	return xfs_inode_ag_iterator_tag(mp, xfs_inode_free_eofblocks, flags,
					 eofb, XFS_ICI_EOFBLOCKS_TAG);
}

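/*
 * Note the flag mapping in xfs_icache_free_eofblocks() above: a scan
 * requested with XFS_EOF_FLAGS_SYNC runs with SYNC_WAIT and will wait on
 * dirty mappings, while all other scans are best-effort (SYNC_TRYLOCK) and
 * simply skip inodes they cannot process immediately.
 */
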
/*
 * Run eofblocks scans on the quotas applicable to the inode. For inodes with
 * multiple quotas, we don't know exactly which quota caused an allocation
 * failure. We make a best effort by including each quota under low free space
 * conditions (less than 1% free space) in the scan.
 */
int
xfs_inode_free_quota_eofblocks(
	struct xfs_inode *ip)
{
	int scan = 0;
	struct xfs_eofblocks eofb = {0};
	struct xfs_dquot *dq;

	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));

	/*
	 * Set the scan owner to avoid a potential livelock. Otherwise, the scan
	 * can repeatedly trylock on the inode we're currently processing. We
	 * run a sync scan to increase effectiveness and use the union filter to
	 * cover all applicable quotas in a single scan.
	 */
	eofb.eof_scan_owner = ip->i_ino;
	eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC;

	if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) {
		dq = xfs_inode_dquot(ip, XFS_DQ_USER);
		if (dq && xfs_dquot_lowsp(dq)) {
			eofb.eof_uid = VFS_I(ip)->i_uid;
			eofb.eof_flags |= XFS_EOF_FLAGS_UID;
			scan = 1;
		}
	}

	if (XFS_IS_GQUOTA_ENFORCED(ip->i_mount)) {
		dq = xfs_inode_dquot(ip, XFS_DQ_GROUP);
		if (dq && xfs_dquot_lowsp(dq)) {
			eofb.eof_gid = VFS_I(ip)->i_gid;
			eofb.eof_flags |= XFS_EOF_FLAGS_GID;
			scan = 1;
		}
	}

	if (scan)
		xfs_icache_free_eofblocks(ip->i_mount, &eofb);

	return scan;
}

void
xfs_inode_set_eofblocks_tag(
	xfs_inode_t	*ip)
{
	struct xfs_mount *mp = ip->i_mount;
	struct xfs_perag *pag;
	int tagged;

	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
	spin_lock(&pag->pag_ici_lock);
	trace_xfs_inode_set_eofblocks_tag(ip);

	tagged = radix_tree_tagged(&pag->pag_ici_root,
				   XFS_ICI_EOFBLOCKS_TAG);
	radix_tree_tag_set(&pag->pag_ici_root,
			   XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
			   XFS_ICI_EOFBLOCKS_TAG);
	if (!tagged) {
		/* propagate the eofblocks tag up into the perag radix tree */
		spin_lock(&ip->i_mount->m_perag_lock);
		radix_tree_tag_set(&ip->i_mount->m_perag_tree,
				   XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
				   XFS_ICI_EOFBLOCKS_TAG);
		spin_unlock(&ip->i_mount->m_perag_lock);

		/* kick off background trimming */
		xfs_queue_eofblocks(ip->i_mount);

		trace_xfs_perag_set_eofblocks(ip->i_mount, pag->pag_agno,
					      -1, _RET_IP_);
	}

	spin_unlock(&pag->pag_ici_lock);
	xfs_perag_put(pag);
}

void
xfs_inode_clear_eofblocks_tag(
	xfs_inode_t	*ip)
{
	struct xfs_mount *mp = ip->i_mount;
	struct xfs_perag *pag;

	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
	spin_lock(&pag->pag_ici_lock);
	trace_xfs_inode_clear_eofblocks_tag(ip);

	radix_tree_tag_clear(&pag->pag_ici_root,
			     XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
			     XFS_ICI_EOFBLOCKS_TAG);
	if (!radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_EOFBLOCKS_TAG)) {
		/* clear the eofblocks tag from the perag radix tree */
		spin_lock(&ip->i_mount->m_perag_lock);
		radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
				     XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
				     XFS_ICI_EOFBLOCKS_TAG);
		spin_unlock(&ip->i_mount->m_perag_lock);
		trace_xfs_perag_clear_eofblocks(ip->i_mount, pag->pag_agno,
						-1, _RET_IP_);
	}

	spin_unlock(&pag->pag_ici_lock);
	xfs_perag_put(pag);
}