xfs_inode.c (3b5d1afd1f13bcab85eaa28223ad396694f929e3, old) | xfs_inode.c (b63da6c8dfa9b2ab3554e8c59ef294d1f28bb9bd, new) |
---|---|
1// SPDX-License-Identifier: GPL-2.0 2/* 3 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 4 * All Rights Reserved. 5 */ 6#include <linux/iversion.h> 7 8#include "xfs.h" --- 30 unchanged lines hidden (view full) --- 39kmem_zone_t *xfs_inode_zone; 40 41/* 42 * Used in xfs_itruncate_extents(). This is the maximum number of extents 43 * freed from a file in a single transaction. 44 */ 45#define XFS_ITRUNC_MAX_EXTENTS 2 46 | 1// SPDX-License-Identifier: GPL-2.0 2/* 3 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 4 * All Rights Reserved. 5 */ 6#include <linux/iversion.h> 7 8#include "xfs.h" --- 30 unchanged lines hidden (view full) --- 39kmem_zone_t *xfs_inode_zone; 40 41/* 42 * Used in xfs_itruncate_extents(). This is the maximum number of extents 43 * freed from a file in a single transaction. 44 */ 45#define XFS_ITRUNC_MAX_EXTENTS 2 46 |
47STATIC int xfs_iflush_int(struct xfs_inode *, struct xfs_buf *); | |
48STATIC int xfs_iunlink(struct xfs_trans *, struct xfs_inode *); 49STATIC int xfs_iunlink_remove(struct xfs_trans *, struct xfs_inode *); 50 51/* 52 * helper function to extract extent size hint from inode 53 */ 54xfs_extlen_t 55xfs_get_extsz_hint( --- 391 unchanged lines hidden (view full) --- 447 uint lock_mode) 448{ 449 int attempts = 0, i, j, try_lock; 450 struct xfs_log_item *lp; 451 452 /* 453 * Currently supports between 2 and 5 inodes with exclusive locking. We 454 * support an arbitrary depth of locking here, but absolute limits on | 47STATIC int xfs_iunlink(struct xfs_trans *, struct xfs_inode *); 48STATIC int xfs_iunlink_remove(struct xfs_trans *, struct xfs_inode *); 49 50/* 51 * helper function to extract extent size hint from inode 52 */ 53xfs_extlen_t 54xfs_get_extsz_hint( --- 391 unchanged lines hidden (view full) --- 446 uint lock_mode) 447{ 448 int attempts = 0, i, j, try_lock; 449 struct xfs_log_item *lp; 450 451 /* 452 * Currently supports between 2 and 5 inodes with exclusive locking. We 453 * support an arbitrary depth of locking here, but absolute limits on |
455 * inodes depend on the the type of locking and the limits placed by | 454 * inodes depend on the type of locking and the limits placed by |
456 * lockdep annotations in xfs_lock_inumorder. These are all checked by 457 * the asserts. 458 */ 459 ASSERT(ips && inodes >= 2 && inodes <= 5); 460 ASSERT(lock_mode & (XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL | 461 XFS_ILOCK_EXCL)); 462 ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED | 463 XFS_ILOCK_SHARED))); --- 1271 unchanged lines hidden (view full) --- 1735 "Failed to remove inode(s) from unlinked list. " 1736 "Please free space, unmount and run xfs_repair."); 1737 } else { 1738 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1739 } 1740 return error; 1741 } 1742 | 455 * lockdep annotations in xfs_lock_inumorder. These are all checked by 456 * the asserts. 457 */ 458 ASSERT(ips && inodes >= 2 && inodes <= 5); 459 ASSERT(lock_mode & (XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL | 460 XFS_ILOCK_EXCL)); 461 ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED | 462 XFS_ILOCK_SHARED))); --- 1271 unchanged lines hidden (view full) --- 1734 "Failed to remove inode(s) from unlinked list. " 1735 "Please free space, unmount and run xfs_repair."); 1736 } else { 1737 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1738 } 1739 return error; 1740 } 1741 |
| 1742 /* 1743 * We do not hold the inode locked across the entire rolling transaction 1744 * here. We only need to hold it for the first transaction that 1745 * xfs_ifree() builds, which may mark the inode XFS_ISTALE if the 1746 * underlying cluster buffer is freed. Relogging an XFS_ISTALE inode 1747 * here breaks the relationship between cluster buffer invalidation and 1748 * stale inode invalidation on cluster buffer item journal commit 1749 * completion, and can result in leaving dirty stale inodes hanging 1750 * around in memory. 1751 * 1752 * We have no need for serialising this inode operation against other 1753 * operations - we freed the inode and hence reallocation is required 1754 * and that will serialise on reallocating the space the deferops need 1755 * to free. Hence we can unlock the inode on the first commit of 1756 * the transaction rather than roll it right through the deferops. This 1757 * avoids relogging the XFS_ISTALE inode. 1758 * 1759 * We check that xfs_ifree() hasn't grown an internal transaction roll 1760 * by asserting that the inode is still locked when it returns. 1761 */
1743 xfs_ilock(ip, XFS_ILOCK_EXCL); | 1762 xfs_ilock(ip, XFS_ILOCK_EXCL); |
1744 xfs_trans_ijoin(tp, ip, 0); | 1763 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); |
1745 1746 error = xfs_ifree(tp, ip); | 1764 1765 error = xfs_ifree(tp, ip); |
| 1766 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1747 if (error) { 1748 /* 1749 * If we fail to free the inode, shut down. The cancel 1750 * might do that, we need to make sure. Otherwise the 1751 * inode might be lost for a long time or forever. 1752 */ 1753 if (!XFS_FORCED_SHUTDOWN(mp)) { 1754 xfs_notice(mp, "%s: xfs_ifree returned error %d", 1755 __func__, error); 1756 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); 1757 } 1758 xfs_trans_cancel(tp); | 1767 if (error) { 1768 /* 1769 * If we fail to free the inode, shut down. The cancel 1770 * might do that, we need to make sure. Otherwise the 1771 * inode might be lost for a long time or forever. 1772 */ 1773 if (!XFS_FORCED_SHUTDOWN(mp)) { 1774 xfs_notice(mp, "%s: xfs_ifree returned error %d", 1775 __func__, error); 1776 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); 1777 } 1778 xfs_trans_cancel(tp); |
1759 xfs_iunlock(ip, XFS_ILOCK_EXCL); | |
1760 return error; 1761 } 1762 1763 /* 1764 * Credit the quota account(s). The inode is gone. 1765 */ 1766 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1); 1767 1768 /* 1769 * Just ignore errors at this point. There is nothing we can do except 1770 * to try to keep going. Make sure it's not a silent error. 1771 */ 1772 error = xfs_trans_commit(tp); 1773 if (error) 1774 xfs_notice(mp, "%s: xfs_trans_commit returned error %d", 1775 __func__, error); 1776 | 1779 return error; 1780 } 1781 1782 /* 1783 * Credit the quota account(s). The inode is gone. 1784 */ 1785 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1); 1786 1787 /* 1788 * Just ignore errors at this point. There is nothing we can do except 1789 * to try to keep going. Make sure it's not a silent error. 1790 */ 1791 error = xfs_trans_commit(tp); 1792 if (error) 1793 xfs_notice(mp, "%s: xfs_trans_commit returned error %d", 1794 __func__, error); 1795 |
1777 xfs_iunlock(ip, XFS_ILOCK_EXCL); | |
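The hunk above drops the explicit xfs_iunlock() calls on the commit and cancel paths because the inode lock is now handed to the transaction: passing XFS_ILOCK_EXCL to xfs_trans_ijoin() lets the first commit (or a cancel) release the lock, so an XFS_ISTALE inode is not relogged through the later deferred-op rolls. A condensed before/after sketch, using only the helpers visible in this diff (kernel code, not a standalone example):

```c
/* Before: the caller owns the ILOCK for the whole rolling transaction. */
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);			/* 0: commit will not unlock */
error = xfs_ifree(tp, ip);
/* ... commit or cancel ... */
xfs_iunlock(ip, XFS_ILOCK_EXCL);		/* explicit unlock required */

/* After: hand the lock to the transaction; the first commit (or cancel)
 * releases it, so the inode is not relogged through the deferops rolls. */
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
error = xfs_ifree(tp, ip);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));	/* xfs_ifree() must not roll */
```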
1778 return 0; 1779} 1780 1781/* 1782 * xfs_inactive 1783 * 1784 * This is called when the vnode reference count for the vnode 1785 * goes to zero. If the file has been unlinked, then it must --- 356 unchanged lines hidden (view full) --- 2142 dip->di_next_unlinked = cpu_to_be32(next_agino); 2143 offset = imap->im_boffset + 2144 offsetof(struct xfs_dinode, di_next_unlinked); 2145 2146 /* need to recalc the inode CRC if appropriate */ 2147 xfs_dinode_calc_crc(mp, dip); 2148 xfs_trans_inode_buf(tp, ibp); 2149 xfs_trans_log_buf(tp, ibp, offset, offset + sizeof(xfs_agino_t) - 1); | 1796 return 0; 1797} 1798 1799/* 1800 * xfs_inactive 1801 * 1802 * This is called when the vnode reference count for the vnode 1803 * goes to zero. If the file has been unlinked, then it must --- 356 unchanged lines hidden (view full) --- 2160 dip->di_next_unlinked = cpu_to_be32(next_agino); 2161 offset = imap->im_boffset + 2162 offsetof(struct xfs_dinode, di_next_unlinked); 2163 2164 /* need to recalc the inode CRC if appropriate */ 2165 xfs_dinode_calc_crc(mp, dip); 2166 xfs_trans_inode_buf(tp, ibp); 2167 xfs_trans_log_buf(tp, ibp, offset, offset + sizeof(xfs_agino_t) - 1); |
2150 xfs_inobp_check(mp, ibp); | |
2151} 2152 2153/* Set an in-core inode's unlinked pointer and return the old value. */ 2154STATIC int 2155xfs_iunlink_update_inode( 2156 struct xfs_trans *tp, 2157 struct xfs_inode *ip, 2158 xfs_agnumber_t agno, --- 84 unchanged lines hidden (view full) --- 2243 next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); 2244 if (next_agino == agino || 2245 !xfs_verify_agino_or_null(mp, agno, next_agino)) { 2246 xfs_buf_mark_corrupt(agibp); 2247 return -EFSCORRUPTED; 2248 } 2249 2250 if (next_agino != NULLAGINO) { | 2168} 2169 2170/* Set an in-core inode's unlinked pointer and return the old value. */ 2171STATIC int 2172xfs_iunlink_update_inode( 2173 struct xfs_trans *tp, 2174 struct xfs_inode *ip, 2175 xfs_agnumber_t agno, --- 84 unchanged lines hidden (view full) --- 2260 next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); 2261 if (next_agino == agino || 2262 !xfs_verify_agino_or_null(mp, agno, next_agino)) { 2263 xfs_buf_mark_corrupt(agibp); 2264 return -EFSCORRUPTED; 2265 } 2266 2267 if (next_agino != NULLAGINO) { |
2251 struct xfs_perag *pag; | |
2252 xfs_agino_t old_agino; 2253 2254 /* 2255 * There is already another inode in the bucket, so point this 2256 * inode to the current head of the list. 2257 */ 2258 error = xfs_iunlink_update_inode(tp, ip, agno, next_agino, 2259 &old_agino); 2260 if (error) 2261 return error; 2262 ASSERT(old_agino == NULLAGINO); 2263 2264 /* 2265 * agino has been unlinked, add a backref from the next inode 2266 * back to agino. 2267 */ | 2268 xfs_agino_t old_agino; 2269 2270 /* 2271 * There is already another inode in the bucket, so point this 2272 * inode to the current head of the list. 2273 */ 2274 error = xfs_iunlink_update_inode(tp, ip, agno, next_agino, 2275 &old_agino); 2276 if (error) 2277 return error; 2278 ASSERT(old_agino == NULLAGINO); 2279 2280 /* 2281 * agino has been unlinked, add a backref from the next inode 2282 * back to agino. 2283 */ |
2268 pag = xfs_perag_get(mp, agno); 2269 error = xfs_iunlink_add_backref(pag, agino, next_agino); 2270 xfs_perag_put(pag); | 2284 error = xfs_iunlink_add_backref(agibp->b_pag, agino, next_agino); |
2271 if (error) 2272 return error; 2273 } 2274 2275 /* Point the head of the list to point to this inode. */ 2276 return xfs_iunlink_update_bucket(tp, agno, agibp, bucket_index, agino); 2277} 2278 --- 119 unchanged lines hidden (view full) --- 2398 struct xfs_trans *tp, 2399 struct xfs_inode *ip) 2400{ 2401 struct xfs_mount *mp = tp->t_mountp; 2402 struct xfs_agi *agi; 2403 struct xfs_buf *agibp; 2404 struct xfs_buf *last_ibp; 2405 struct xfs_dinode *last_dip = NULL; | 2285 if (error) 2286 return error; 2287 } 2288 2289 /* Point the head of the list to point to this inode. */ 2290 return xfs_iunlink_update_bucket(tp, agno, agibp, bucket_index, agino); 2291} 2292 --- 119 unchanged lines hidden (view full) --- 2412 struct xfs_trans *tp, 2413 struct xfs_inode *ip) 2414{ 2415 struct xfs_mount *mp = tp->t_mountp; 2416 struct xfs_agi *agi; 2417 struct xfs_buf *agibp; 2418 struct xfs_buf *last_ibp; 2419 struct xfs_dinode *last_dip = NULL; |
2406 struct xfs_perag *pag = NULL; | |
2407 xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino); 2408 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); 2409 xfs_agino_t next_agino; 2410 xfs_agino_t head_agino; 2411 short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; 2412 int error; 2413 2414 trace_xfs_iunlink_remove(ip); --- 27 unchanged lines hidden (view full) --- 2442 /* 2443 * If there was a backref pointing from the next inode back to this 2444 * one, remove it because we've removed this inode from the list. 2445 * 2446 * Later, if this inode was in the middle of the list we'll update 2447 * this inode's backref to point from the next inode. 2448 */ 2449 if (next_agino != NULLAGINO) { | 2420 xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino); 2421 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); 2422 xfs_agino_t next_agino; 2423 xfs_agino_t head_agino; 2424 short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; 2425 int error; 2426 2427 trace_xfs_iunlink_remove(ip); --- 27 unchanged lines hidden (view full) --- 2455 /* 2456 * If there was a backref pointing from the next inode back to this 2457 * one, remove it because we've removed this inode from the list. 2458 * 2459 * Later, if this inode was in the middle of the list we'll update 2460 * this inode's backref to point from the next inode. 2461 */ 2462 if (next_agino != NULLAGINO) { |
2450 pag = xfs_perag_get(mp, agno); 2451 error = xfs_iunlink_change_backref(pag, next_agino, | 2463 error = xfs_iunlink_change_backref(agibp->b_pag, next_agino, |
2452 NULLAGINO); 2453 if (error) | 2464 NULLAGINO); 2465 if (error) |
2454 goto out; | 2466 return error; |
2455 } 2456 | 2467 } 2468 |
2457 if (head_agino == agino) { 2458 /* Point the head of the list to the next unlinked inode. */ 2459 error = xfs_iunlink_update_bucket(tp, agno, agibp, bucket_index, 2460 next_agino); 2461 if (error) 2462 goto out; 2463 } else { | 2469 if (head_agino != agino) { |
2464 struct xfs_imap imap; 2465 xfs_agino_t prev_agino; 2466 | 2470 struct xfs_imap imap; 2471 xfs_agino_t prev_agino; 2472 |
2467 if (!pag) 2468 pag = xfs_perag_get(mp, agno); 2469 | |
2470 /* We need to search the list for the inode being freed. */ 2471 error = xfs_iunlink_map_prev(tp, agno, head_agino, agino, 2472 &prev_agino, &imap, &last_dip, &last_ibp, | 2473 /* We need to search the list for the inode being freed. */ 2474 error = xfs_iunlink_map_prev(tp, agno, head_agino, agino, 2475 &prev_agino, &imap, &last_dip, &last_ibp, |
2473 pag); | 2476 agibp->b_pag); |
2474 if (error) | 2477 if (error) |
2475 goto out; | 2478 return error; |
2476 2477 /* Point the previous inode on the list to the next inode. */ 2478 xfs_iunlink_update_dinode(tp, agno, prev_agino, last_ibp, 2479 last_dip, &imap, next_agino); 2480 2481 /* 2482 * Now we deal with the backref for this inode. If this inode 2483 * pointed at a real inode, change the backref that pointed to 2484 * us to point to our old next. If this inode was the end of 2485 * the list, delete the backref that pointed to us. Note that 2486 * change_backref takes care of deleting the backref if 2487 * next_agino is NULLAGINO. 2488 */ | 2479 2480 /* Point the previous inode on the list to the next inode. */ 2481 xfs_iunlink_update_dinode(tp, agno, prev_agino, last_ibp, 2482 last_dip, &imap, next_agino); 2483 2484 /* 2485 * Now we deal with the backref for this inode. If this inode 2486 * pointed at a real inode, change the backref that pointed to 2487 * us to point to our old next. If this inode was the end of 2488 * the list, delete the backref that pointed to us. Note that 2489 * change_backref takes care of deleting the backref if 2490 * next_agino is NULLAGINO. 2491 */ |
2489 error = xfs_iunlink_change_backref(pag, agino, next_agino); 2490 if (error) 2491 goto out; | 2492 return xfs_iunlink_change_backref(agibp->b_pag, agino, 2493 next_agino); |
2492 } 2493 | 2494 } 2495 |
2494out: 2495 if (pag) 2496 xfs_perag_put(pag); 2497 return error; | 2496 /* Point the head of the list to the next unlinked inode. */ 2497 return xfs_iunlink_update_bucket(tp, agno, agibp, bucket_index, 2498 next_agino); |
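Both of the iunlink hunks above also stop taking a private perag reference with xfs_perag_get()/xfs_perag_put() and instead borrow the reference the locked AGI buffer already holds in agibp->b_pag, which is what lets the out: label and its cleanup disappear. A condensed view of the swap (helpers as they appear in this diff, not standalone code):

```c
/* Before: take and drop a perag reference around each backref update,
 * funnelling error paths through a shared "out:" label for the put. */
pag = xfs_perag_get(mp, agno);
error = xfs_iunlink_change_backref(pag, agino, next_agino);
xfs_perag_put(pag);

/* After: borrow the reference the locked AGI buffer holds for its whole
 * lifetime, so error paths can simply return. */
error = xfs_iunlink_change_backref(agibp->b_pag, agino, next_agino);
```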
2498} 2499 2500/* | 2499} 2500 2501/* |
2501 * Look up the inode number specified and mark it stale if it is found. If it is 2502 * dirty, return the inode so it can be attached to the cluster buffer so it can 2503 * be processed appropriately when the cluster free transaction completes. | 2502 * Look up the inode number specified and if it is not already marked XFS_ISTALE 2503 * mark it stale. We should only find clean inodes in this lookup that aren't 2504 * already stale. |
2504 */ | 2505 */ |
2505static struct xfs_inode * 2506xfs_ifree_get_one_inode( 2507 struct xfs_perag *pag, | 2506static void 2507xfs_ifree_mark_inode_stale( 2508 struct xfs_buf *bp, |
2508 struct xfs_inode *free_ip, 2509 xfs_ino_t inum) 2510{ | 2509 struct xfs_inode *free_ip, 2510 xfs_ino_t inum) 2511{ |
2511 struct xfs_mount *mp = pag->pag_mount; | 2512 struct xfs_mount *mp = bp->b_mount; 2513 struct xfs_perag *pag = bp->b_pag; 2514 struct xfs_inode_log_item *iip; |
2512 struct xfs_inode *ip; 2513 2514retry: 2515 rcu_read_lock(); 2516 ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, inum)); 2517 2518 /* Inode not in memory, nothing to do */ | 2515 struct xfs_inode *ip; 2516 2517retry: 2518 rcu_read_lock(); 2519 ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, inum)); 2520 2521 /* Inode not in memory, nothing to do */ |
2519 if (!ip) 2520 goto out_rcu_unlock; | 2522 if (!ip) { 2523 rcu_read_unlock(); 2524 return; 2525 } |
2521 2522 /* 2523 * because this is an RCU protected lookup, we could find a recently 2524 * freed or even reallocated inode during the lookup. We need to check 2525 * under the i_flags_lock for a valid inode here. Skip it if it is not 2526 * valid, the wrong inode or stale. 2527 */ 2528 spin_lock(&ip->i_flags_lock); 2529 if (ip->i_ino != inum || __xfs_iflags_test(ip, XFS_ISTALE)) { 2530 spin_unlock(&ip->i_flags_lock); | 2526 2527 /* 2528 * because this is an RCU protected lookup, we could find a recently 2529 * freed or even reallocated inode during the lookup. We need to check 2530 * under the i_flags_lock for a valid inode here. Skip it if it is not 2531 * valid, the wrong inode or stale. 2532 */ 2533 spin_lock(&ip->i_flags_lock); 2534 if (ip->i_ino != inum || __xfs_iflags_test(ip, XFS_ISTALE)) { 2535 spin_unlock(&ip->i_flags_lock); |
2531 goto out_rcu_unlock; | 2536 rcu_read_unlock(); 2537 return; |
2532 } | 2538 } |
2533 spin_unlock(&ip->i_flags_lock); | |
2534 2535 /* 2536 * Don't try to lock/unlock the current inode, but we _cannot_ skip the 2537 * other inodes that we did not find in the list attached to the buffer 2538 * and are not already marked stale. If we can't lock it, back off and 2539 * retry. 2540 */ 2541 if (ip != free_ip) { 2542 if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { | 2539 2540 /* 2541 * Don't try to lock/unlock the current inode, but we _cannot_ skip the 2542 * other inodes that we did not find in the list attached to the buffer 2543 * and are not already marked stale. If we can't lock it, back off and 2544 * retry. 2545 */ 2546 if (ip != free_ip) { 2547 if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { |
| 2548 spin_unlock(&ip->i_flags_lock);
2543 rcu_read_unlock(); 2544 delay(1); 2545 goto retry; 2546 } | 2549 rcu_read_unlock(); 2550 delay(1); 2551 goto retry; 2552 } |
2547 2548 /* 2549 * Check the inode number again in case we're racing with 2550 * freeing in xfs_reclaim_inode(). See the comments in that 2551 * function for more information as to why the initial check is 2552 * not sufficient. 2553 */ 2554 if (ip->i_ino != inum) { 2555 xfs_iunlock(ip, XFS_ILOCK_EXCL); 2556 goto out_rcu_unlock; 2557 } | |
2558 } | 2553 } |
| 2554 ip->i_flags |= XFS_ISTALE; 2555 spin_unlock(&ip->i_flags_lock);
2559 rcu_read_unlock(); 2560 | 2556 rcu_read_unlock(); 2557 |
2561 xfs_iflock(ip); 2562 xfs_iflags_set(ip, XFS_ISTALE); | 2558 /* 2559 * If we can't get the flush lock, the inode is already attached. All 2560 * we needed to do here is mark the inode stale so buffer IO completion 2561 * will remove it from the AIL. 2562 */ 2563 iip = ip->i_itemp; 2564 if (!xfs_iflock_nowait(ip)) { 2565 ASSERT(!list_empty(&iip->ili_item.li_bio_list)); 2566 ASSERT(iip->ili_last_fields); 2567 goto out_iunlock; 2568 } |
2563 2564 /* | 2569 2570 /* |
2565 * We don't need to attach clean inodes or those only with unlogged 2566 * changes (which we throw away, anyway). | 2571 * Inodes not attached to the buffer can be released immediately. 2572 * Everything else has to go through xfs_iflush_abort() on journal 2573 * commit as the flock synchronises removal of the inode from the 2574 * cluster buffer against inode reclaim. |
2567 */ | 2575 */ |
2568 if (!ip->i_itemp || xfs_inode_clean(ip)) { 2569 ASSERT(ip != free_ip); | 2576 if (!iip || list_empty(&iip->ili_item.li_bio_list)) { |
2570 xfs_ifunlock(ip); | 2577 xfs_ifunlock(ip); |
2571 xfs_iunlock(ip, XFS_ILOCK_EXCL); 2572 goto out_no_inode; | 2578 goto out_iunlock; |
2573 } | 2579 } |
2574 return ip; | |
2575 | 2580 |
2576out_rcu_unlock: 2577 rcu_read_unlock(); 2578out_no_inode: 2579 return NULL; | 2581 /* we have a dirty inode in memory that has not yet been flushed. */ 2582 spin_lock(&iip->ili_lock); 2583 iip->ili_last_fields = iip->ili_fields; 2584 iip->ili_fields = 0; 2585 iip->ili_fsync_fields = 0; 2586 spin_unlock(&iip->ili_lock); 2587 ASSERT(iip->ili_last_fields); 2588 2589out_iunlock: 2590 if (ip != free_ip) 2591 xfs_iunlock(ip, XFS_ILOCK_EXCL); |
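The rewritten helper above no longer returns the inode to the caller; it marks it stale itself. The ordering it relies on is that the inode is validated and ILOCKed while i_flags_lock is still held, and XFS_ISTALE is set under that same spinlock, so reclaim cannot free the inode between the check and the lock. Condensed from the hunk above (not standalone code):

```c
spin_lock(&ip->i_flags_lock);
if (ip->i_ino != inum || __xfs_iflags_test(ip, XFS_ISTALE)) {
	spin_unlock(&ip->i_flags_lock);	/* wrong inode or already stale */
	rcu_read_unlock();
	return;
}
if (ip != free_ip && !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
	spin_unlock(&ip->i_flags_lock);	/* couldn't lock: back off and retry */
	rcu_read_unlock();
	delay(1);
	goto retry;
}
ip->i_flags |= XFS_ISTALE;		/* still under i_flags_lock */
spin_unlock(&ip->i_flags_lock);
rcu_read_unlock();
```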
2580} 2581 2582/* 2583 * A big issue when freeing the inode cluster is that we _cannot_ skip any 2584 * inodes that are in memory - they all must be marked stale and attached to 2585 * the cluster buffer. 2586 */ 2587STATIC int 2588xfs_ifree_cluster( | 2592} 2593 2594/* 2595 * A big issue when freeing the inode cluster is that we _cannot_ skip any 2596 * inodes that are in memory - they all must be marked stale and attached to 2597 * the cluster buffer. 2598 */ 2599STATIC int 2600xfs_ifree_cluster( |
2589 xfs_inode_t *free_ip, 2590 xfs_trans_t *tp, | 2601 struct xfs_inode *free_ip, 2602 struct xfs_trans *tp, |
2591 struct xfs_icluster *xic) 2592{ | 2603 struct xfs_icluster *xic) 2604{ |
2593 xfs_mount_t *mp = free_ip->i_mount; | 2605 struct xfs_mount *mp = free_ip->i_mount; 2606 struct xfs_ino_geometry *igeo = M_IGEO(mp); 2607 struct xfs_buf *bp; 2608 xfs_daddr_t blkno; 2609 xfs_ino_t inum = xic->first_ino; |
2594 int nbufs; 2595 int i, j; 2596 int ioffset; | 2610 int nbufs; 2611 int i, j; 2612 int ioffset; |
2597 xfs_daddr_t blkno; 2598 xfs_buf_t *bp; 2599 xfs_inode_t *ip; 2600 struct xfs_inode_log_item *iip; 2601 struct xfs_log_item *lip; 2602 struct xfs_perag *pag; 2603 struct xfs_ino_geometry *igeo = M_IGEO(mp); 2604 xfs_ino_t inum; | |
2605 int error; 2606 | 2613 int error; 2614 |
2607 inum = xic->first_ino; 2608 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum)); | |
2609 nbufs = igeo->ialloc_blks / igeo->blocks_per_cluster; 2610 2611 for (j = 0; j < nbufs; j++, inum += igeo->inodes_per_cluster) { 2612 /* 2613 * The allocation bitmap tells us which inodes of the chunk were 2614 * physically allocated. Skip the cluster if an inode falls into 2615 * a sparse region. 2616 */ --- 12 unchanged lines hidden (view full) --- 2629 * can't get the flush lock on is attached to the buffer. 2630 * If we scan the in-memory inodes first, then buffer IO can 2631 * complete before we get a lock on it, and hence we may fail 2632 * to mark all the active inodes on the buffer stale. 2633 */ 2634 error = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 2635 mp->m_bsize * igeo->blocks_per_cluster, 2636 XBF_UNMAPPED, &bp); | 2615 nbufs = igeo->ialloc_blks / igeo->blocks_per_cluster; 2616 2617 for (j = 0; j < nbufs; j++, inum += igeo->inodes_per_cluster) { 2618 /* 2619 * The allocation bitmap tells us which inodes of the chunk were 2620 * physically allocated. Skip the cluster if an inode falls into 2621 * a sparse region. 2622 */ --- 12 unchanged lines hidden (view full) --- 2635 * can't get the flush lock on is attached to the buffer. 2636 * If we scan the in-memory inodes first, then buffer IO can 2637 * complete before we get a lock on it, and hence we may fail 2638 * to mark all the active inodes on the buffer stale. 2639 */ 2640 error = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 2641 mp->m_bsize * igeo->blocks_per_cluster, 2642 XBF_UNMAPPED, &bp); |
2637 if (error) { 2638 xfs_perag_put(pag); | 2643 if (error) |
2639 return error; | 2644 return error; |
2640 } | |
2641 2642 /* 2643 * This buffer may not have been correctly initialised as we 2644 * didn't read it from disk. That's not important because we are 2645 * only using to mark the buffer as stale in the log, and to 2646 * attach stale cached inodes on it. That means it will never be 2647 * dispatched for IO. If it is, we want to know about it, and we 2648 * want it to fail. We can acheive this by adding a write 2649 * verifier to the buffer. 2650 */ 2651 bp->b_ops = &xfs_inode_buf_ops; 2652 2653 /* | 2645 2646 /* 2647 * This buffer may not have been correctly initialised as we 2648 * didn't read it from disk. That's not important because we are 2649 * only using to mark the buffer as stale in the log, and to 2650 * attach stale cached inodes on it. That means it will never be 2651 * dispatched for IO. If it is, we want to know about it, and we 2652 * want it to fail. We can acheive this by adding a write 2653 * verifier to the buffer. 2654 */ 2655 bp->b_ops = &xfs_inode_buf_ops; 2656 2657 /* |
2654 * Walk the inodes already attached to the buffer and mark them 2655 * stale. These will all have the flush locks held, so an 2656 * in-memory inode walk can't lock them. By marking them all 2657 * stale first, we will not attempt to lock them in the loop 2658 * below as the XFS_ISTALE flag will be set. | 2658 * Now we need to set all the cached clean inodes as XFS_ISTALE, 2659 * too. This requires lookups, and will skip inodes that we've 2660 * already marked XFS_ISTALE. |
2659 */ | 2661 */ |
2660 list_for_each_entry(lip, &bp->b_li_list, li_bio_list) { 2661 if (lip->li_type == XFS_LI_INODE) { 2662 iip = (struct xfs_inode_log_item *)lip; 2663 ASSERT(iip->ili_logged == 1); 2664 lip->li_cb = xfs_istale_done; 2665 xfs_trans_ail_copy_lsn(mp->m_ail, 2666 &iip->ili_flush_lsn, 2667 &iip->ili_item.li_lsn); 2668 xfs_iflags_set(iip->ili_inode, XFS_ISTALE); 2669 } 2670 } | 2662 for (i = 0; i < igeo->inodes_per_cluster; i++) 2663 xfs_ifree_mark_inode_stale(bp, free_ip, inum + i); |
2671 | 2664 |
2672 2673 /* 2674 * For each inode in memory attempt to add it to the inode 2675 * buffer and set it up for being staled on buffer IO 2676 * completion. This is safe as we've locked out tail pushing 2677 * and flushing by locking the buffer. 2678 * 2679 * We have already marked every inode that was part of a 2680 * transaction stale above, which means there is no point in 2681 * even trying to lock them. 2682 */ 2683 for (i = 0; i < igeo->inodes_per_cluster; i++) { 2684 ip = xfs_ifree_get_one_inode(pag, free_ip, inum + i); 2685 if (!ip) 2686 continue; 2687 2688 iip = ip->i_itemp; 2689 iip->ili_last_fields = iip->ili_fields; 2690 iip->ili_fields = 0; 2691 iip->ili_fsync_fields = 0; 2692 iip->ili_logged = 1; 2693 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 2694 &iip->ili_item.li_lsn); 2695 2696 xfs_buf_attach_iodone(bp, xfs_istale_done, 2697 &iip->ili_item); 2698 2699 if (ip != free_ip) 2700 xfs_iunlock(ip, XFS_ILOCK_EXCL); 2701 } 2702 | |
2703 xfs_trans_stale_inode_buf(tp, bp); 2704 xfs_trans_binval(tp, bp); 2705 } | 2665 xfs_trans_stale_inode_buf(tp, bp); 2666 xfs_trans_binval(tp, bp); 2667 } |
2706 2707 xfs_perag_put(pag); | |
2708 return 0; 2709} 2710 2711/* 2712 * This is called to return an inode to the inode free list. 2713 * The inode should already be truncated to 0 length and have 2714 * no pages associated with it. This routine also assumes that 2715 * the inode is already a part of the transaction. --- 4 unchanged lines hidden (view full) --- 2720 */ 2721int 2722xfs_ifree( 2723 struct xfs_trans *tp, 2724 struct xfs_inode *ip) 2725{ 2726 int error; 2727 struct xfs_icluster xic = { 0 }; | 2668 return 0; 2669} 2670 2671/* 2672 * This is called to return an inode to the inode free list. 2673 * The inode should already be truncated to 0 length and have 2674 * no pages associated with it. This routine also assumes that 2675 * the inode is already a part of the transaction. --- 4 unchanged lines hidden (view full) --- 2680 */ 2681int 2682xfs_ifree( 2683 struct xfs_trans *tp, 2684 struct xfs_inode *ip) 2685{ 2686 int error; 2687 struct xfs_icluster xic = { 0 }; |
| 2688 struct xfs_inode_log_item *iip = ip->i_itemp;
2728 2729 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 2730 ASSERT(VFS_I(ip)->i_nlink == 0); 2731 ASSERT(ip->i_df.if_nextents == 0); 2732 ASSERT(ip->i_d.di_size == 0 || !S_ISREG(VFS_I(ip)->i_mode)); 2733 ASSERT(ip->i_d.di_nblocks == 0); 2734 2735 /* --- 21 unchanged lines hidden (view full) --- 2757 VFS_I(ip)->i_mode = 0; /* mark incore inode as free */ 2758 ip->i_d.di_flags = 0; 2759 ip->i_d.di_flags2 = 0; 2760 ip->i_d.di_dmevmask = 0; 2761 ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */ 2762 ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS; 2763 2764 /* Don't attempt to replay owner changes for a deleted inode */ | 2689 2690 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 2691 ASSERT(VFS_I(ip)->i_nlink == 0); 2692 ASSERT(ip->i_df.if_nextents == 0); 2693 ASSERT(ip->i_d.di_size == 0 || !S_ISREG(VFS_I(ip)->i_mode)); 2694 ASSERT(ip->i_d.di_nblocks == 0); 2695 2696 /* --- 21 unchanged lines hidden (view full) --- 2718 VFS_I(ip)->i_mode = 0; /* mark incore inode as free */ 2719 ip->i_d.di_flags = 0; 2720 ip->i_d.di_flags2 = 0; 2721 ip->i_d.di_dmevmask = 0; 2722 ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */ 2723 ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS; 2724 2725 /* Don't attempt to replay owner changes for a deleted inode */ |
2765 ip->i_itemp->ili_fields &= ~(XFS_ILOG_AOWNER|XFS_ILOG_DOWNER); | 2726 spin_lock(&iip->ili_lock); 2727 iip->ili_fields &= ~(XFS_ILOG_AOWNER | XFS_ILOG_DOWNER); 2728 spin_unlock(&iip->ili_lock); |
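The iip->ili_lock spinlock taken here is not declared in this file; it is presumably added to the inode log item earlier in the same series. The convention the diff adopts, also visible in the xfs_ifree_mark_inode_stale() hunk above and the xfs_iflush() hunk below, is that the ili_fields/ili_last_fields logging state is only changed under that lock, for example when handing the fields off at flush time:

```c
spin_lock(&iip->ili_lock);
iip->ili_last_fields = iip->ili_fields;	/* remember what is being flushed */
iip->ili_fields = 0;
iip->ili_fsync_fields = 0;
spin_unlock(&iip->ili_lock);
```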
2766 2767 /* 2768 * Bump the generation count so no one will be confused 2769 * by reincarnations of this inode. 2770 */ 2771 VFS_I(ip)->i_generation++; 2772 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 2773 --- 363 unchanged lines hidden (view full) --- 3137out_trans_abort: 3138 xfs_trans_cancel(tp); 3139 return error; 3140} 3141 3142/* 3143 * xfs_rename_alloc_whiteout() 3144 * | 2729 2730 /* 2731 * Bump the generation count so no one will be confused 2732 * by reincarnations of this inode. 2733 */ 2734 VFS_I(ip)->i_generation++; 2735 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 2736 --- 363 unchanged lines hidden (view full) --- 3100out_trans_abort: 3101 xfs_trans_cancel(tp); 3102 return error; 3103} 3104 3105/* 3106 * xfs_rename_alloc_whiteout() 3107 * |
3145 * Return a referenced, unlinked, unlocked inode that that can be used as a | 3108 * Return a referenced, unlinked, unlocked inode that can be used as a |
3146 * whiteout in a rename transaction. We use a tmpfile inode here so that if we 3147 * crash between allocating the inode and linking it into the rename transaction 3148 * recovery will free the inode and we won't leak it. 3149 */ 3150static int 3151xfs_rename_alloc_whiteout( 3152 struct xfs_inode *dp, 3153 struct xfs_inode **wip) --- 310 unchanged lines hidden (view full) --- 3464out_trans_cancel: 3465 xfs_trans_cancel(tp); 3466out_release_wip: 3467 if (wip) 3468 xfs_irele(wip); 3469 return error; 3470} 3471 | 3109 * whiteout in a rename transaction. We use a tmpfile inode here so that if we 3110 * crash between allocating the inode and linking it into the rename transaction 3111 * recovery will free the inode and we won't leak it. 3112 */ 3113static int 3114xfs_rename_alloc_whiteout( 3115 struct xfs_inode *dp, 3116 struct xfs_inode **wip) --- 310 unchanged lines hidden (view full) --- 3427out_trans_cancel: 3428 xfs_trans_cancel(tp); 3429out_release_wip: 3430 if (wip) 3431 xfs_irele(wip); 3432 return error; 3433} 3434 |
3472STATIC int 3473xfs_iflush_cluster( 3474 struct xfs_inode *ip, 3475 struct xfs_buf *bp) 3476{ 3477 struct xfs_mount *mp = ip->i_mount; 3478 struct xfs_perag *pag; 3479 unsigned long first_index, mask; 3480 int cilist_size; 3481 struct xfs_inode **cilist; 3482 struct xfs_inode *cip; 3483 struct xfs_ino_geometry *igeo = M_IGEO(mp); 3484 int error = 0; 3485 int nr_found; 3486 int clcount = 0; 3487 int i; 3488 3489 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 3490 3491 cilist_size = igeo->inodes_per_cluster * sizeof(struct xfs_inode *); 3492 cilist = kmem_alloc(cilist_size, KM_MAYFAIL|KM_NOFS); 3493 if (!cilist) 3494 goto out_put; 3495 3496 mask = ~(igeo->inodes_per_cluster - 1); 3497 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; 3498 rcu_read_lock(); 3499 /* really need a gang lookup range call here */ 3500 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)cilist, 3501 first_index, igeo->inodes_per_cluster); 3502 if (nr_found == 0) 3503 goto out_free; 3504 3505 for (i = 0; i < nr_found; i++) { 3506 cip = cilist[i]; 3507 if (cip == ip) 3508 continue; 3509 3510 /* 3511 * because this is an RCU protected lookup, we could find a 3512 * recently freed or even reallocated inode during the lookup. 3513 * We need to check under the i_flags_lock for a valid inode 3514 * here. Skip it if it is not valid or the wrong inode. 3515 */ 3516 spin_lock(&cip->i_flags_lock); 3517 if (!cip->i_ino || 3518 __xfs_iflags_test(cip, XFS_ISTALE)) { 3519 spin_unlock(&cip->i_flags_lock); 3520 continue; 3521 } 3522 3523 /* 3524 * Once we fall off the end of the cluster, no point checking 3525 * any more inodes in the list because they will also all be 3526 * outside the cluster. 3527 */ 3528 if ((XFS_INO_TO_AGINO(mp, cip->i_ino) & mask) != first_index) { 3529 spin_unlock(&cip->i_flags_lock); 3530 break; 3531 } 3532 spin_unlock(&cip->i_flags_lock); 3533 3534 /* 3535 * Do an un-protected check to see if the inode is dirty and 3536 * is a candidate for flushing. These checks will be repeated 3537 * later after the appropriate locks are acquired. 3538 */ 3539 if (xfs_inode_clean(cip) && xfs_ipincount(cip) == 0) 3540 continue; 3541 3542 /* 3543 * Try to get locks. If any are unavailable or it is pinned, 3544 * then this inode cannot be flushed and is skipped. 3545 */ 3546 3547 if (!xfs_ilock_nowait(cip, XFS_ILOCK_SHARED)) 3548 continue; 3549 if (!xfs_iflock_nowait(cip)) { 3550 xfs_iunlock(cip, XFS_ILOCK_SHARED); 3551 continue; 3552 } 3553 if (xfs_ipincount(cip)) { 3554 xfs_ifunlock(cip); 3555 xfs_iunlock(cip, XFS_ILOCK_SHARED); 3556 continue; 3557 } 3558 3559 3560 /* 3561 * Check the inode number again, just to be certain we are not 3562 * racing with freeing in xfs_reclaim_inode(). See the comments 3563 * in that function for more information as to why the initial 3564 * check is not sufficient. 3565 */ 3566 if (!cip->i_ino) { 3567 xfs_ifunlock(cip); 3568 xfs_iunlock(cip, XFS_ILOCK_SHARED); 3569 continue; 3570 } 3571 3572 /* 3573 * arriving here means that this inode can be flushed. First 3574 * re-check that it's dirty before flushing. 
3575 */ 3576 if (!xfs_inode_clean(cip)) { 3577 error = xfs_iflush_int(cip, bp); 3578 if (error) { 3579 xfs_iunlock(cip, XFS_ILOCK_SHARED); 3580 goto out_free; 3581 } 3582 clcount++; 3583 } else { 3584 xfs_ifunlock(cip); 3585 } 3586 xfs_iunlock(cip, XFS_ILOCK_SHARED); 3587 } 3588 3589 if (clcount) { 3590 XFS_STATS_INC(mp, xs_icluster_flushcnt); 3591 XFS_STATS_ADD(mp, xs_icluster_flushinode, clcount); 3592 } 3593 3594out_free: 3595 rcu_read_unlock(); 3596 kmem_free(cilist); 3597out_put: 3598 xfs_perag_put(pag); 3599 return error; 3600} 3601 3602/* 3603 * Flush dirty inode metadata into the backing buffer. 3604 * 3605 * The caller must have the inode lock and the inode flush lock held. The 3606 * inode lock will still be held upon return to the caller, and the inode 3607 * flush lock will be released after the inode has reached the disk. 3608 * 3609 * The caller must write out the buffer returned in *bpp and release it. 3610 */ 3611int | 3435static int |
3612xfs_iflush( 3613 struct xfs_inode *ip, | 3436xfs_iflush( 3437 struct xfs_inode *ip, |
3614 struct xfs_buf **bpp) 3615{ 3616 struct xfs_mount *mp = ip->i_mount; 3617 struct xfs_buf *bp = NULL; 3618 struct xfs_dinode *dip; 3619 int error; 3620 3621 XFS_STATS_INC(mp, xs_iflush_count); 3622 3623 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 3624 ASSERT(xfs_isiflocked(ip)); 3625 ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE || 3626 ip->i_df.if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); 3627 3628 *bpp = NULL; 3629 3630 xfs_iunpin_wait(ip); 3631 3632 /* 3633 * For stale inodes we cannot rely on the backing buffer remaining 3634 * stale in cache for the remaining life of the stale inode and so 3635 * xfs_imap_to_bp() below may give us a buffer that no longer contains 3636 * inodes below. We have to check this after ensuring the inode is 3637 * unpinned so that it is safe to reclaim the stale inode after the 3638 * flush call. 3639 */ 3640 if (xfs_iflags_test(ip, XFS_ISTALE)) { 3641 xfs_ifunlock(ip); 3642 return 0; 3643 } 3644 3645 /* 3646 * Get the buffer containing the on-disk inode. We are doing a try-lock 3647 * operation here, so we may get an EAGAIN error. In that case, return 3648 * leaving the inode dirty. 3649 * 3650 * If we get any other error, we effectively have a corruption situation 3651 * and we cannot flush the inode. Abort the flush and shut down. 3652 */ 3653 error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK); 3654 if (error == -EAGAIN) { 3655 xfs_ifunlock(ip); 3656 return error; 3657 } 3658 if (error) 3659 goto abort; 3660 3661 /* 3662 * If the buffer is pinned then push on the log now so we won't 3663 * get stuck waiting in the write for too long. 3664 */ 3665 if (xfs_buf_ispinned(bp)) 3666 xfs_log_force(mp, 0); 3667 3668 /* 3669 * Flush the provided inode then attempt to gather others from the 3670 * cluster into the write. 3671 * 3672 * Note: Once we attempt to flush an inode, we must run buffer 3673 * completion callbacks on any failure. If this fails, simulate an I/O 3674 * failure on the buffer and shut down. 3675 */ 3676 error = xfs_iflush_int(ip, bp); 3677 if (!error) 3678 error = xfs_iflush_cluster(ip, bp); 3679 if (error) { 3680 bp->b_flags |= XBF_ASYNC; 3681 xfs_buf_ioend_fail(bp); 3682 goto shutdown; 3683 } 3684 3685 *bpp = bp; 3686 return 0; 3687 3688abort: 3689 xfs_iflush_abort(ip); 3690shutdown: 3691 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 3692 return error; 3693} 3694 3695STATIC int 3696xfs_iflush_int( 3697 struct xfs_inode *ip, | |
3698 struct xfs_buf *bp) 3699{ 3700 struct xfs_inode_log_item *iip = ip->i_itemp; 3701 struct xfs_dinode *dip; 3702 struct xfs_mount *mp = ip->i_mount; 3703 int error; 3704 3705 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 3706 ASSERT(xfs_isiflocked(ip)); 3707 ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE || 3708 ip->i_df.if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); | 3438 struct xfs_buf *bp) 3439{ 3440 struct xfs_inode_log_item *iip = ip->i_itemp; 3441 struct xfs_dinode *dip; 3442 struct xfs_mount *mp = ip->i_mount; 3443 int error; 3444 3445 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 3446 ASSERT(xfs_isiflocked(ip)); 3447 ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE || 3448 ip->i_df.if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); |
3709 ASSERT(iip != NULL && iip->ili_fields != 0); | 3449 ASSERT(iip->ili_item.li_buf == bp); |
3710 3711 dip = xfs_buf_offset(bp, ip->i_imap.im_boffset); 3712 3713 /* 3714 * We don't flush the inode if any of the following checks fail, but we 3715 * do still update the log item and attach to the backing buffer as if 3716 * the flush happened. This is a formality to facilitate predictable 3717 * error handling as the caller will shutdown and fail the buffer. --- 78 unchanged lines hidden (view full) --- 3796 3797 /* Wrap, we never let the log put out DI_MAX_FLUSH */ 3798 if (ip->i_d.di_flushiter == DI_MAX_FLUSH) 3799 ip->i_d.di_flushiter = 0; 3800 3801 xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK); 3802 if (XFS_IFORK_Q(ip)) 3803 xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK); | 3450 3451 dip = xfs_buf_offset(bp, ip->i_imap.im_boffset); 3452 3453 /* 3454 * We don't flush the inode if any of the following checks fail, but we 3455 * do still update the log item and attach to the backing buffer as if 3456 * the flush happened. This is a formality to facilitate predictable 3457 * error handling as the caller will shutdown and fail the buffer. --- 78 unchanged lines hidden (view full) --- 3536 3537 /* Wrap, we never let the log put out DI_MAX_FLUSH */ 3538 if (ip->i_d.di_flushiter == DI_MAX_FLUSH) 3539 ip->i_d.di_flushiter = 0; 3540 3541 xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK); 3542 if (XFS_IFORK_Q(ip)) 3543 xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK); |
3804 xfs_inobp_check(mp, bp); | |
3805 3806 /* 3807 * We've recorded everything logged in the inode, so we'd like to clear 3808 * the ili_fields bits so we don't log and flush things unnecessarily. 3809 * However, we can't stop logging all this information until the data 3810 * we've copied into the disk buffer is written to disk. If we did we 3811 * might overwrite the copy of the inode in the log with all the data 3812 * after re-logging only part of it, and in the face of a crash we 3813 * wouldn't have all the data we need to recover. 3814 * 3815 * What we do is move the bits to the ili_last_fields field. When 3816 * logging the inode, these bits are moved back to the ili_fields field. 3817 * In the xfs_iflush_done() routine we clear ili_last_fields, since we 3818 * know that the information those bits represent is permanently on 3819 * disk. As long as the flush completes before the inode is logged 3820 * again, then both ili_fields and ili_last_fields will be cleared. | 3544 3545 /* 3546 * We've recorded everything logged in the inode, so we'd like to clear 3547 * the ili_fields bits so we don't log and flush things unnecessarily. 3548 * However, we can't stop logging all this information until the data 3549 * we've copied into the disk buffer is written to disk. If we did we 3550 * might overwrite the copy of the inode in the log with all the data 3551 * after re-logging only part of it, and in the face of a crash we 3552 * wouldn't have all the data we need to recover. 3553 * 3554 * What we do is move the bits to the ili_last_fields field. When 3555 * logging the inode, these bits are moved back to the ili_fields field. 3556 * In the xfs_iflush_done() routine we clear ili_last_fields, since we 3557 * know that the information those bits represent is permanently on 3558 * disk. As long as the flush completes before the inode is logged 3559 * again, then both ili_fields and ili_last_fields will be cleared. |
3821 * 3822 * We can play with the ili_fields bits here, because the inode lock 3823 * must be held exclusively in order to set bits there and the flush 3824 * lock protects the ili_last_fields bits. Set ili_logged so the flush 3825 * done routine can tell whether or not to look in the AIL. Also, store 3826 * the current LSN of the inode so that we can tell whether the item has 3827 * moved in the AIL from xfs_iflush_done(). In order to read the lsn we 3828 * need the AIL lock, because it is a 64 bit value that cannot be read 3829 * atomically. | |
3830 */ 3831 error = 0; 3832flush_out: | 3560 */ 3561 error = 0; 3562flush_out: |
| 3563 spin_lock(&iip->ili_lock);
3833 iip->ili_last_fields = iip->ili_fields; 3834 iip->ili_fields = 0; 3835 iip->ili_fsync_fields = 0; | 3564 iip->ili_last_fields = iip->ili_fields; 3565 iip->ili_fields = 0; 3566 iip->ili_fsync_fields = 0; |
3836 iip->ili_logged = 1; | 3567 spin_unlock(&iip->ili_lock); |
3837 | 3568 |
| 3569 /* 3570 * Store the current LSN of the inode so that we can tell whether the 3571 * item has moved in the AIL from xfs_iflush_done(). 3572 */
3838 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 3839 &iip->ili_item.li_lsn); 3840 | 3573 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 3574 &iip->ili_item.li_lsn); 3575 |
| 3576 /* generate the checksum. */ 3577 xfs_dinode_calc_crc(mp, dip); 3578 return error; 3579} 3580 3581/* 3582 * Non-blocking flush of dirty inode metadata into the backing buffer. 3583 * 3584 * The caller must have a reference to the inode and hold the cluster buffer 3585 * locked. The function will walk across all the inodes on the cluster buffer it 3586 * can find and lock without blocking, and flush them to the cluster buffer. 3587 * 3588 * On successful flushing of at least one inode, the caller must write out the 3589 * buffer and release it. If no inodes are flushed, -EAGAIN will be returned and 3590 * the caller needs to release the buffer. On failure, the filesystem will be 3591 * shut down, the buffer will have been unlocked and released, and EFSCORRUPTED 3592 * will be returned. 3593 */ 3594int 3595xfs_iflush_cluster( 3596 struct xfs_buf *bp) 3597{ 3598 struct xfs_mount *mp = bp->b_mount; 3599 struct xfs_log_item *lip, *n; 3600 struct xfs_inode *ip; 3601 struct xfs_inode_log_item *iip; 3602 int clcount = 0; 3603 int error = 0; 3604
3841 /* | 3605 /* |
3842 * Attach the inode item callback to the buffer whether the flush 3843 * succeeded or not. If not, the caller will shut down and fail I/O 3844 * completion on the buffer to remove the inode from the AIL and release 3845 * the flush lock. | 3606 * We must use the safe variant here as on shutdown xfs_iflush_abort() 3607 * can remove itself from the list. |
3846 */ | 3608 */ |
3847 xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item); | 3609 list_for_each_entry_safe(lip, n, &bp->b_li_list, li_bio_list) { 3610 iip = (struct xfs_inode_log_item *)lip; 3611 ip = iip->ili_inode; |
3848 | 3612 |
3849 /* generate the checksum. */ 3850 xfs_dinode_calc_crc(mp, dip); | 3613 /* 3614 * Quick and dirty check to avoid locks if possible. 3615 */ 3616 if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLOCK)) 3617 continue; 3618 if (xfs_ipincount(ip)) 3619 continue; |
3851 | 3620 |
3852 ASSERT(!list_empty(&bp->b_li_list)); 3853 ASSERT(bp->b_iodone != NULL); 3854 return error; | 3621 /* 3622 * The inode is still attached to the buffer, which means it is 3623 * dirty but reclaim might try to grab it. Check carefully for 3624 * that, and grab the ilock while still holding the i_flags_lock 3625 * to guarantee reclaim will not be able to reclaim this inode 3626 * once we drop the i_flags_lock. 3627 */ 3628 spin_lock(&ip->i_flags_lock); 3629 ASSERT(!__xfs_iflags_test(ip, XFS_ISTALE)); 3630 if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLOCK)) { 3631 spin_unlock(&ip->i_flags_lock); 3632 continue; 3633 } 3634 3635 /* 3636 * ILOCK will pin the inode against reclaim and prevent 3637 * concurrent transactions modifying the inode while we are 3638 * flushing the inode. 3639 */ 3640 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { 3641 spin_unlock(&ip->i_flags_lock); 3642 continue; 3643 } 3644 spin_unlock(&ip->i_flags_lock); 3645 3646 /* 3647 * Skip inodes that are already flush locked as they have 3648 * already been written to the buffer. 3649 */ 3650 if (!xfs_iflock_nowait(ip)) { 3651 xfs_iunlock(ip, XFS_ILOCK_SHARED); 3652 continue; 3653 } 3654 3655 /* 3656 * Abort flushing this inode if we are shut down because the 3657 * inode may not currently be in the AIL. This can occur when 3658 * log I/O failure unpins the inode without inserting into the 3659 * AIL, leaving a dirty/unpinned inode attached to the buffer 3660 * that otherwise looks like it should be flushed. 3661 */ 3662 if (XFS_FORCED_SHUTDOWN(mp)) { 3663 xfs_iunpin_wait(ip); 3664 /* xfs_iflush_abort() drops the flush lock */ 3665 xfs_iflush_abort(ip); 3666 xfs_iunlock(ip, XFS_ILOCK_SHARED); 3667 error = -EIO; 3668 continue; 3669 } 3670 3671 /* don't block waiting on a log force to unpin dirty inodes */ 3672 if (xfs_ipincount(ip)) { 3673 xfs_ifunlock(ip); 3674 xfs_iunlock(ip, XFS_ILOCK_SHARED); 3675 continue; 3676 } 3677 3678 if (!xfs_inode_clean(ip)) 3679 error = xfs_iflush(ip, bp); 3680 else 3681 xfs_ifunlock(ip); 3682 xfs_iunlock(ip, XFS_ILOCK_SHARED); 3683 if (error) 3684 break; 3685 clcount++; 3686 } 3687 3688 if (error) { 3689 bp->b_flags |= XBF_ASYNC; 3690 xfs_buf_ioend_fail(bp); 3691 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 3692 return error; 3693 } 3694 3695 if (!clcount) 3696 return -EAGAIN; 3697 3698 XFS_STATS_INC(mp, xs_icluster_flushcnt); 3699 XFS_STATS_ADD(mp, xs_icluster_flushinode, clcount); 3700 return 0; 3701 |
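The comment added a few rows above states the new xfs_iflush_cluster() contract: the caller passes in a locked cluster buffer, must write it out and release it on success, only releases it on -EAGAIN, and on any other error the filesystem has already been shut down and the buffer already released. A hypothetical caller fragment illustrating just that return-value handling (not taken from the patch):

```c
error = xfs_iflush_cluster(bp);
if (error == -EAGAIN) {
	xfs_buf_relse(bp);	/* nothing was flushed: just drop the buffer */
	return 0;
}
if (error)
	return error;		/* fs already shut down, buffer already released */

/* at least one inode was flushed: the caller now writes out and releases bp */
```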
3855} 3856 3857/* Release an inode. */ 3858void 3859xfs_irele( 3860 struct xfs_inode *ip) 3861{ 3862 trace_xfs_irele(ip, _RET_IP_); --- 13 unchanged lines hidden (view full) --- 3876 if (xfs_ipincount(ip)) 3877 lsn = ip->i_itemp->ili_last_lsn; 3878 xfs_iunlock(ip, XFS_ILOCK_SHARED); 3879 3880 if (!lsn) 3881 return 0; 3882 return xfs_log_force_lsn(ip->i_mount, lsn, XFS_LOG_SYNC, NULL); 3883} | 3702} 3703 3704/* Release an inode. */ 3705void 3706xfs_irele( 3707 struct xfs_inode *ip) 3708{ 3709 trace_xfs_irele(ip, _RET_IP_); --- 13 unchanged lines hidden (view full) --- 3723 if (xfs_ipincount(ip)) 3724 lsn = ip->i_itemp->ili_last_lsn; 3725 xfs_iunlock(ip, XFS_ILOCK_SHARED); 3726 3727 if (!lsn) 3728 return 0; 3729 return xfs_log_force_lsn(ip->i_mount, lsn, XFS_LOG_SYNC, NULL); 3730} |
3731 3732/* 3733 * Grab the exclusive iolock for a data copy from src to dest, making sure to 3734 * abide vfs locking order (lowest pointer value goes first) and breaking the 3735 * layout leases before proceeding. The loop is needed because we cannot call 3736 * the blocking break_layout() with the iolocks held, and therefore have to 3737 * back out both locks. 3738 */ 3739static int 3740xfs_iolock_two_inodes_and_break_layout( 3741 struct inode *src, 3742 struct inode *dest) 3743{ 3744 int error; 3745 3746 if (src > dest) 3747 swap(src, dest); 3748 3749retry: 3750 /* Wait to break both inodes' layouts before we start locking. */ 3751 error = break_layout(src, true); 3752 if (error) 3753 return error; 3754 if (src != dest) { 3755 error = break_layout(dest, true); 3756 if (error) 3757 return error; 3758 } 3759 3760 /* Lock one inode and make sure nobody got in and leased it. */ 3761 inode_lock(src); 3762 error = break_layout(src, false); 3763 if (error) { 3764 inode_unlock(src); 3765 if (error == -EWOULDBLOCK) 3766 goto retry; 3767 return error; 3768 } 3769 3770 if (src == dest) 3771 return 0; 3772 3773 /* Lock the other inode and make sure nobody got in and leased it. */ 3774 inode_lock_nested(dest, I_MUTEX_NONDIR2); 3775 error = break_layout(dest, false); 3776 if (error) { 3777 inode_unlock(src); 3778 inode_unlock(dest); 3779 if (error == -EWOULDBLOCK) 3780 goto retry; 3781 return error; 3782 } 3783 3784 return 0; 3785} 3786 3787/* 3788 * Lock two inodes so that userspace cannot initiate I/O via file syscalls or 3789 * mmap activity. 3790 */ 3791int 3792xfs_ilock2_io_mmap( 3793 struct xfs_inode *ip1, 3794 struct xfs_inode *ip2) 3795{ 3796 int ret; 3797 3798 ret = xfs_iolock_two_inodes_and_break_layout(VFS_I(ip1), VFS_I(ip2)); 3799 if (ret) 3800 return ret; 3801 if (ip1 == ip2) 3802 xfs_ilock(ip1, XFS_MMAPLOCK_EXCL); 3803 else 3804 xfs_lock_two_inodes(ip1, XFS_MMAPLOCK_EXCL, 3805 ip2, XFS_MMAPLOCK_EXCL); 3806 return 0; 3807} 3808 3809/* Unlock both inodes to allow IO and mmap activity. */ 3810void 3811xfs_iunlock2_io_mmap( 3812 struct xfs_inode *ip1, 3813 struct xfs_inode *ip2) 3814{ 3815 bool same_inode = (ip1 == ip2); 3816 3817 xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL); 3818 if (!same_inode) 3819 xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL); 3820 inode_unlock(VFS_I(ip2)); 3821 if (!same_inode) 3822 inode_unlock(VFS_I(ip1)); 3823} |