12a82b8beSDavid Chinner /* 22a82b8beSDavid Chinner * Copyright (c) 2006-2007 Silicon Graphics, Inc. 32a82b8beSDavid Chinner * All Rights Reserved. 42a82b8beSDavid Chinner * 52a82b8beSDavid Chinner * This program is free software; you can redistribute it and/or 62a82b8beSDavid Chinner * modify it under the terms of the GNU General Public License as 72a82b8beSDavid Chinner * published by the Free Software Foundation. 82a82b8beSDavid Chinner * 92a82b8beSDavid Chinner * This program is distributed in the hope that it would be useful, 102a82b8beSDavid Chinner * but WITHOUT ANY WARRANTY; without even the implied warranty of 112a82b8beSDavid Chinner * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 122a82b8beSDavid Chinner * GNU General Public License for more details. 132a82b8beSDavid Chinner * 142a82b8beSDavid Chinner * You should have received a copy of the GNU General Public License 152a82b8beSDavid Chinner * along with this program; if not, write the Free Software Foundation, 162a82b8beSDavid Chinner * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 172a82b8beSDavid Chinner */ 182a82b8beSDavid Chinner #include "xfs.h" 192a82b8beSDavid Chinner #include "xfs_bmap_btree.h" 202a82b8beSDavid Chinner #include "xfs_inum.h" 212a82b8beSDavid Chinner #include "xfs_dinode.h" 222a82b8beSDavid Chinner #include "xfs_inode.h" 232a82b8beSDavid Chinner #include "xfs_ag.h" 242a82b8beSDavid Chinner #include "xfs_log.h" 252a82b8beSDavid Chinner #include "xfs_trans.h" 262a82b8beSDavid Chinner #include "xfs_sb.h" 272a82b8beSDavid Chinner #include "xfs_mount.h" 282a82b8beSDavid Chinner #include "xfs_bmap.h" 292a82b8beSDavid Chinner #include "xfs_alloc.h" 302a82b8beSDavid Chinner #include "xfs_utils.h" 312a82b8beSDavid Chinner #include "xfs_mru_cache.h" 322a82b8beSDavid Chinner #include "xfs_filestream.h" 330b1b213fSChristoph Hellwig #include "xfs_trace.h" 342a82b8beSDavid Chinner 352a82b8beSDavid Chinner #ifdef XFS_FILESTREAMS_TRACE 362a82b8beSDavid Chinner 372a82b8beSDavid Chinner ktrace_t *xfs_filestreams_trace_buf; 382a82b8beSDavid Chinner 392a82b8beSDavid Chinner STATIC void 402a82b8beSDavid Chinner xfs_filestreams_trace( 412a82b8beSDavid Chinner xfs_mount_t *mp, /* mount point */ 422a82b8beSDavid Chinner int type, /* type of trace */ 432a82b8beSDavid Chinner const char *func, /* source function */ 442a82b8beSDavid Chinner int line, /* source line number */ 452a82b8beSDavid Chinner __psunsigned_t arg0, 462a82b8beSDavid Chinner __psunsigned_t arg1, 472a82b8beSDavid Chinner __psunsigned_t arg2, 482a82b8beSDavid Chinner __psunsigned_t arg3, 492a82b8beSDavid Chinner __psunsigned_t arg4, 502a82b8beSDavid Chinner __psunsigned_t arg5) 512a82b8beSDavid Chinner { 522a82b8beSDavid Chinner ktrace_enter(xfs_filestreams_trace_buf, 532a82b8beSDavid Chinner (void *)(__psint_t)(type | (line << 16)), 542a82b8beSDavid Chinner (void *)func, 552a82b8beSDavid Chinner (void *)(__psunsigned_t)current_pid(), 562a82b8beSDavid Chinner (void *)mp, 572a82b8beSDavid Chinner (void *)(__psunsigned_t)arg0, 582a82b8beSDavid Chinner (void *)(__psunsigned_t)arg1, 592a82b8beSDavid Chinner (void *)(__psunsigned_t)arg2, 602a82b8beSDavid Chinner (void *)(__psunsigned_t)arg3, 612a82b8beSDavid Chinner (void *)(__psunsigned_t)arg4, 622a82b8beSDavid Chinner (void *)(__psunsigned_t)arg5, 632a82b8beSDavid Chinner NULL, NULL, NULL, NULL, NULL, NULL); 642a82b8beSDavid Chinner } 652a82b8beSDavid Chinner 662a82b8beSDavid Chinner #define TRACE0(mp,t) TRACE6(mp,t,0,0,0,0,0,0) 672a82b8beSDavid Chinner #define TRACE1(mp,t,a0) TRACE6(mp,t,a0,0,0,0,0,0) 682a82b8beSDavid Chinner #define TRACE2(mp,t,a0,a1) TRACE6(mp,t,a0,a1,0,0,0,0) 692a82b8beSDavid Chinner #define TRACE3(mp,t,a0,a1,a2) TRACE6(mp,t,a0,a1,a2,0,0,0) 702a82b8beSDavid Chinner #define TRACE4(mp,t,a0,a1,a2,a3) TRACE6(mp,t,a0,a1,a2,a3,0,0) 712a82b8beSDavid Chinner #define TRACE5(mp,t,a0,a1,a2,a3,a4) TRACE6(mp,t,a0,a1,a2,a3,a4,0) 722a82b8beSDavid Chinner #define TRACE6(mp,t,a0,a1,a2,a3,a4,a5) \ 7334a622b2SHarvey Harrison xfs_filestreams_trace(mp, t, __func__, __LINE__, \ 742a82b8beSDavid Chinner (__psunsigned_t)a0, (__psunsigned_t)a1, \ 752a82b8beSDavid Chinner (__psunsigned_t)a2, (__psunsigned_t)a3, \ 762a82b8beSDavid Chinner (__psunsigned_t)a4, (__psunsigned_t)a5) 772a82b8beSDavid Chinner 782a82b8beSDavid Chinner #define TRACE_AG_SCAN(mp, ag, ag2) \ 792a82b8beSDavid Chinner TRACE2(mp, XFS_FSTRM_KTRACE_AGSCAN, ag, ag2); 802a82b8beSDavid Chinner #define TRACE_AG_PICK1(mp, max_ag, maxfree) \ 812a82b8beSDavid Chinner TRACE2(mp, XFS_FSTRM_KTRACE_AGPICK1, max_ag, maxfree); 822a82b8beSDavid Chinner #define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag) \ 832a82b8beSDavid Chinner TRACE6(mp, XFS_FSTRM_KTRACE_AGPICK2, ag, ag2, \ 842a82b8beSDavid Chinner cnt, free, scan, flag) 852a82b8beSDavid Chinner #define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2) \ 862a82b8beSDavid Chinner TRACE5(mp, XFS_FSTRM_KTRACE_UPDATE, ip, ag, cnt, ag2, cnt2) 872a82b8beSDavid Chinner #define TRACE_FREE(mp, ip, pip, ag, cnt) \ 882a82b8beSDavid Chinner TRACE4(mp, XFS_FSTRM_KTRACE_FREE, ip, pip, ag, cnt) 892a82b8beSDavid Chinner #define TRACE_LOOKUP(mp, ip, pip, ag, cnt) \ 902a82b8beSDavid Chinner TRACE4(mp, XFS_FSTRM_KTRACE_ITEM_LOOKUP, ip, pip, ag, cnt) 912a82b8beSDavid Chinner #define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt) \ 922a82b8beSDavid Chinner TRACE4(mp, XFS_FSTRM_KTRACE_ASSOCIATE, ip, pip, ag, cnt) 932a82b8beSDavid Chinner #define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt) \ 942a82b8beSDavid Chinner TRACE6(mp, XFS_FSTRM_KTRACE_MOVEAG, ip, pip, oag, ocnt, nag, ncnt) 952a82b8beSDavid Chinner #define TRACE_ORPHAN(mp, ip, ag) \ 962a82b8beSDavid Chinner TRACE2(mp, XFS_FSTRM_KTRACE_ORPHAN, ip, ag); 972a82b8beSDavid Chinner 982a82b8beSDavid Chinner 992a82b8beSDavid Chinner #else 1002a82b8beSDavid Chinner #define TRACE_AG_SCAN(mp, ag, ag2) 1012a82b8beSDavid Chinner #define TRACE_AG_PICK1(mp, max_ag, maxfree) 1022a82b8beSDavid Chinner #define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag) 1032a82b8beSDavid Chinner #define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2) 1042a82b8beSDavid Chinner #define TRACE_FREE(mp, ip, pip, ag, cnt) 1052a82b8beSDavid Chinner #define TRACE_LOOKUP(mp, ip, pip, ag, cnt) 1062a82b8beSDavid Chinner #define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt) 1072a82b8beSDavid Chinner #define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt) 1082a82b8beSDavid Chinner #define TRACE_ORPHAN(mp, ip, ag) 1092a82b8beSDavid Chinner #endif 1102a82b8beSDavid Chinner 1112a82b8beSDavid Chinner static kmem_zone_t *item_zone; 1122a82b8beSDavid Chinner 1132a82b8beSDavid Chinner /* 1142a82b8beSDavid Chinner * Structure for associating a file or a directory with an allocation group. 1152a82b8beSDavid Chinner * The parent directory pointer is only needed for files, but since there will 1162a82b8beSDavid Chinner * generally be vastly more files than directories in the cache, using the same 1172a82b8beSDavid Chinner * data structure simplifies the code with very little memory overhead. 1182a82b8beSDavid Chinner */ 1192a82b8beSDavid Chinner typedef struct fstrm_item 1202a82b8beSDavid Chinner { 1212a82b8beSDavid Chinner xfs_agnumber_t ag; /* AG currently in use for the file/directory. */ 1222a82b8beSDavid Chinner xfs_inode_t *ip; /* inode self-pointer. */ 1232a82b8beSDavid Chinner xfs_inode_t *pip; /* Parent directory inode pointer. */ 1242a82b8beSDavid Chinner } fstrm_item_t; 1252a82b8beSDavid Chinner 1260664ce8dSChristoph Hellwig /* 1270664ce8dSChristoph Hellwig * Allocation group filestream associations are tracked with per-ag atomic 1280664ce8dSChristoph Hellwig * counters. These counters allow _xfs_filestream_pick_ag() to tell whether a 1290664ce8dSChristoph Hellwig * particular AG already has active filestreams associated with it. The mount 1300664ce8dSChristoph Hellwig * point's m_peraglock is used to protect these counters from per-ag array 1310664ce8dSChristoph Hellwig * re-allocation during a growfs operation. When xfs_growfs_data_private() is 1320664ce8dSChristoph Hellwig * about to reallocate the array, it calls xfs_filestream_flush() with the 1330664ce8dSChristoph Hellwig * m_peraglock held in write mode. 1340664ce8dSChristoph Hellwig * 1350664ce8dSChristoph Hellwig * Since xfs_mru_cache_flush() guarantees that all the free functions for all 1360664ce8dSChristoph Hellwig * the cache elements have finished executing before it returns, it's safe for 1370664ce8dSChristoph Hellwig * the free functions to use the atomic counters without m_peraglock protection. 1380664ce8dSChristoph Hellwig * This allows the implementation of xfs_fstrm_free_func() to be agnostic about 1390664ce8dSChristoph Hellwig * whether it was called with the m_peraglock held in read mode, write mode or 1400664ce8dSChristoph Hellwig * not held at all. The race condition this addresses is the following: 1410664ce8dSChristoph Hellwig * 1420664ce8dSChristoph Hellwig * - The work queue scheduler fires and pulls a filestream directory cache 1430664ce8dSChristoph Hellwig * element off the LRU end of the cache for deletion, then gets pre-empted. 1440664ce8dSChristoph Hellwig * - A growfs operation grabs the m_peraglock in write mode, flushes all the 1450664ce8dSChristoph Hellwig * remaining items from the cache and reallocates the mount point's per-ag 1460664ce8dSChristoph Hellwig * array, resetting all the counters to zero. 1470664ce8dSChristoph Hellwig * - The work queue thread resumes and calls the free function for the element 1480664ce8dSChristoph Hellwig * it started cleaning up earlier. In the process it decrements the 1490664ce8dSChristoph Hellwig * filestreams counter for an AG that now has no references. 1500664ce8dSChristoph Hellwig * 1510664ce8dSChristoph Hellwig * With a shrinkfs feature, the above scenario could panic the system. 1520664ce8dSChristoph Hellwig * 1530664ce8dSChristoph Hellwig * All other uses of the following macros should be protected by either the 1540664ce8dSChristoph Hellwig * m_peraglock held in read mode, or the cache's internal locking exposed by the 1550664ce8dSChristoph Hellwig * interval between a call to xfs_mru_cache_lookup() and a call to 1560664ce8dSChristoph Hellwig * xfs_mru_cache_done(). In addition, the m_peraglock must be held in read mode 1570664ce8dSChristoph Hellwig * when new elements are added to the cache. 1580664ce8dSChristoph Hellwig * 1590664ce8dSChristoph Hellwig * Combined, these locking rules ensure that no associations will ever exist in 1600664ce8dSChristoph Hellwig * the cache that reference per-ag array elements that have since been 1610664ce8dSChristoph Hellwig * reallocated. 1620664ce8dSChristoph Hellwig */ 1630664ce8dSChristoph Hellwig static int 1640664ce8dSChristoph Hellwig xfs_filestream_peek_ag( 1650664ce8dSChristoph Hellwig xfs_mount_t *mp, 1660664ce8dSChristoph Hellwig xfs_agnumber_t agno) 1670664ce8dSChristoph Hellwig { 1680664ce8dSChristoph Hellwig struct xfs_perag *pag; 1690664ce8dSChristoph Hellwig int ret; 1700664ce8dSChristoph Hellwig 1710664ce8dSChristoph Hellwig pag = xfs_perag_get(mp, agno); 1720664ce8dSChristoph Hellwig ret = atomic_read(&pag->pagf_fstrms); 1730664ce8dSChristoph Hellwig xfs_perag_put(pag); 1740664ce8dSChristoph Hellwig return ret; 1750664ce8dSChristoph Hellwig } 1760664ce8dSChristoph Hellwig 1770664ce8dSChristoph Hellwig static int 1780664ce8dSChristoph Hellwig xfs_filestream_get_ag( 1790664ce8dSChristoph Hellwig xfs_mount_t *mp, 1800664ce8dSChristoph Hellwig xfs_agnumber_t agno) 1810664ce8dSChristoph Hellwig { 1820664ce8dSChristoph Hellwig struct xfs_perag *pag; 1830664ce8dSChristoph Hellwig int ret; 1840664ce8dSChristoph Hellwig 1850664ce8dSChristoph Hellwig pag = xfs_perag_get(mp, agno); 1860664ce8dSChristoph Hellwig ret = atomic_inc_return(&pag->pagf_fstrms); 1870664ce8dSChristoph Hellwig xfs_perag_put(pag); 1880664ce8dSChristoph Hellwig return ret; 1890664ce8dSChristoph Hellwig } 1900664ce8dSChristoph Hellwig 1910664ce8dSChristoph Hellwig static void 1920664ce8dSChristoph Hellwig xfs_filestream_put_ag( 1930664ce8dSChristoph Hellwig xfs_mount_t *mp, 1940664ce8dSChristoph Hellwig xfs_agnumber_t agno) 1950664ce8dSChristoph Hellwig { 1960664ce8dSChristoph Hellwig struct xfs_perag *pag; 1970664ce8dSChristoph Hellwig 1980664ce8dSChristoph Hellwig pag = xfs_perag_get(mp, agno); 1990664ce8dSChristoph Hellwig atomic_dec(&pag->pagf_fstrms); 2000664ce8dSChristoph Hellwig xfs_perag_put(pag); 2010664ce8dSChristoph Hellwig } 2022a82b8beSDavid Chinner 2032a82b8beSDavid Chinner /* 2042a82b8beSDavid Chinner * Scan the AGs starting at startag looking for an AG that isn't in use and has 2052a82b8beSDavid Chinner * at least minlen blocks free. 2062a82b8beSDavid Chinner */ 2072a82b8beSDavid Chinner static int 2082a82b8beSDavid Chinner _xfs_filestream_pick_ag( 2092a82b8beSDavid Chinner xfs_mount_t *mp, 2102a82b8beSDavid Chinner xfs_agnumber_t startag, 2112a82b8beSDavid Chinner xfs_agnumber_t *agp, 2122a82b8beSDavid Chinner int flags, 2132a82b8beSDavid Chinner xfs_extlen_t minlen) 2142a82b8beSDavid Chinner { 2154196ac08SDave Chinner int streams, max_streams; 2162a82b8beSDavid Chinner int err, trylock, nscan; 2176cc87645SDave Chinner xfs_extlen_t longest, free, minfree, maxfree = 0; 2182a82b8beSDavid Chinner xfs_agnumber_t ag, max_ag = NULLAGNUMBER; 2192a82b8beSDavid Chinner struct xfs_perag *pag; 2202a82b8beSDavid Chinner 2212a82b8beSDavid Chinner /* 2% of an AG's blocks must be free for it to be chosen. */ 2222a82b8beSDavid Chinner minfree = mp->m_sb.sb_agblocks / 50; 2232a82b8beSDavid Chinner 2242a82b8beSDavid Chinner ag = startag; 2252a82b8beSDavid Chinner *agp = NULLAGNUMBER; 2262a82b8beSDavid Chinner 2272a82b8beSDavid Chinner /* For the first pass, don't sleep trying to init the per-AG. */ 2282a82b8beSDavid Chinner trylock = XFS_ALLOC_FLAG_TRYLOCK; 2292a82b8beSDavid Chinner 2302a82b8beSDavid Chinner for (nscan = 0; 1; nscan++) { 2314196ac08SDave Chinner pag = xfs_perag_get(mp, ag); 2324196ac08SDave Chinner TRACE_AG_SCAN(mp, ag, atomic_read(&pag->pagf_fstrms)); 2332a82b8beSDavid Chinner 2342a82b8beSDavid Chinner if (!pag->pagf_init) { 2352a82b8beSDavid Chinner err = xfs_alloc_pagf_init(mp, NULL, ag, trylock); 2364196ac08SDave Chinner if (err && !trylock) { 2374196ac08SDave Chinner xfs_perag_put(pag); 2382a82b8beSDavid Chinner return err; 2392a82b8beSDavid Chinner } 2404196ac08SDave Chinner } 2412a82b8beSDavid Chinner 2422a82b8beSDavid Chinner /* Might fail sometimes during the 1st pass with trylock set. */ 2432a82b8beSDavid Chinner if (!pag->pagf_init) 2442a82b8beSDavid Chinner goto next_ag; 2452a82b8beSDavid Chinner 2462a82b8beSDavid Chinner /* Keep track of the AG with the most free blocks. */ 2472a82b8beSDavid Chinner if (pag->pagf_freeblks > maxfree) { 2482a82b8beSDavid Chinner maxfree = pag->pagf_freeblks; 2494196ac08SDave Chinner max_streams = atomic_read(&pag->pagf_fstrms); 2502a82b8beSDavid Chinner max_ag = ag; 2512a82b8beSDavid Chinner } 2522a82b8beSDavid Chinner 2532a82b8beSDavid Chinner /* 2542a82b8beSDavid Chinner * The AG reference count does two things: it enforces mutual 2552a82b8beSDavid Chinner * exclusion when examining the suitability of an AG in this 2562a82b8beSDavid Chinner * loop, and it guards against two filestreams being established 2572a82b8beSDavid Chinner * in the same AG as each other. 2582a82b8beSDavid Chinner */ 2592a82b8beSDavid Chinner if (xfs_filestream_get_ag(mp, ag) > 1) { 2602a82b8beSDavid Chinner xfs_filestream_put_ag(mp, ag); 2612a82b8beSDavid Chinner goto next_ag; 2622a82b8beSDavid Chinner } 2632a82b8beSDavid Chinner 2646cc87645SDave Chinner longest = xfs_alloc_longest_free_extent(mp, pag); 2652a82b8beSDavid Chinner if (((minlen && longest >= minlen) || 2662a82b8beSDavid Chinner (!minlen && pag->pagf_freeblks >= minfree)) && 2672a82b8beSDavid Chinner (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) || 2682a82b8beSDavid Chinner (flags & XFS_PICK_LOWSPACE))) { 2692a82b8beSDavid Chinner 2702a82b8beSDavid Chinner /* Break out, retaining the reference on the AG. */ 2712a82b8beSDavid Chinner free = pag->pagf_freeblks; 2724196ac08SDave Chinner streams = atomic_read(&pag->pagf_fstrms); 2734196ac08SDave Chinner xfs_perag_put(pag); 2742a82b8beSDavid Chinner *agp = ag; 2752a82b8beSDavid Chinner break; 2762a82b8beSDavid Chinner } 2772a82b8beSDavid Chinner 2782a82b8beSDavid Chinner /* Drop the reference on this AG, it's not usable. */ 2792a82b8beSDavid Chinner xfs_filestream_put_ag(mp, ag); 2802a82b8beSDavid Chinner next_ag: 2814196ac08SDave Chinner xfs_perag_put(pag); 2822a82b8beSDavid Chinner /* Move to the next AG, wrapping to AG 0 if necessary. */ 2832a82b8beSDavid Chinner if (++ag >= mp->m_sb.sb_agcount) 2842a82b8beSDavid Chinner ag = 0; 2852a82b8beSDavid Chinner 2862a82b8beSDavid Chinner /* If a full pass of the AGs hasn't been done yet, continue. */ 2872a82b8beSDavid Chinner if (ag != startag) 2882a82b8beSDavid Chinner continue; 2892a82b8beSDavid Chinner 2902a82b8beSDavid Chinner /* Allow sleeping in xfs_alloc_pagf_init() on the 2nd pass. */ 2912a82b8beSDavid Chinner if (trylock != 0) { 2922a82b8beSDavid Chinner trylock = 0; 2932a82b8beSDavid Chinner continue; 2942a82b8beSDavid Chinner } 2952a82b8beSDavid Chinner 2962a82b8beSDavid Chinner /* Finally, if lowspace wasn't set, set it for the 3rd pass. */ 2972a82b8beSDavid Chinner if (!(flags & XFS_PICK_LOWSPACE)) { 2982a82b8beSDavid Chinner flags |= XFS_PICK_LOWSPACE; 2992a82b8beSDavid Chinner continue; 3002a82b8beSDavid Chinner } 3012a82b8beSDavid Chinner 3022a82b8beSDavid Chinner /* 3032a82b8beSDavid Chinner * Take the AG with the most free space, regardless of whether 3042a82b8beSDavid Chinner * it's already in use by another filestream. 3052a82b8beSDavid Chinner */ 3062a82b8beSDavid Chinner if (max_ag != NULLAGNUMBER) { 3072a82b8beSDavid Chinner xfs_filestream_get_ag(mp, max_ag); 3082a82b8beSDavid Chinner TRACE_AG_PICK1(mp, max_ag, maxfree); 3094196ac08SDave Chinner streams = max_streams; 3102a82b8beSDavid Chinner free = maxfree; 3112a82b8beSDavid Chinner *agp = max_ag; 3122a82b8beSDavid Chinner break; 3132a82b8beSDavid Chinner } 3142a82b8beSDavid Chinner 3152a82b8beSDavid Chinner /* take AG 0 if none matched */ 3162a82b8beSDavid Chinner TRACE_AG_PICK1(mp, max_ag, maxfree); 3172a82b8beSDavid Chinner *agp = 0; 3182a82b8beSDavid Chinner return 0; 3192a82b8beSDavid Chinner } 3202a82b8beSDavid Chinner 3214196ac08SDave Chinner TRACE_AG_PICK2(mp, startag, *agp, streams, free, nscan, flags); 3222a82b8beSDavid Chinner 3232a82b8beSDavid Chinner return 0; 3242a82b8beSDavid Chinner } 3252a82b8beSDavid Chinner 3262a82b8beSDavid Chinner /* 3272a82b8beSDavid Chinner * Set the allocation group number for a file or a directory, updating inode 3281c1c6ebcSDave Chinner * references and per-AG references as appropriate. 3292a82b8beSDavid Chinner */ 3302a82b8beSDavid Chinner static int 3312a82b8beSDavid Chinner _xfs_filestream_update_ag( 3322a82b8beSDavid Chinner xfs_inode_t *ip, 3332a82b8beSDavid Chinner xfs_inode_t *pip, 3342a82b8beSDavid Chinner xfs_agnumber_t ag) 3352a82b8beSDavid Chinner { 3362a82b8beSDavid Chinner int err = 0; 3372a82b8beSDavid Chinner xfs_mount_t *mp; 3382a82b8beSDavid Chinner xfs_mru_cache_t *cache; 3392a82b8beSDavid Chinner fstrm_item_t *item; 3402a82b8beSDavid Chinner xfs_agnumber_t old_ag; 3412a82b8beSDavid Chinner xfs_inode_t *old_pip; 3422a82b8beSDavid Chinner 3432a82b8beSDavid Chinner /* 3442a82b8beSDavid Chinner * Either ip is a regular file and pip is a directory, or ip is a 3452a82b8beSDavid Chinner * directory and pip is NULL. 3462a82b8beSDavid Chinner */ 347abbede1bSAl Viro ASSERT(ip && ((S_ISREG(ip->i_d.di_mode) && pip && 34803209378SAl Viro S_ISDIR(pip->i_d.di_mode)) || 34903209378SAl Viro (S_ISDIR(ip->i_d.di_mode) && !pip))); 3502a82b8beSDavid Chinner 3512a82b8beSDavid Chinner mp = ip->i_mount; 3522a82b8beSDavid Chinner cache = mp->m_filestream; 3532a82b8beSDavid Chinner 3542a82b8beSDavid Chinner item = xfs_mru_cache_lookup(cache, ip->i_ino); 3552a82b8beSDavid Chinner if (item) { 3562a82b8beSDavid Chinner ASSERT(item->ip == ip); 3572a82b8beSDavid Chinner old_ag = item->ag; 3582a82b8beSDavid Chinner item->ag = ag; 3592a82b8beSDavid Chinner old_pip = item->pip; 3602a82b8beSDavid Chinner item->pip = pip; 3612a82b8beSDavid Chinner xfs_mru_cache_done(cache); 3622a82b8beSDavid Chinner 3632a82b8beSDavid Chinner /* 3642a82b8beSDavid Chinner * If the AG has changed, drop the old ref and take a new one, 3652a82b8beSDavid Chinner * effectively transferring the reference from old to new AG. 3662a82b8beSDavid Chinner */ 3672a82b8beSDavid Chinner if (ag != old_ag) { 3682a82b8beSDavid Chinner xfs_filestream_put_ag(mp, old_ag); 3692a82b8beSDavid Chinner xfs_filestream_get_ag(mp, ag); 3702a82b8beSDavid Chinner } 3712a82b8beSDavid Chinner 3722a82b8beSDavid Chinner /* 3732a82b8beSDavid Chinner * If ip is a file and its pip has changed, drop the old ref and 3742a82b8beSDavid Chinner * take a new one. 3752a82b8beSDavid Chinner */ 3762a82b8beSDavid Chinner if (pip && pip != old_pip) { 3772a82b8beSDavid Chinner IRELE(old_pip); 3782a82b8beSDavid Chinner IHOLD(pip); 3792a82b8beSDavid Chinner } 3802a82b8beSDavid Chinner 3812a82b8beSDavid Chinner TRACE_UPDATE(mp, ip, old_ag, xfs_filestream_peek_ag(mp, old_ag), 3822a82b8beSDavid Chinner ag, xfs_filestream_peek_ag(mp, ag)); 3832a82b8beSDavid Chinner return 0; 3842a82b8beSDavid Chinner } 3852a82b8beSDavid Chinner 3862a82b8beSDavid Chinner item = kmem_zone_zalloc(item_zone, KM_MAYFAIL); 3872a82b8beSDavid Chinner if (!item) 3882a82b8beSDavid Chinner return ENOMEM; 3892a82b8beSDavid Chinner 3902a82b8beSDavid Chinner item->ag = ag; 3912a82b8beSDavid Chinner item->ip = ip; 3922a82b8beSDavid Chinner item->pip = pip; 3932a82b8beSDavid Chinner 3942a82b8beSDavid Chinner err = xfs_mru_cache_insert(cache, ip->i_ino, item); 3952a82b8beSDavid Chinner if (err) { 3962a82b8beSDavid Chinner kmem_zone_free(item_zone, item); 3972a82b8beSDavid Chinner return err; 3982a82b8beSDavid Chinner } 3992a82b8beSDavid Chinner 4002a82b8beSDavid Chinner /* Take a reference on the AG. */ 4012a82b8beSDavid Chinner xfs_filestream_get_ag(mp, ag); 4022a82b8beSDavid Chinner 4032a82b8beSDavid Chinner /* 4042a82b8beSDavid Chinner * Take a reference on the inode itself regardless of whether it's a 4052a82b8beSDavid Chinner * regular file or a directory. 4062a82b8beSDavid Chinner */ 4072a82b8beSDavid Chinner IHOLD(ip); 4082a82b8beSDavid Chinner 4092a82b8beSDavid Chinner /* 4102a82b8beSDavid Chinner * In the case of a regular file, take a reference on the parent inode 4112a82b8beSDavid Chinner * as well to ensure it remains in-core. 4122a82b8beSDavid Chinner */ 4132a82b8beSDavid Chinner if (pip) 4142a82b8beSDavid Chinner IHOLD(pip); 4152a82b8beSDavid Chinner 4162a82b8beSDavid Chinner TRACE_UPDATE(mp, ip, ag, xfs_filestream_peek_ag(mp, ag), 4172a82b8beSDavid Chinner ag, xfs_filestream_peek_ag(mp, ag)); 4182a82b8beSDavid Chinner 4192a82b8beSDavid Chinner return 0; 4202a82b8beSDavid Chinner } 4212a82b8beSDavid Chinner 4222a82b8beSDavid Chinner /* xfs_fstrm_free_func(): callback for freeing cached stream items. */ 423a8272ce0SDavid Chinner STATIC void 4242a82b8beSDavid Chinner xfs_fstrm_free_func( 425bcc7b445SEric Sandeen unsigned long ino, 426bcc7b445SEric Sandeen void *data) 4272a82b8beSDavid Chinner { 428bcc7b445SEric Sandeen fstrm_item_t *item = (fstrm_item_t *)data; 4292a82b8beSDavid Chinner xfs_inode_t *ip = item->ip; 4302a82b8beSDavid Chinner 4312a82b8beSDavid Chinner ASSERT(ip->i_ino == ino); 4322a82b8beSDavid Chinner 4332a82b8beSDavid Chinner xfs_iflags_clear(ip, XFS_IFILESTREAM); 4342a82b8beSDavid Chinner 4352a82b8beSDavid Chinner /* Drop the reference taken on the AG when the item was added. */ 4360664ce8dSChristoph Hellwig xfs_filestream_put_ag(ip->i_mount, item->ag); 4372a82b8beSDavid Chinner 4382a82b8beSDavid Chinner TRACE_FREE(ip->i_mount, ip, item->pip, item->ag, 4392a82b8beSDavid Chinner xfs_filestream_peek_ag(ip->i_mount, item->ag)); 4402a82b8beSDavid Chinner 4412a82b8beSDavid Chinner /* 4422a82b8beSDavid Chinner * _xfs_filestream_update_ag() always takes a reference on the inode 4432a82b8beSDavid Chinner * itself, whether it's a file or a directory. Release it here. 4442a82b8beSDavid Chinner * This can result in the inode being freed and so we must 4452a82b8beSDavid Chinner * not hold any inode locks when freeing filesstreams objects 4462a82b8beSDavid Chinner * otherwise we can deadlock here. 4472a82b8beSDavid Chinner */ 4482a82b8beSDavid Chinner IRELE(ip); 4492a82b8beSDavid Chinner 4502a82b8beSDavid Chinner /* 4512a82b8beSDavid Chinner * In the case of a regular file, _xfs_filestream_update_ag() also 4522a82b8beSDavid Chinner * takes a ref on the parent inode to keep it in-core. Release that 4532a82b8beSDavid Chinner * too. 4542a82b8beSDavid Chinner */ 4552a82b8beSDavid Chinner if (item->pip) 4562a82b8beSDavid Chinner IRELE(item->pip); 4572a82b8beSDavid Chinner 4582a82b8beSDavid Chinner /* Finally, free the memory allocated for the item. */ 4592a82b8beSDavid Chinner kmem_zone_free(item_zone, item); 4602a82b8beSDavid Chinner } 4612a82b8beSDavid Chinner 4622a82b8beSDavid Chinner /* 4632a82b8beSDavid Chinner * xfs_filestream_init() is called at xfs initialisation time to set up the 4642a82b8beSDavid Chinner * memory zone that will be used for filestream data structure allocation. 4652a82b8beSDavid Chinner */ 4662a82b8beSDavid Chinner int 4672a82b8beSDavid Chinner xfs_filestream_init(void) 4682a82b8beSDavid Chinner { 4692a82b8beSDavid Chinner item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item"); 4709f8868ffSChristoph Hellwig if (!item_zone) 4719f8868ffSChristoph Hellwig return -ENOMEM; 4720b1b213fSChristoph Hellwig 4739f8868ffSChristoph Hellwig return 0; 4742a82b8beSDavid Chinner } 4752a82b8beSDavid Chinner 4762a82b8beSDavid Chinner /* 4772a82b8beSDavid Chinner * xfs_filestream_uninit() is called at xfs termination time to destroy the 4782a82b8beSDavid Chinner * memory zone that was used for filestream data structure allocation. 4792a82b8beSDavid Chinner */ 4802a82b8beSDavid Chinner void 4812a82b8beSDavid Chinner xfs_filestream_uninit(void) 4822a82b8beSDavid Chinner { 4832a82b8beSDavid Chinner kmem_zone_destroy(item_zone); 4842a82b8beSDavid Chinner } 4852a82b8beSDavid Chinner 4862a82b8beSDavid Chinner /* 4872a82b8beSDavid Chinner * xfs_filestream_mount() is called when a file system is mounted with the 4882a82b8beSDavid Chinner * filestream option. It is responsible for allocating the data structures 4892a82b8beSDavid Chinner * needed to track the new file system's file streams. 4902a82b8beSDavid Chinner */ 4912a82b8beSDavid Chinner int 4922a82b8beSDavid Chinner xfs_filestream_mount( 4932a82b8beSDavid Chinner xfs_mount_t *mp) 4942a82b8beSDavid Chinner { 4952a82b8beSDavid Chinner int err; 4962a82b8beSDavid Chinner unsigned int lifetime, grp_count; 4972a82b8beSDavid Chinner 4982a82b8beSDavid Chinner /* 4992a82b8beSDavid Chinner * The filestream timer tunable is currently fixed within the range of 5002a82b8beSDavid Chinner * one second to four minutes, with five seconds being the default. The 5012a82b8beSDavid Chinner * group count is somewhat arbitrary, but it'd be nice to adhere to the 5022a82b8beSDavid Chinner * timer tunable to within about 10 percent. This requires at least 10 5032a82b8beSDavid Chinner * groups. 5042a82b8beSDavid Chinner */ 5052a82b8beSDavid Chinner lifetime = xfs_fstrm_centisecs * 10; 5062a82b8beSDavid Chinner grp_count = 10; 5072a82b8beSDavid Chinner 5082a82b8beSDavid Chinner err = xfs_mru_cache_create(&mp->m_filestream, lifetime, grp_count, 509bcc7b445SEric Sandeen xfs_fstrm_free_func); 5102a82b8beSDavid Chinner 5112a82b8beSDavid Chinner return err; 5122a82b8beSDavid Chinner } 5132a82b8beSDavid Chinner 5142a82b8beSDavid Chinner /* 5152a82b8beSDavid Chinner * xfs_filestream_unmount() is called when a file system that was mounted with 5162a82b8beSDavid Chinner * the filestream option is unmounted. It drains the data structures created 5172a82b8beSDavid Chinner * to track the file system's file streams and frees all the memory that was 5182a82b8beSDavid Chinner * allocated. 5192a82b8beSDavid Chinner */ 5202a82b8beSDavid Chinner void 5212a82b8beSDavid Chinner xfs_filestream_unmount( 5222a82b8beSDavid Chinner xfs_mount_t *mp) 5232a82b8beSDavid Chinner { 5242a82b8beSDavid Chinner xfs_mru_cache_destroy(mp->m_filestream); 5252a82b8beSDavid Chinner } 5262a82b8beSDavid Chinner 5272a82b8beSDavid Chinner /* 5282a82b8beSDavid Chinner * Return the AG of the filestream the file or directory belongs to, or 5292a82b8beSDavid Chinner * NULLAGNUMBER otherwise. 5302a82b8beSDavid Chinner */ 5312a82b8beSDavid Chinner xfs_agnumber_t 5322a82b8beSDavid Chinner xfs_filestream_lookup_ag( 5332a82b8beSDavid Chinner xfs_inode_t *ip) 5342a82b8beSDavid Chinner { 5352a82b8beSDavid Chinner xfs_mru_cache_t *cache; 5362a82b8beSDavid Chinner fstrm_item_t *item; 5372a82b8beSDavid Chinner xfs_agnumber_t ag; 5382a82b8beSDavid Chinner int ref; 5392a82b8beSDavid Chinner 54003209378SAl Viro if (!S_ISREG(ip->i_d.di_mode) && !S_ISDIR(ip->i_d.di_mode)) { 5412a82b8beSDavid Chinner ASSERT(0); 5422a82b8beSDavid Chinner return NULLAGNUMBER; 5432a82b8beSDavid Chinner } 5442a82b8beSDavid Chinner 5452a82b8beSDavid Chinner cache = ip->i_mount->m_filestream; 5462a82b8beSDavid Chinner item = xfs_mru_cache_lookup(cache, ip->i_ino); 5472a82b8beSDavid Chinner if (!item) { 5482a82b8beSDavid Chinner TRACE_LOOKUP(ip->i_mount, ip, NULL, NULLAGNUMBER, 0); 5492a82b8beSDavid Chinner return NULLAGNUMBER; 5502a82b8beSDavid Chinner } 5512a82b8beSDavid Chinner 5522a82b8beSDavid Chinner ASSERT(ip == item->ip); 5532a82b8beSDavid Chinner ag = item->ag; 5542a82b8beSDavid Chinner ref = xfs_filestream_peek_ag(ip->i_mount, ag); 5552a82b8beSDavid Chinner xfs_mru_cache_done(cache); 5562a82b8beSDavid Chinner 5572a82b8beSDavid Chinner TRACE_LOOKUP(ip->i_mount, ip, item->pip, ag, ref); 5582a82b8beSDavid Chinner return ag; 5592a82b8beSDavid Chinner } 5602a82b8beSDavid Chinner 5612a82b8beSDavid Chinner /* 5622a82b8beSDavid Chinner * xfs_filestream_associate() should only be called to associate a regular file 5632a82b8beSDavid Chinner * with its parent directory. Calling it with a child directory isn't 5642a82b8beSDavid Chinner * appropriate because filestreams don't apply to entire directory hierarchies. 5652a82b8beSDavid Chinner * Creating a file in a child directory of an existing filestream directory 5662a82b8beSDavid Chinner * starts a new filestream with its own allocation group association. 5672a82b8beSDavid Chinner * 5682a82b8beSDavid Chinner * Returns < 0 on error, 0 if successful association occurred, > 0 if 5692a82b8beSDavid Chinner * we failed to get an association because of locking issues. 5702a82b8beSDavid Chinner */ 5712a82b8beSDavid Chinner int 5722a82b8beSDavid Chinner xfs_filestream_associate( 5732a82b8beSDavid Chinner xfs_inode_t *pip, 5742a82b8beSDavid Chinner xfs_inode_t *ip) 5752a82b8beSDavid Chinner { 5762a82b8beSDavid Chinner xfs_mount_t *mp; 5772a82b8beSDavid Chinner xfs_mru_cache_t *cache; 5782a82b8beSDavid Chinner fstrm_item_t *item; 5792a82b8beSDavid Chinner xfs_agnumber_t ag, rotorstep, startag; 5802a82b8beSDavid Chinner int err = 0; 5812a82b8beSDavid Chinner 58203209378SAl Viro ASSERT(S_ISDIR(pip->i_d.di_mode)); 58303209378SAl Viro ASSERT(S_ISREG(ip->i_d.di_mode)); 58403209378SAl Viro if (!S_ISDIR(pip->i_d.di_mode) || !S_ISREG(ip->i_d.di_mode)) 5852a82b8beSDavid Chinner return -EINVAL; 5862a82b8beSDavid Chinner 5872a82b8beSDavid Chinner mp = pip->i_mount; 5882a82b8beSDavid Chinner cache = mp->m_filestream; 5892a82b8beSDavid Chinner 5902a82b8beSDavid Chinner /* 5912a82b8beSDavid Chinner * We have a problem, Houston. 5922a82b8beSDavid Chinner * 5932a82b8beSDavid Chinner * Taking the iolock here violates inode locking order - we already 5942a82b8beSDavid Chinner * hold the ilock. Hence if we block getting this lock we may never 5952a82b8beSDavid Chinner * wake. Unfortunately, that means if we can't get the lock, we're 5962a82b8beSDavid Chinner * screwed in terms of getting a stream association - we can't spin 5972a82b8beSDavid Chinner * waiting for the lock because someone else is waiting on the lock we 5982a82b8beSDavid Chinner * hold and we cannot drop that as we are in a transaction here. 5992a82b8beSDavid Chinner * 600075fe102SChristoph Hellwig * Lucky for us, this inversion is not a problem because it's a 601075fe102SChristoph Hellwig * directory inode that we are trying to lock here. 6022a82b8beSDavid Chinner * 6032a82b8beSDavid Chinner * So, if we can't get the iolock without sleeping then just give up 6042a82b8beSDavid Chinner */ 6051c1c6ebcSDave Chinner if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL)) 6062a82b8beSDavid Chinner return 1; 6072a82b8beSDavid Chinner 6082a82b8beSDavid Chinner /* If the parent directory is already in the cache, use its AG. */ 6092a82b8beSDavid Chinner item = xfs_mru_cache_lookup(cache, pip->i_ino); 6102a82b8beSDavid Chinner if (item) { 6112a82b8beSDavid Chinner ASSERT(item->ip == pip); 6122a82b8beSDavid Chinner ag = item->ag; 6132a82b8beSDavid Chinner xfs_mru_cache_done(cache); 6142a82b8beSDavid Chinner 6152a82b8beSDavid Chinner TRACE_LOOKUP(mp, pip, pip, ag, xfs_filestream_peek_ag(mp, ag)); 6162a82b8beSDavid Chinner err = _xfs_filestream_update_ag(ip, pip, ag); 6172a82b8beSDavid Chinner 6182a82b8beSDavid Chinner goto exit; 6192a82b8beSDavid Chinner } 6202a82b8beSDavid Chinner 6212a82b8beSDavid Chinner /* 6222a82b8beSDavid Chinner * Set the starting AG using the rotor for inode32, otherwise 6232a82b8beSDavid Chinner * use the directory inode's AG. 6242a82b8beSDavid Chinner */ 6252a82b8beSDavid Chinner if (mp->m_flags & XFS_MOUNT_32BITINODES) { 6262a82b8beSDavid Chinner rotorstep = xfs_rotorstep; 6272a82b8beSDavid Chinner startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount; 6282a82b8beSDavid Chinner mp->m_agfrotor = (mp->m_agfrotor + 1) % 6292a82b8beSDavid Chinner (mp->m_sb.sb_agcount * rotorstep); 6302a82b8beSDavid Chinner } else 6312a82b8beSDavid Chinner startag = XFS_INO_TO_AGNO(mp, pip->i_ino); 6322a82b8beSDavid Chinner 6332a82b8beSDavid Chinner /* Pick a new AG for the parent inode starting at startag. */ 6342a82b8beSDavid Chinner err = _xfs_filestream_pick_ag(mp, startag, &ag, 0, 0); 6352a82b8beSDavid Chinner if (err || ag == NULLAGNUMBER) 6362a82b8beSDavid Chinner goto exit_did_pick; 6372a82b8beSDavid Chinner 6382a82b8beSDavid Chinner /* Associate the parent inode with the AG. */ 6392a82b8beSDavid Chinner err = _xfs_filestream_update_ag(pip, NULL, ag); 6402a82b8beSDavid Chinner if (err) 6412a82b8beSDavid Chinner goto exit_did_pick; 6422a82b8beSDavid Chinner 6432a82b8beSDavid Chinner /* Associate the file inode with the AG. */ 6442a82b8beSDavid Chinner err = _xfs_filestream_update_ag(ip, pip, ag); 6452a82b8beSDavid Chinner if (err) 6462a82b8beSDavid Chinner goto exit_did_pick; 6472a82b8beSDavid Chinner 6482a82b8beSDavid Chinner TRACE_ASSOCIATE(mp, ip, pip, ag, xfs_filestream_peek_ag(mp, ag)); 6492a82b8beSDavid Chinner 6502a82b8beSDavid Chinner exit_did_pick: 6512a82b8beSDavid Chinner /* 6522a82b8beSDavid Chinner * If _xfs_filestream_pick_ag() returned a valid AG, remove the 6532a82b8beSDavid Chinner * reference it took on it, since the file and directory will have taken 6542a82b8beSDavid Chinner * their own now if they were successfully cached. 6552a82b8beSDavid Chinner */ 6562a82b8beSDavid Chinner if (ag != NULLAGNUMBER) 6572a82b8beSDavid Chinner xfs_filestream_put_ag(mp, ag); 6582a82b8beSDavid Chinner 6592a82b8beSDavid Chinner exit: 6602a82b8beSDavid Chinner xfs_iunlock(pip, XFS_IOLOCK_EXCL); 6612a82b8beSDavid Chinner return -err; 6622a82b8beSDavid Chinner } 6632a82b8beSDavid Chinner 6642a82b8beSDavid Chinner /* 6652a82b8beSDavid Chinner * Pick a new allocation group for the current file and its file stream. This 6662a82b8beSDavid Chinner * function is called by xfs_bmap_filestreams() with the mount point's per-ag 6672a82b8beSDavid Chinner * lock held. 6682a82b8beSDavid Chinner */ 6692a82b8beSDavid Chinner int 6702a82b8beSDavid Chinner xfs_filestream_new_ag( 6712a82b8beSDavid Chinner xfs_bmalloca_t *ap, 6722a82b8beSDavid Chinner xfs_agnumber_t *agp) 6732a82b8beSDavid Chinner { 6742a82b8beSDavid Chinner int flags, err; 6752a82b8beSDavid Chinner xfs_inode_t *ip, *pip = NULL; 6762a82b8beSDavid Chinner xfs_mount_t *mp; 6772a82b8beSDavid Chinner xfs_mru_cache_t *cache; 6782a82b8beSDavid Chinner xfs_extlen_t minlen; 6792a82b8beSDavid Chinner fstrm_item_t *dir, *file; 6802a82b8beSDavid Chinner xfs_agnumber_t ag = NULLAGNUMBER; 6812a82b8beSDavid Chinner 6822a82b8beSDavid Chinner ip = ap->ip; 6832a82b8beSDavid Chinner mp = ip->i_mount; 6842a82b8beSDavid Chinner cache = mp->m_filestream; 6852a82b8beSDavid Chinner minlen = ap->alen; 6862a82b8beSDavid Chinner *agp = NULLAGNUMBER; 6872a82b8beSDavid Chinner 6882a82b8beSDavid Chinner /* 6892a82b8beSDavid Chinner * Look for the file in the cache, removing it if it's found. Doing 6902a82b8beSDavid Chinner * this allows it to be held across the dir lookup that follows. 6912a82b8beSDavid Chinner */ 6922a82b8beSDavid Chinner file = xfs_mru_cache_remove(cache, ip->i_ino); 6932a82b8beSDavid Chinner if (file) { 6942a82b8beSDavid Chinner ASSERT(ip == file->ip); 6952a82b8beSDavid Chinner 6962a82b8beSDavid Chinner /* Save the file's parent inode and old AG number for later. */ 6972a82b8beSDavid Chinner pip = file->pip; 6982a82b8beSDavid Chinner ag = file->ag; 6992a82b8beSDavid Chinner 7002a82b8beSDavid Chinner /* Look for the file's directory in the cache. */ 7012a82b8beSDavid Chinner dir = xfs_mru_cache_lookup(cache, pip->i_ino); 7022a82b8beSDavid Chinner if (dir) { 7032a82b8beSDavid Chinner ASSERT(pip == dir->ip); 7042a82b8beSDavid Chinner 7052a82b8beSDavid Chinner /* 7062a82b8beSDavid Chinner * If the directory has already moved on to a new AG, 7072a82b8beSDavid Chinner * use that AG as the new AG for the file. Don't 7082a82b8beSDavid Chinner * forget to twiddle the AG refcounts to match the 7092a82b8beSDavid Chinner * movement. 7102a82b8beSDavid Chinner */ 7112a82b8beSDavid Chinner if (dir->ag != file->ag) { 7122a82b8beSDavid Chinner xfs_filestream_put_ag(mp, file->ag); 7132a82b8beSDavid Chinner xfs_filestream_get_ag(mp, dir->ag); 7142a82b8beSDavid Chinner *agp = file->ag = dir->ag; 7152a82b8beSDavid Chinner } 7162a82b8beSDavid Chinner 7172a82b8beSDavid Chinner xfs_mru_cache_done(cache); 7182a82b8beSDavid Chinner } 7192a82b8beSDavid Chinner 7202a82b8beSDavid Chinner /* 7212a82b8beSDavid Chinner * Put the file back in the cache. If this fails, the free 7222a82b8beSDavid Chinner * function needs to be called to tidy up in the same way as if 7232a82b8beSDavid Chinner * the item had simply expired from the cache. 7242a82b8beSDavid Chinner */ 7252a82b8beSDavid Chinner err = xfs_mru_cache_insert(cache, ip->i_ino, file); 7262a82b8beSDavid Chinner if (err) { 7272a82b8beSDavid Chinner xfs_fstrm_free_func(ip->i_ino, file); 7282a82b8beSDavid Chinner return err; 7292a82b8beSDavid Chinner } 7302a82b8beSDavid Chinner 7312a82b8beSDavid Chinner /* 7322a82b8beSDavid Chinner * If the file's AG was moved to the directory's new AG, there's 7332a82b8beSDavid Chinner * nothing more to be done. 7342a82b8beSDavid Chinner */ 7352a82b8beSDavid Chinner if (*agp != NULLAGNUMBER) { 7362a82b8beSDavid Chinner TRACE_MOVEAG(mp, ip, pip, 7372a82b8beSDavid Chinner ag, xfs_filestream_peek_ag(mp, ag), 7382a82b8beSDavid Chinner *agp, xfs_filestream_peek_ag(mp, *agp)); 7392a82b8beSDavid Chinner return 0; 7402a82b8beSDavid Chinner } 7412a82b8beSDavid Chinner } 7422a82b8beSDavid Chinner 7432a82b8beSDavid Chinner /* 7442a82b8beSDavid Chinner * If the file's parent directory is known, take its iolock in exclusive 7452a82b8beSDavid Chinner * mode to prevent two sibling files from racing each other to migrate 7462a82b8beSDavid Chinner * themselves and their parent to different AGs. 747785ce418SChristoph Hellwig * 748785ce418SChristoph Hellwig * Note that we lock the parent directory iolock inside the child 749785ce418SChristoph Hellwig * iolock here. That's fine as we never hold both parent and child 750785ce418SChristoph Hellwig * iolock in any other place. This is different from the ilock, 751785ce418SChristoph Hellwig * which requires locking of the child after the parent for namespace 752785ce418SChristoph Hellwig * operations. 7532a82b8beSDavid Chinner */ 7542a82b8beSDavid Chinner if (pip) 755785ce418SChristoph Hellwig xfs_ilock(pip, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); 7562a82b8beSDavid Chinner 7572a82b8beSDavid Chinner /* 7582a82b8beSDavid Chinner * A new AG needs to be found for the file. If the file's parent 7592a82b8beSDavid Chinner * directory is also known, it will be moved to the new AG as well to 7602a82b8beSDavid Chinner * ensure that files created inside it in future use the new AG. 7612a82b8beSDavid Chinner */ 7622a82b8beSDavid Chinner ag = (ag == NULLAGNUMBER) ? 0 : (ag + 1) % mp->m_sb.sb_agcount; 7632a82b8beSDavid Chinner flags = (ap->userdata ? XFS_PICK_USERDATA : 0) | 764*0937e0fdSDave Chinner (ap->flist->xbf_low ? XFS_PICK_LOWSPACE : 0); 7652a82b8beSDavid Chinner 7662a82b8beSDavid Chinner err = _xfs_filestream_pick_ag(mp, ag, agp, flags, minlen); 7672a82b8beSDavid Chinner if (err || *agp == NULLAGNUMBER) 7682a82b8beSDavid Chinner goto exit; 7692a82b8beSDavid Chinner 7702a82b8beSDavid Chinner /* 7712a82b8beSDavid Chinner * If the file wasn't found in the file cache, then its parent directory 7722a82b8beSDavid Chinner * inode isn't known. For this to have happened, the file must either 7732a82b8beSDavid Chinner * be pre-existing, or it was created long enough ago that its cache 7742a82b8beSDavid Chinner * entry has expired. This isn't the sort of usage that the filestreams 7752a82b8beSDavid Chinner * allocator is trying to optimise, so there's no point trying to track 7762a82b8beSDavid Chinner * its new AG somehow in the filestream data structures. 7772a82b8beSDavid Chinner */ 7782a82b8beSDavid Chinner if (!pip) { 7792a82b8beSDavid Chinner TRACE_ORPHAN(mp, ip, *agp); 7802a82b8beSDavid Chinner goto exit; 7812a82b8beSDavid Chinner } 7822a82b8beSDavid Chinner 7832a82b8beSDavid Chinner /* Associate the parent inode with the AG. */ 7842a82b8beSDavid Chinner err = _xfs_filestream_update_ag(pip, NULL, *agp); 7852a82b8beSDavid Chinner if (err) 7862a82b8beSDavid Chinner goto exit; 7872a82b8beSDavid Chinner 7882a82b8beSDavid Chinner /* Associate the file inode with the AG. */ 7892a82b8beSDavid Chinner err = _xfs_filestream_update_ag(ip, pip, *agp); 7902a82b8beSDavid Chinner if (err) 7912a82b8beSDavid Chinner goto exit; 7922a82b8beSDavid Chinner 7932a82b8beSDavid Chinner TRACE_MOVEAG(mp, ip, pip, NULLAGNUMBER, 0, 7942a82b8beSDavid Chinner *agp, xfs_filestream_peek_ag(mp, *agp)); 7952a82b8beSDavid Chinner 7962a82b8beSDavid Chinner exit: 7972a82b8beSDavid Chinner /* 7982a82b8beSDavid Chinner * If _xfs_filestream_pick_ag() returned a valid AG, remove the 7992a82b8beSDavid Chinner * reference it took on it, since the file and directory will have taken 8002a82b8beSDavid Chinner * their own now if they were successfully cached. 8012a82b8beSDavid Chinner */ 8022a82b8beSDavid Chinner if (*agp != NULLAGNUMBER) 8032a82b8beSDavid Chinner xfs_filestream_put_ag(mp, *agp); 8042a82b8beSDavid Chinner else 8052a82b8beSDavid Chinner *agp = 0; 8062a82b8beSDavid Chinner 8072a82b8beSDavid Chinner if (pip) 8082a82b8beSDavid Chinner xfs_iunlock(pip, XFS_IOLOCK_EXCL); 8092a82b8beSDavid Chinner 8102a82b8beSDavid Chinner return err; 8112a82b8beSDavid Chinner } 8122a82b8beSDavid Chinner 8132a82b8beSDavid Chinner /* 8142a82b8beSDavid Chinner * Remove an association between an inode and a filestream object. 8152a82b8beSDavid Chinner * Typically this is done on last close of an unlinked file. 8162a82b8beSDavid Chinner */ 8172a82b8beSDavid Chinner void 8182a82b8beSDavid Chinner xfs_filestream_deassociate( 8192a82b8beSDavid Chinner xfs_inode_t *ip) 8202a82b8beSDavid Chinner { 8212a82b8beSDavid Chinner xfs_mru_cache_t *cache = ip->i_mount->m_filestream; 8222a82b8beSDavid Chinner 8232a82b8beSDavid Chinner xfs_mru_cache_delete(cache, ip->i_ino); 8242a82b8beSDavid Chinner } 825