12a82b8beSDavid Chinner /* 22a82b8beSDavid Chinner * Copyright (c) 2006-2007 Silicon Graphics, Inc. 32a82b8beSDavid Chinner * All Rights Reserved. 42a82b8beSDavid Chinner * 52a82b8beSDavid Chinner * This program is free software; you can redistribute it and/or 62a82b8beSDavid Chinner * modify it under the terms of the GNU General Public License as 72a82b8beSDavid Chinner * published by the Free Software Foundation. 82a82b8beSDavid Chinner * 92a82b8beSDavid Chinner * This program is distributed in the hope that it would be useful, 102a82b8beSDavid Chinner * but WITHOUT ANY WARRANTY; without even the implied warranty of 112a82b8beSDavid Chinner * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 122a82b8beSDavid Chinner * GNU General Public License for more details. 132a82b8beSDavid Chinner * 142a82b8beSDavid Chinner * You should have received a copy of the GNU General Public License 152a82b8beSDavid Chinner * along with this program; if not, write the Free Software Foundation, 162a82b8beSDavid Chinner * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 172a82b8beSDavid Chinner */ 182a82b8beSDavid Chinner #include "xfs.h" 19a4fbe6abSDave Chinner #include "xfs_format.h" 20239880efSDave Chinner #include "xfs_log_format.h" 21239880efSDave Chinner #include "xfs_trans_resv.h" 22239880efSDave Chinner #include "xfs_ag.h" 23239880efSDave Chinner #include "xfs_sb.h" 24239880efSDave Chinner #include "xfs_mount.h" 252a82b8beSDavid Chinner #include "xfs_inum.h" 262a82b8beSDavid Chinner #include "xfs_inode.h" 272a82b8beSDavid Chinner #include "xfs_bmap.h" 2868988114SDave Chinner #include "xfs_bmap_util.h" 292a82b8beSDavid Chinner #include "xfs_alloc.h" 302a82b8beSDavid Chinner #include "xfs_mru_cache.h" 31a4fbe6abSDave Chinner #include "xfs_dinode.h" 322a82b8beSDavid Chinner #include "xfs_filestream.h" 330b1b213fSChristoph Hellwig #include "xfs_trace.h" 342a82b8beSDavid Chinner 352a82b8beSDavid Chinner #ifdef XFS_FILESTREAMS_TRACE 362a82b8beSDavid Chinner 372a82b8beSDavid Chinner ktrace_t *xfs_filestreams_trace_buf; 382a82b8beSDavid Chinner 392a82b8beSDavid Chinner STATIC void 402a82b8beSDavid Chinner xfs_filestreams_trace( 412a82b8beSDavid Chinner xfs_mount_t *mp, /* mount point */ 422a82b8beSDavid Chinner int type, /* type of trace */ 432a82b8beSDavid Chinner const char *func, /* source function */ 442a82b8beSDavid Chinner int line, /* source line number */ 452a82b8beSDavid Chinner __psunsigned_t arg0, 462a82b8beSDavid Chinner __psunsigned_t arg1, 472a82b8beSDavid Chinner __psunsigned_t arg2, 482a82b8beSDavid Chinner __psunsigned_t arg3, 492a82b8beSDavid Chinner __psunsigned_t arg4, 502a82b8beSDavid Chinner __psunsigned_t arg5) 512a82b8beSDavid Chinner { 522a82b8beSDavid Chinner ktrace_enter(xfs_filestreams_trace_buf, 532a82b8beSDavid Chinner (void *)(__psint_t)(type | (line << 16)), 542a82b8beSDavid Chinner (void *)func, 552a82b8beSDavid Chinner (void *)(__psunsigned_t)current_pid(), 562a82b8beSDavid Chinner (void *)mp, 572a82b8beSDavid Chinner (void *)(__psunsigned_t)arg0, 582a82b8beSDavid Chinner (void *)(__psunsigned_t)arg1, 592a82b8beSDavid Chinner (void *)(__psunsigned_t)arg2, 602a82b8beSDavid Chinner (void *)(__psunsigned_t)arg3, 612a82b8beSDavid Chinner (void *)(__psunsigned_t)arg4, 622a82b8beSDavid Chinner (void *)(__psunsigned_t)arg5, 632a82b8beSDavid Chinner NULL, NULL, NULL, NULL, NULL, NULL); 642a82b8beSDavid Chinner } 652a82b8beSDavid Chinner 662a82b8beSDavid Chinner #define TRACE0(mp,t) TRACE6(mp,t,0,0,0,0,0,0) 672a82b8beSDavid Chinner #define TRACE1(mp,t,a0) TRACE6(mp,t,a0,0,0,0,0,0) 682a82b8beSDavid Chinner #define TRACE2(mp,t,a0,a1) TRACE6(mp,t,a0,a1,0,0,0,0) 692a82b8beSDavid Chinner #define TRACE3(mp,t,a0,a1,a2) TRACE6(mp,t,a0,a1,a2,0,0,0) 702a82b8beSDavid Chinner #define TRACE4(mp,t,a0,a1,a2,a3) TRACE6(mp,t,a0,a1,a2,a3,0,0) 712a82b8beSDavid Chinner #define TRACE5(mp,t,a0,a1,a2,a3,a4) TRACE6(mp,t,a0,a1,a2,a3,a4,0) 722a82b8beSDavid Chinner #define TRACE6(mp,t,a0,a1,a2,a3,a4,a5) \ 7334a622b2SHarvey Harrison xfs_filestreams_trace(mp, t, __func__, __LINE__, \ 742a82b8beSDavid Chinner (__psunsigned_t)a0, (__psunsigned_t)a1, \ 752a82b8beSDavid Chinner (__psunsigned_t)a2, (__psunsigned_t)a3, \ 762a82b8beSDavid Chinner (__psunsigned_t)a4, (__psunsigned_t)a5) 772a82b8beSDavid Chinner 782a82b8beSDavid Chinner #define TRACE_AG_SCAN(mp, ag, ag2) \ 792a82b8beSDavid Chinner TRACE2(mp, XFS_FSTRM_KTRACE_AGSCAN, ag, ag2); 802a82b8beSDavid Chinner #define TRACE_AG_PICK1(mp, max_ag, maxfree) \ 812a82b8beSDavid Chinner TRACE2(mp, XFS_FSTRM_KTRACE_AGPICK1, max_ag, maxfree); 822a82b8beSDavid Chinner #define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag) \ 832a82b8beSDavid Chinner TRACE6(mp, XFS_FSTRM_KTRACE_AGPICK2, ag, ag2, \ 842a82b8beSDavid Chinner cnt, free, scan, flag) 852a82b8beSDavid Chinner #define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2) \ 862a82b8beSDavid Chinner TRACE5(mp, XFS_FSTRM_KTRACE_UPDATE, ip, ag, cnt, ag2, cnt2) 872a82b8beSDavid Chinner #define TRACE_FREE(mp, ip, pip, ag, cnt) \ 882a82b8beSDavid Chinner TRACE4(mp, XFS_FSTRM_KTRACE_FREE, ip, pip, ag, cnt) 892a82b8beSDavid Chinner #define TRACE_LOOKUP(mp, ip, pip, ag, cnt) \ 902a82b8beSDavid Chinner TRACE4(mp, XFS_FSTRM_KTRACE_ITEM_LOOKUP, ip, pip, ag, cnt) 912a82b8beSDavid Chinner #define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt) \ 922a82b8beSDavid Chinner TRACE4(mp, XFS_FSTRM_KTRACE_ASSOCIATE, ip, pip, ag, cnt) 932a82b8beSDavid Chinner #define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt) \ 942a82b8beSDavid Chinner TRACE6(mp, XFS_FSTRM_KTRACE_MOVEAG, ip, pip, oag, ocnt, nag, ncnt) 952a82b8beSDavid Chinner #define TRACE_ORPHAN(mp, ip, ag) \ 962a82b8beSDavid Chinner TRACE2(mp, XFS_FSTRM_KTRACE_ORPHAN, ip, ag); 972a82b8beSDavid Chinner 982a82b8beSDavid Chinner 992a82b8beSDavid Chinner #else 1002a82b8beSDavid Chinner #define TRACE_AG_SCAN(mp, ag, ag2) 1012a82b8beSDavid Chinner #define TRACE_AG_PICK1(mp, max_ag, maxfree) 1022a82b8beSDavid Chinner #define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag) 1032a82b8beSDavid Chinner #define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2) 1042a82b8beSDavid Chinner #define TRACE_FREE(mp, ip, pip, ag, cnt) 1052a82b8beSDavid Chinner #define TRACE_LOOKUP(mp, ip, pip, ag, cnt) 1062a82b8beSDavid Chinner #define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt) 1072a82b8beSDavid Chinner #define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt) 1082a82b8beSDavid Chinner #define TRACE_ORPHAN(mp, ip, ag) 1092a82b8beSDavid Chinner #endif 1102a82b8beSDavid Chinner 1112a82b8beSDavid Chinner static kmem_zone_t *item_zone; 1122a82b8beSDavid Chinner 1132a82b8beSDavid Chinner /* 1142a82b8beSDavid Chinner * Structure for associating a file or a directory with an allocation group. 1152a82b8beSDavid Chinner * The parent directory pointer is only needed for files, but since there will 1162a82b8beSDavid Chinner * generally be vastly more files than directories in the cache, using the same 1172a82b8beSDavid Chinner * data structure simplifies the code with very little memory overhead. 1182a82b8beSDavid Chinner */ 1192a82b8beSDavid Chinner typedef struct fstrm_item 1202a82b8beSDavid Chinner { 121*22328d71SChristoph Hellwig struct xfs_mru_cache_elem mru; 1222a82b8beSDavid Chinner xfs_agnumber_t ag; /* AG currently in use for the file/directory. */ 1232a82b8beSDavid Chinner xfs_inode_t *ip; /* inode self-pointer. */ 1242a82b8beSDavid Chinner xfs_inode_t *pip; /* Parent directory inode pointer. */ 1252a82b8beSDavid Chinner } fstrm_item_t; 1262a82b8beSDavid Chinner 1270664ce8dSChristoph Hellwig /* 1280664ce8dSChristoph Hellwig * Allocation group filestream associations are tracked with per-ag atomic 1290664ce8dSChristoph Hellwig * counters. These counters allow _xfs_filestream_pick_ag() to tell whether a 1300664ce8dSChristoph Hellwig * particular AG already has active filestreams associated with it. The mount 1310664ce8dSChristoph Hellwig * point's m_peraglock is used to protect these counters from per-ag array 1320664ce8dSChristoph Hellwig * re-allocation during a growfs operation. When xfs_growfs_data_private() is 1330664ce8dSChristoph Hellwig * about to reallocate the array, it calls xfs_filestream_flush() with the 1340664ce8dSChristoph Hellwig * m_peraglock held in write mode. 1350664ce8dSChristoph Hellwig * 1360664ce8dSChristoph Hellwig * Since xfs_mru_cache_flush() guarantees that all the free functions for all 1370664ce8dSChristoph Hellwig * the cache elements have finished executing before it returns, it's safe for 1380664ce8dSChristoph Hellwig * the free functions to use the atomic counters without m_peraglock protection. 1390664ce8dSChristoph Hellwig * This allows the implementation of xfs_fstrm_free_func() to be agnostic about 1400664ce8dSChristoph Hellwig * whether it was called with the m_peraglock held in read mode, write mode or 1410664ce8dSChristoph Hellwig * not held at all. The race condition this addresses is the following: 1420664ce8dSChristoph Hellwig * 1430664ce8dSChristoph Hellwig * - The work queue scheduler fires and pulls a filestream directory cache 1440664ce8dSChristoph Hellwig * element off the LRU end of the cache for deletion, then gets pre-empted. 1450664ce8dSChristoph Hellwig * - A growfs operation grabs the m_peraglock in write mode, flushes all the 1460664ce8dSChristoph Hellwig * remaining items from the cache and reallocates the mount point's per-ag 1470664ce8dSChristoph Hellwig * array, resetting all the counters to zero. 1480664ce8dSChristoph Hellwig * - The work queue thread resumes and calls the free function for the element 1490664ce8dSChristoph Hellwig * it started cleaning up earlier. In the process it decrements the 1500664ce8dSChristoph Hellwig * filestreams counter for an AG that now has no references. 1510664ce8dSChristoph Hellwig * 1520664ce8dSChristoph Hellwig * With a shrinkfs feature, the above scenario could panic the system. 1530664ce8dSChristoph Hellwig * 1540664ce8dSChristoph Hellwig * All other uses of the following macros should be protected by either the 1550664ce8dSChristoph Hellwig * m_peraglock held in read mode, or the cache's internal locking exposed by the 1560664ce8dSChristoph Hellwig * interval between a call to xfs_mru_cache_lookup() and a call to 1570664ce8dSChristoph Hellwig * xfs_mru_cache_done(). In addition, the m_peraglock must be held in read mode 1580664ce8dSChristoph Hellwig * when new elements are added to the cache. 1590664ce8dSChristoph Hellwig * 1600664ce8dSChristoph Hellwig * Combined, these locking rules ensure that no associations will ever exist in 1610664ce8dSChristoph Hellwig * the cache that reference per-ag array elements that have since been 1620664ce8dSChristoph Hellwig * reallocated. 1630664ce8dSChristoph Hellwig */ 1640664ce8dSChristoph Hellwig static int 1650664ce8dSChristoph Hellwig xfs_filestream_peek_ag( 1660664ce8dSChristoph Hellwig xfs_mount_t *mp, 1670664ce8dSChristoph Hellwig xfs_agnumber_t agno) 1680664ce8dSChristoph Hellwig { 1690664ce8dSChristoph Hellwig struct xfs_perag *pag; 1700664ce8dSChristoph Hellwig int ret; 1710664ce8dSChristoph Hellwig 1720664ce8dSChristoph Hellwig pag = xfs_perag_get(mp, agno); 1730664ce8dSChristoph Hellwig ret = atomic_read(&pag->pagf_fstrms); 1740664ce8dSChristoph Hellwig xfs_perag_put(pag); 1750664ce8dSChristoph Hellwig return ret; 1760664ce8dSChristoph Hellwig } 1770664ce8dSChristoph Hellwig 1780664ce8dSChristoph Hellwig static int 1790664ce8dSChristoph Hellwig xfs_filestream_get_ag( 1800664ce8dSChristoph Hellwig xfs_mount_t *mp, 1810664ce8dSChristoph Hellwig xfs_agnumber_t agno) 1820664ce8dSChristoph Hellwig { 1830664ce8dSChristoph Hellwig struct xfs_perag *pag; 1840664ce8dSChristoph Hellwig int ret; 1850664ce8dSChristoph Hellwig 1860664ce8dSChristoph Hellwig pag = xfs_perag_get(mp, agno); 1870664ce8dSChristoph Hellwig ret = atomic_inc_return(&pag->pagf_fstrms); 1880664ce8dSChristoph Hellwig xfs_perag_put(pag); 1890664ce8dSChristoph Hellwig return ret; 1900664ce8dSChristoph Hellwig } 1910664ce8dSChristoph Hellwig 1920664ce8dSChristoph Hellwig static void 1930664ce8dSChristoph Hellwig xfs_filestream_put_ag( 1940664ce8dSChristoph Hellwig xfs_mount_t *mp, 1950664ce8dSChristoph Hellwig xfs_agnumber_t agno) 1960664ce8dSChristoph Hellwig { 1970664ce8dSChristoph Hellwig struct xfs_perag *pag; 1980664ce8dSChristoph Hellwig 1990664ce8dSChristoph Hellwig pag = xfs_perag_get(mp, agno); 2000664ce8dSChristoph Hellwig atomic_dec(&pag->pagf_fstrms); 2010664ce8dSChristoph Hellwig xfs_perag_put(pag); 2020664ce8dSChristoph Hellwig } 2032a82b8beSDavid Chinner 2042a82b8beSDavid Chinner /* 2052a82b8beSDavid Chinner * Scan the AGs starting at startag looking for an AG that isn't in use and has 2062a82b8beSDavid Chinner * at least minlen blocks free. 2072a82b8beSDavid Chinner */ 2082a82b8beSDavid Chinner static int 2092a82b8beSDavid Chinner _xfs_filestream_pick_ag( 2102a82b8beSDavid Chinner xfs_mount_t *mp, 2112a82b8beSDavid Chinner xfs_agnumber_t startag, 2122a82b8beSDavid Chinner xfs_agnumber_t *agp, 2132a82b8beSDavid Chinner int flags, 2142a82b8beSDavid Chinner xfs_extlen_t minlen) 2152a82b8beSDavid Chinner { 2164196ac08SDave Chinner int streams, max_streams; 2172a82b8beSDavid Chinner int err, trylock, nscan; 2186cc87645SDave Chinner xfs_extlen_t longest, free, minfree, maxfree = 0; 2192a82b8beSDavid Chinner xfs_agnumber_t ag, max_ag = NULLAGNUMBER; 2202a82b8beSDavid Chinner struct xfs_perag *pag; 2212a82b8beSDavid Chinner 2222a82b8beSDavid Chinner /* 2% of an AG's blocks must be free for it to be chosen. */ 2232a82b8beSDavid Chinner minfree = mp->m_sb.sb_agblocks / 50; 2242a82b8beSDavid Chinner 2252a82b8beSDavid Chinner ag = startag; 2262a82b8beSDavid Chinner *agp = NULLAGNUMBER; 2272a82b8beSDavid Chinner 2282a82b8beSDavid Chinner /* For the first pass, don't sleep trying to init the per-AG. */ 2292a82b8beSDavid Chinner trylock = XFS_ALLOC_FLAG_TRYLOCK; 2302a82b8beSDavid Chinner 2312a82b8beSDavid Chinner for (nscan = 0; 1; nscan++) { 2324196ac08SDave Chinner pag = xfs_perag_get(mp, ag); 2334196ac08SDave Chinner TRACE_AG_SCAN(mp, ag, atomic_read(&pag->pagf_fstrms)); 2342a82b8beSDavid Chinner 2352a82b8beSDavid Chinner if (!pag->pagf_init) { 2362a82b8beSDavid Chinner err = xfs_alloc_pagf_init(mp, NULL, ag, trylock); 2374196ac08SDave Chinner if (err && !trylock) { 2384196ac08SDave Chinner xfs_perag_put(pag); 2392a82b8beSDavid Chinner return err; 2402a82b8beSDavid Chinner } 2414196ac08SDave Chinner } 2422a82b8beSDavid Chinner 2432a82b8beSDavid Chinner /* Might fail sometimes during the 1st pass with trylock set. */ 2442a82b8beSDavid Chinner if (!pag->pagf_init) 2452a82b8beSDavid Chinner goto next_ag; 2462a82b8beSDavid Chinner 2472a82b8beSDavid Chinner /* Keep track of the AG with the most free blocks. */ 2482a82b8beSDavid Chinner if (pag->pagf_freeblks > maxfree) { 2492a82b8beSDavid Chinner maxfree = pag->pagf_freeblks; 2504196ac08SDave Chinner max_streams = atomic_read(&pag->pagf_fstrms); 2512a82b8beSDavid Chinner max_ag = ag; 2522a82b8beSDavid Chinner } 2532a82b8beSDavid Chinner 2542a82b8beSDavid Chinner /* 2552a82b8beSDavid Chinner * The AG reference count does two things: it enforces mutual 2562a82b8beSDavid Chinner * exclusion when examining the suitability of an AG in this 2572a82b8beSDavid Chinner * loop, and it guards against two filestreams being established 2582a82b8beSDavid Chinner * in the same AG as each other. 2592a82b8beSDavid Chinner */ 2602a82b8beSDavid Chinner if (xfs_filestream_get_ag(mp, ag) > 1) { 2612a82b8beSDavid Chinner xfs_filestream_put_ag(mp, ag); 2622a82b8beSDavid Chinner goto next_ag; 2632a82b8beSDavid Chinner } 2642a82b8beSDavid Chinner 2656cc87645SDave Chinner longest = xfs_alloc_longest_free_extent(mp, pag); 2662a82b8beSDavid Chinner if (((minlen && longest >= minlen) || 2672a82b8beSDavid Chinner (!minlen && pag->pagf_freeblks >= minfree)) && 2682a82b8beSDavid Chinner (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) || 2692a82b8beSDavid Chinner (flags & XFS_PICK_LOWSPACE))) { 2702a82b8beSDavid Chinner 2712a82b8beSDavid Chinner /* Break out, retaining the reference on the AG. */ 2722a82b8beSDavid Chinner free = pag->pagf_freeblks; 2734196ac08SDave Chinner streams = atomic_read(&pag->pagf_fstrms); 2744196ac08SDave Chinner xfs_perag_put(pag); 2752a82b8beSDavid Chinner *agp = ag; 2762a82b8beSDavid Chinner break; 2772a82b8beSDavid Chinner } 2782a82b8beSDavid Chinner 2792a82b8beSDavid Chinner /* Drop the reference on this AG, it's not usable. */ 2802a82b8beSDavid Chinner xfs_filestream_put_ag(mp, ag); 2812a82b8beSDavid Chinner next_ag: 2824196ac08SDave Chinner xfs_perag_put(pag); 2832a82b8beSDavid Chinner /* Move to the next AG, wrapping to AG 0 if necessary. */ 2842a82b8beSDavid Chinner if (++ag >= mp->m_sb.sb_agcount) 2852a82b8beSDavid Chinner ag = 0; 2862a82b8beSDavid Chinner 2872a82b8beSDavid Chinner /* If a full pass of the AGs hasn't been done yet, continue. */ 2882a82b8beSDavid Chinner if (ag != startag) 2892a82b8beSDavid Chinner continue; 2902a82b8beSDavid Chinner 2912a82b8beSDavid Chinner /* Allow sleeping in xfs_alloc_pagf_init() on the 2nd pass. */ 2922a82b8beSDavid Chinner if (trylock != 0) { 2932a82b8beSDavid Chinner trylock = 0; 2942a82b8beSDavid Chinner continue; 2952a82b8beSDavid Chinner } 2962a82b8beSDavid Chinner 2972a82b8beSDavid Chinner /* Finally, if lowspace wasn't set, set it for the 3rd pass. */ 2982a82b8beSDavid Chinner if (!(flags & XFS_PICK_LOWSPACE)) { 2992a82b8beSDavid Chinner flags |= XFS_PICK_LOWSPACE; 3002a82b8beSDavid Chinner continue; 3012a82b8beSDavid Chinner } 3022a82b8beSDavid Chinner 3032a82b8beSDavid Chinner /* 3042a82b8beSDavid Chinner * Take the AG with the most free space, regardless of whether 3052a82b8beSDavid Chinner * it's already in use by another filestream. 3062a82b8beSDavid Chinner */ 3072a82b8beSDavid Chinner if (max_ag != NULLAGNUMBER) { 3082a82b8beSDavid Chinner xfs_filestream_get_ag(mp, max_ag); 3092a82b8beSDavid Chinner TRACE_AG_PICK1(mp, max_ag, maxfree); 3104196ac08SDave Chinner streams = max_streams; 3112a82b8beSDavid Chinner free = maxfree; 3122a82b8beSDavid Chinner *agp = max_ag; 3132a82b8beSDavid Chinner break; 3142a82b8beSDavid Chinner } 3152a82b8beSDavid Chinner 3162a82b8beSDavid Chinner /* take AG 0 if none matched */ 3172a82b8beSDavid Chinner TRACE_AG_PICK1(mp, max_ag, maxfree); 3182a82b8beSDavid Chinner *agp = 0; 3192a82b8beSDavid Chinner return 0; 3202a82b8beSDavid Chinner } 3212a82b8beSDavid Chinner 3224196ac08SDave Chinner TRACE_AG_PICK2(mp, startag, *agp, streams, free, nscan, flags); 3232a82b8beSDavid Chinner 3242a82b8beSDavid Chinner return 0; 3252a82b8beSDavid Chinner } 3262a82b8beSDavid Chinner 3272a82b8beSDavid Chinner /* 3282a82b8beSDavid Chinner * Set the allocation group number for a file or a directory, updating inode 3291c1c6ebcSDave Chinner * references and per-AG references as appropriate. 3302a82b8beSDavid Chinner */ 3312a82b8beSDavid Chinner static int 3322a82b8beSDavid Chinner _xfs_filestream_update_ag( 3332a82b8beSDavid Chinner xfs_inode_t *ip, 3342a82b8beSDavid Chinner xfs_inode_t *pip, 3352a82b8beSDavid Chinner xfs_agnumber_t ag) 3362a82b8beSDavid Chinner { 3372a82b8beSDavid Chinner int err = 0; 3382a82b8beSDavid Chinner xfs_mount_t *mp; 3392a82b8beSDavid Chinner fstrm_item_t *item; 3402a82b8beSDavid Chinner xfs_agnumber_t old_ag; 3412a82b8beSDavid Chinner xfs_inode_t *old_pip; 342*22328d71SChristoph Hellwig struct xfs_mru_cache_elem *mru; 3432a82b8beSDavid Chinner 3442a82b8beSDavid Chinner /* 3452a82b8beSDavid Chinner * Either ip is a regular file and pip is a directory, or ip is a 3462a82b8beSDavid Chinner * directory and pip is NULL. 3472a82b8beSDavid Chinner */ 348abbede1bSAl Viro ASSERT(ip && ((S_ISREG(ip->i_d.di_mode) && pip && 34903209378SAl Viro S_ISDIR(pip->i_d.di_mode)) || 35003209378SAl Viro (S_ISDIR(ip->i_d.di_mode) && !pip))); 3512a82b8beSDavid Chinner 3522a82b8beSDavid Chinner mp = ip->i_mount; 3532a82b8beSDavid Chinner 354*22328d71SChristoph Hellwig mru = xfs_mru_cache_lookup(mp->m_filestream, ip->i_ino); 355*22328d71SChristoph Hellwig if (mru) { 356*22328d71SChristoph Hellwig item = container_of(mru, fstrm_item_t, mru); 357*22328d71SChristoph Hellwig 3582a82b8beSDavid Chinner ASSERT(item->ip == ip); 3592a82b8beSDavid Chinner old_ag = item->ag; 3602a82b8beSDavid Chinner item->ag = ag; 3612a82b8beSDavid Chinner old_pip = item->pip; 3622a82b8beSDavid Chinner item->pip = pip; 363*22328d71SChristoph Hellwig xfs_mru_cache_done(mp->m_filestream); 3642a82b8beSDavid Chinner 3652a82b8beSDavid Chinner /* 3662a82b8beSDavid Chinner * If the AG has changed, drop the old ref and take a new one, 3672a82b8beSDavid Chinner * effectively transferring the reference from old to new AG. 3682a82b8beSDavid Chinner */ 3692a82b8beSDavid Chinner if (ag != old_ag) { 3702a82b8beSDavid Chinner xfs_filestream_put_ag(mp, old_ag); 3712a82b8beSDavid Chinner xfs_filestream_get_ag(mp, ag); 3722a82b8beSDavid Chinner } 3732a82b8beSDavid Chinner 3742a82b8beSDavid Chinner /* 3752a82b8beSDavid Chinner * If ip is a file and its pip has changed, drop the old ref and 3762a82b8beSDavid Chinner * take a new one. 3772a82b8beSDavid Chinner */ 3782a82b8beSDavid Chinner if (pip && pip != old_pip) { 3792a82b8beSDavid Chinner IRELE(old_pip); 3802a82b8beSDavid Chinner IHOLD(pip); 3812a82b8beSDavid Chinner } 3822a82b8beSDavid Chinner 3832a82b8beSDavid Chinner TRACE_UPDATE(mp, ip, old_ag, xfs_filestream_peek_ag(mp, old_ag), 3842a82b8beSDavid Chinner ag, xfs_filestream_peek_ag(mp, ag)); 3852a82b8beSDavid Chinner return 0; 3862a82b8beSDavid Chinner } 3872a82b8beSDavid Chinner 3882a82b8beSDavid Chinner item = kmem_zone_zalloc(item_zone, KM_MAYFAIL); 3892a82b8beSDavid Chinner if (!item) 3902a82b8beSDavid Chinner return ENOMEM; 3912a82b8beSDavid Chinner 3922a82b8beSDavid Chinner item->ag = ag; 3932a82b8beSDavid Chinner item->ip = ip; 3942a82b8beSDavid Chinner item->pip = pip; 3952a82b8beSDavid Chinner 396*22328d71SChristoph Hellwig err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru); 3972a82b8beSDavid Chinner if (err) { 3982a82b8beSDavid Chinner kmem_zone_free(item_zone, item); 3992a82b8beSDavid Chinner return err; 4002a82b8beSDavid Chinner } 4012a82b8beSDavid Chinner 4022a82b8beSDavid Chinner /* Take a reference on the AG. */ 4032a82b8beSDavid Chinner xfs_filestream_get_ag(mp, ag); 4042a82b8beSDavid Chinner 4052a82b8beSDavid Chinner /* 4062a82b8beSDavid Chinner * Take a reference on the inode itself regardless of whether it's a 4072a82b8beSDavid Chinner * regular file or a directory. 4082a82b8beSDavid Chinner */ 4092a82b8beSDavid Chinner IHOLD(ip); 4102a82b8beSDavid Chinner 4112a82b8beSDavid Chinner /* 4122a82b8beSDavid Chinner * In the case of a regular file, take a reference on the parent inode 4132a82b8beSDavid Chinner * as well to ensure it remains in-core. 4142a82b8beSDavid Chinner */ 4152a82b8beSDavid Chinner if (pip) 4162a82b8beSDavid Chinner IHOLD(pip); 4172a82b8beSDavid Chinner 4182a82b8beSDavid Chinner TRACE_UPDATE(mp, ip, ag, xfs_filestream_peek_ag(mp, ag), 4192a82b8beSDavid Chinner ag, xfs_filestream_peek_ag(mp, ag)); 4202a82b8beSDavid Chinner 4212a82b8beSDavid Chinner return 0; 4222a82b8beSDavid Chinner } 4232a82b8beSDavid Chinner 4242a82b8beSDavid Chinner /* xfs_fstrm_free_func(): callback for freeing cached stream items. */ 425a8272ce0SDavid Chinner STATIC void 4262a82b8beSDavid Chinner xfs_fstrm_free_func( 427*22328d71SChristoph Hellwig struct xfs_mru_cache_elem *mru) 4282a82b8beSDavid Chinner { 429*22328d71SChristoph Hellwig fstrm_item_t *item = 430*22328d71SChristoph Hellwig container_of(mru, fstrm_item_t, mru); 4312a82b8beSDavid Chinner xfs_inode_t *ip = item->ip; 4322a82b8beSDavid Chinner 4332a82b8beSDavid Chinner xfs_iflags_clear(ip, XFS_IFILESTREAM); 4342a82b8beSDavid Chinner 4352a82b8beSDavid Chinner /* Drop the reference taken on the AG when the item was added. */ 4360664ce8dSChristoph Hellwig xfs_filestream_put_ag(ip->i_mount, item->ag); 4372a82b8beSDavid Chinner 4382a82b8beSDavid Chinner TRACE_FREE(ip->i_mount, ip, item->pip, item->ag, 4392a82b8beSDavid Chinner xfs_filestream_peek_ag(ip->i_mount, item->ag)); 4402a82b8beSDavid Chinner 4412a82b8beSDavid Chinner /* 4422a82b8beSDavid Chinner * _xfs_filestream_update_ag() always takes a reference on the inode 4432a82b8beSDavid Chinner * itself, whether it's a file or a directory. Release it here. 4442a82b8beSDavid Chinner * This can result in the inode being freed and so we must 4452a82b8beSDavid Chinner * not hold any inode locks when freeing filesstreams objects 4462a82b8beSDavid Chinner * otherwise we can deadlock here. 4472a82b8beSDavid Chinner */ 4482a82b8beSDavid Chinner IRELE(ip); 4492a82b8beSDavid Chinner 4502a82b8beSDavid Chinner /* 4512a82b8beSDavid Chinner * In the case of a regular file, _xfs_filestream_update_ag() also 4522a82b8beSDavid Chinner * takes a ref on the parent inode to keep it in-core. Release that 4532a82b8beSDavid Chinner * too. 4542a82b8beSDavid Chinner */ 4552a82b8beSDavid Chinner if (item->pip) 4562a82b8beSDavid Chinner IRELE(item->pip); 4572a82b8beSDavid Chinner 4582a82b8beSDavid Chinner /* Finally, free the memory allocated for the item. */ 4592a82b8beSDavid Chinner kmem_zone_free(item_zone, item); 4602a82b8beSDavid Chinner } 4612a82b8beSDavid Chinner 4622a82b8beSDavid Chinner /* 4632a82b8beSDavid Chinner * xfs_filestream_init() is called at xfs initialisation time to set up the 4642a82b8beSDavid Chinner * memory zone that will be used for filestream data structure allocation. 4652a82b8beSDavid Chinner */ 4662a82b8beSDavid Chinner int 4672a82b8beSDavid Chinner xfs_filestream_init(void) 4682a82b8beSDavid Chinner { 4692a82b8beSDavid Chinner item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item"); 4709f8868ffSChristoph Hellwig if (!item_zone) 4719f8868ffSChristoph Hellwig return -ENOMEM; 4720b1b213fSChristoph Hellwig 4739f8868ffSChristoph Hellwig return 0; 4742a82b8beSDavid Chinner } 4752a82b8beSDavid Chinner 4762a82b8beSDavid Chinner /* 4772a82b8beSDavid Chinner * xfs_filestream_uninit() is called at xfs termination time to destroy the 4782a82b8beSDavid Chinner * memory zone that was used for filestream data structure allocation. 4792a82b8beSDavid Chinner */ 4802a82b8beSDavid Chinner void 4812a82b8beSDavid Chinner xfs_filestream_uninit(void) 4822a82b8beSDavid Chinner { 4832a82b8beSDavid Chinner kmem_zone_destroy(item_zone); 4842a82b8beSDavid Chinner } 4852a82b8beSDavid Chinner 4862a82b8beSDavid Chinner /* 4872a82b8beSDavid Chinner * xfs_filestream_mount() is called when a file system is mounted with the 4882a82b8beSDavid Chinner * filestream option. It is responsible for allocating the data structures 4892a82b8beSDavid Chinner * needed to track the new file system's file streams. 4902a82b8beSDavid Chinner */ 4912a82b8beSDavid Chinner int 4922a82b8beSDavid Chinner xfs_filestream_mount( 4932a82b8beSDavid Chinner xfs_mount_t *mp) 4942a82b8beSDavid Chinner { 4952a82b8beSDavid Chinner int err; 4962a82b8beSDavid Chinner unsigned int lifetime, grp_count; 4972a82b8beSDavid Chinner 4982a82b8beSDavid Chinner /* 4992a82b8beSDavid Chinner * The filestream timer tunable is currently fixed within the range of 5002a82b8beSDavid Chinner * one second to four minutes, with five seconds being the default. The 5012a82b8beSDavid Chinner * group count is somewhat arbitrary, but it'd be nice to adhere to the 5022a82b8beSDavid Chinner * timer tunable to within about 10 percent. This requires at least 10 5032a82b8beSDavid Chinner * groups. 5042a82b8beSDavid Chinner */ 5052a82b8beSDavid Chinner lifetime = xfs_fstrm_centisecs * 10; 5062a82b8beSDavid Chinner grp_count = 10; 5072a82b8beSDavid Chinner 5082a82b8beSDavid Chinner err = xfs_mru_cache_create(&mp->m_filestream, lifetime, grp_count, 509bcc7b445SEric Sandeen xfs_fstrm_free_func); 5102a82b8beSDavid Chinner 5112a82b8beSDavid Chinner return err; 5122a82b8beSDavid Chinner } 5132a82b8beSDavid Chinner 5142a82b8beSDavid Chinner /* 5152a82b8beSDavid Chinner * xfs_filestream_unmount() is called when a file system that was mounted with 5162a82b8beSDavid Chinner * the filestream option is unmounted. It drains the data structures created 5172a82b8beSDavid Chinner * to track the file system's file streams and frees all the memory that was 5182a82b8beSDavid Chinner * allocated. 5192a82b8beSDavid Chinner */ 5202a82b8beSDavid Chinner void 5212a82b8beSDavid Chinner xfs_filestream_unmount( 5222a82b8beSDavid Chinner xfs_mount_t *mp) 5232a82b8beSDavid Chinner { 5242a82b8beSDavid Chinner xfs_mru_cache_destroy(mp->m_filestream); 5252a82b8beSDavid Chinner } 5262a82b8beSDavid Chinner 5272a82b8beSDavid Chinner /* 5282a82b8beSDavid Chinner * Return the AG of the filestream the file or directory belongs to, or 5292a82b8beSDavid Chinner * NULLAGNUMBER otherwise. 5302a82b8beSDavid Chinner */ 5312a82b8beSDavid Chinner xfs_agnumber_t 5322a82b8beSDavid Chinner xfs_filestream_lookup_ag( 5332a82b8beSDavid Chinner xfs_inode_t *ip) 5342a82b8beSDavid Chinner { 535*22328d71SChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 536*22328d71SChristoph Hellwig struct xfs_mru_cache_elem *mru; 5372a82b8beSDavid Chinner fstrm_item_t *item; 5382a82b8beSDavid Chinner xfs_agnumber_t ag; 5392a82b8beSDavid Chinner int ref; 5402a82b8beSDavid Chinner 54103209378SAl Viro if (!S_ISREG(ip->i_d.di_mode) && !S_ISDIR(ip->i_d.di_mode)) { 5422a82b8beSDavid Chinner ASSERT(0); 5432a82b8beSDavid Chinner return NULLAGNUMBER; 5442a82b8beSDavid Chinner } 5452a82b8beSDavid Chinner 546*22328d71SChristoph Hellwig mru = xfs_mru_cache_lookup(mp->m_filestream, ip->i_ino); 547*22328d71SChristoph Hellwig if (!mru) { 5482a82b8beSDavid Chinner TRACE_LOOKUP(ip->i_mount, ip, NULL, NULLAGNUMBER, 0); 5492a82b8beSDavid Chinner return NULLAGNUMBER; 5502a82b8beSDavid Chinner } 5512a82b8beSDavid Chinner 552*22328d71SChristoph Hellwig item = container_of(mru, fstrm_item_t, mru); 5532a82b8beSDavid Chinner ASSERT(ip == item->ip); 5542a82b8beSDavid Chinner ag = item->ag; 5552a82b8beSDavid Chinner ref = xfs_filestream_peek_ag(ip->i_mount, ag); 556*22328d71SChristoph Hellwig xfs_mru_cache_done(mp->m_filestream); 5572a82b8beSDavid Chinner 5582a82b8beSDavid Chinner TRACE_LOOKUP(ip->i_mount, ip, item->pip, ag, ref); 5592a82b8beSDavid Chinner return ag; 5602a82b8beSDavid Chinner } 5612a82b8beSDavid Chinner 5622a82b8beSDavid Chinner /* 5632a82b8beSDavid Chinner * xfs_filestream_associate() should only be called to associate a regular file 5642a82b8beSDavid Chinner * with its parent directory. Calling it with a child directory isn't 5652a82b8beSDavid Chinner * appropriate because filestreams don't apply to entire directory hierarchies. 5662a82b8beSDavid Chinner * Creating a file in a child directory of an existing filestream directory 5672a82b8beSDavid Chinner * starts a new filestream with its own allocation group association. 5682a82b8beSDavid Chinner * 5692a82b8beSDavid Chinner * Returns < 0 on error, 0 if successful association occurred, > 0 if 5702a82b8beSDavid Chinner * we failed to get an association because of locking issues. 5712a82b8beSDavid Chinner */ 5722a82b8beSDavid Chinner int 5732a82b8beSDavid Chinner xfs_filestream_associate( 5742a82b8beSDavid Chinner xfs_inode_t *pip, 5752a82b8beSDavid Chinner xfs_inode_t *ip) 5762a82b8beSDavid Chinner { 577*22328d71SChristoph Hellwig struct xfs_mru_cache_elem *mru; 5782a82b8beSDavid Chinner xfs_mount_t *mp; 5792a82b8beSDavid Chinner fstrm_item_t *item; 5802a82b8beSDavid Chinner xfs_agnumber_t ag, rotorstep, startag; 5812a82b8beSDavid Chinner int err = 0; 5822a82b8beSDavid Chinner 58303209378SAl Viro ASSERT(S_ISDIR(pip->i_d.di_mode)); 58403209378SAl Viro ASSERT(S_ISREG(ip->i_d.di_mode)); 58503209378SAl Viro if (!S_ISDIR(pip->i_d.di_mode) || !S_ISREG(ip->i_d.di_mode)) 5862a82b8beSDavid Chinner return -EINVAL; 5872a82b8beSDavid Chinner 5882a82b8beSDavid Chinner mp = pip->i_mount; 5892a82b8beSDavid Chinner 5902a82b8beSDavid Chinner /* 5912a82b8beSDavid Chinner * We have a problem, Houston. 5922a82b8beSDavid Chinner * 5932a82b8beSDavid Chinner * Taking the iolock here violates inode locking order - we already 5942a82b8beSDavid Chinner * hold the ilock. Hence if we block getting this lock we may never 5952a82b8beSDavid Chinner * wake. Unfortunately, that means if we can't get the lock, we're 5962a82b8beSDavid Chinner * screwed in terms of getting a stream association - we can't spin 5972a82b8beSDavid Chinner * waiting for the lock because someone else is waiting on the lock we 5982a82b8beSDavid Chinner * hold and we cannot drop that as we are in a transaction here. 5992a82b8beSDavid Chinner * 600075fe102SChristoph Hellwig * Lucky for us, this inversion is not a problem because it's a 601075fe102SChristoph Hellwig * directory inode that we are trying to lock here. 6022a82b8beSDavid Chinner * 6032a82b8beSDavid Chinner * So, if we can't get the iolock without sleeping then just give up 6042a82b8beSDavid Chinner */ 6051c1c6ebcSDave Chinner if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL)) 6062a82b8beSDavid Chinner return 1; 6072a82b8beSDavid Chinner 6082a82b8beSDavid Chinner /* If the parent directory is already in the cache, use its AG. */ 609*22328d71SChristoph Hellwig mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino); 610*22328d71SChristoph Hellwig if (mru) { 611*22328d71SChristoph Hellwig item = container_of(mru, fstrm_item_t, mru); 612*22328d71SChristoph Hellwig 6132a82b8beSDavid Chinner ASSERT(item->ip == pip); 6142a82b8beSDavid Chinner ag = item->ag; 615*22328d71SChristoph Hellwig xfs_mru_cache_done(mp->m_filestream); 6162a82b8beSDavid Chinner 6172a82b8beSDavid Chinner TRACE_LOOKUP(mp, pip, pip, ag, xfs_filestream_peek_ag(mp, ag)); 6182a82b8beSDavid Chinner err = _xfs_filestream_update_ag(ip, pip, ag); 6192a82b8beSDavid Chinner 6202a82b8beSDavid Chinner goto exit; 6212a82b8beSDavid Chinner } 6222a82b8beSDavid Chinner 6232a82b8beSDavid Chinner /* 6242a82b8beSDavid Chinner * Set the starting AG using the rotor for inode32, otherwise 6252a82b8beSDavid Chinner * use the directory inode's AG. 6262a82b8beSDavid Chinner */ 6272a82b8beSDavid Chinner if (mp->m_flags & XFS_MOUNT_32BITINODES) { 6282a82b8beSDavid Chinner rotorstep = xfs_rotorstep; 6292a82b8beSDavid Chinner startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount; 6302a82b8beSDavid Chinner mp->m_agfrotor = (mp->m_agfrotor + 1) % 6312a82b8beSDavid Chinner (mp->m_sb.sb_agcount * rotorstep); 6322a82b8beSDavid Chinner } else 6332a82b8beSDavid Chinner startag = XFS_INO_TO_AGNO(mp, pip->i_ino); 6342a82b8beSDavid Chinner 6352a82b8beSDavid Chinner /* Pick a new AG for the parent inode starting at startag. */ 6362a82b8beSDavid Chinner err = _xfs_filestream_pick_ag(mp, startag, &ag, 0, 0); 6372a82b8beSDavid Chinner if (err || ag == NULLAGNUMBER) 6382a82b8beSDavid Chinner goto exit_did_pick; 6392a82b8beSDavid Chinner 6402a82b8beSDavid Chinner /* Associate the parent inode with the AG. */ 6412a82b8beSDavid Chinner err = _xfs_filestream_update_ag(pip, NULL, ag); 6422a82b8beSDavid Chinner if (err) 6432a82b8beSDavid Chinner goto exit_did_pick; 6442a82b8beSDavid Chinner 6452a82b8beSDavid Chinner /* Associate the file inode with the AG. */ 6462a82b8beSDavid Chinner err = _xfs_filestream_update_ag(ip, pip, ag); 6472a82b8beSDavid Chinner if (err) 6482a82b8beSDavid Chinner goto exit_did_pick; 6492a82b8beSDavid Chinner 6502a82b8beSDavid Chinner TRACE_ASSOCIATE(mp, ip, pip, ag, xfs_filestream_peek_ag(mp, ag)); 6512a82b8beSDavid Chinner 6522a82b8beSDavid Chinner exit_did_pick: 6532a82b8beSDavid Chinner /* 6542a82b8beSDavid Chinner * If _xfs_filestream_pick_ag() returned a valid AG, remove the 6552a82b8beSDavid Chinner * reference it took on it, since the file and directory will have taken 6562a82b8beSDavid Chinner * their own now if they were successfully cached. 6572a82b8beSDavid Chinner */ 6582a82b8beSDavid Chinner if (ag != NULLAGNUMBER) 6592a82b8beSDavid Chinner xfs_filestream_put_ag(mp, ag); 6602a82b8beSDavid Chinner 6612a82b8beSDavid Chinner exit: 6622a82b8beSDavid Chinner xfs_iunlock(pip, XFS_IOLOCK_EXCL); 6632a82b8beSDavid Chinner return -err; 6642a82b8beSDavid Chinner } 6652a82b8beSDavid Chinner 6662a82b8beSDavid Chinner /* 6672a82b8beSDavid Chinner * Pick a new allocation group for the current file and its file stream. This 6682a82b8beSDavid Chinner * function is called by xfs_bmap_filestreams() with the mount point's per-ag 6692a82b8beSDavid Chinner * lock held. 6702a82b8beSDavid Chinner */ 6712a82b8beSDavid Chinner int 6722a82b8beSDavid Chinner xfs_filestream_new_ag( 67368988114SDave Chinner struct xfs_bmalloca *ap, 6742a82b8beSDavid Chinner xfs_agnumber_t *agp) 6752a82b8beSDavid Chinner { 676*22328d71SChristoph Hellwig struct xfs_mru_cache_elem *mru, *mru2; 6772a82b8beSDavid Chinner int flags, err; 6782a82b8beSDavid Chinner xfs_inode_t *ip, *pip = NULL; 6792a82b8beSDavid Chinner xfs_mount_t *mp; 6802a82b8beSDavid Chinner xfs_extlen_t minlen; 6812a82b8beSDavid Chinner fstrm_item_t *dir, *file; 6822a82b8beSDavid Chinner xfs_agnumber_t ag = NULLAGNUMBER; 6832a82b8beSDavid Chinner 6842a82b8beSDavid Chinner ip = ap->ip; 6852a82b8beSDavid Chinner mp = ip->i_mount; 6863a75667eSDave Chinner minlen = ap->length; 6872a82b8beSDavid Chinner *agp = NULLAGNUMBER; 6882a82b8beSDavid Chinner 6892a82b8beSDavid Chinner /* 6902a82b8beSDavid Chinner * Look for the file in the cache, removing it if it's found. Doing 6912a82b8beSDavid Chinner * this allows it to be held across the dir lookup that follows. 6922a82b8beSDavid Chinner */ 693*22328d71SChristoph Hellwig mru = xfs_mru_cache_remove(mp->m_filestream, ip->i_ino); 694*22328d71SChristoph Hellwig if (mru) { 695*22328d71SChristoph Hellwig file = container_of(mru, fstrm_item_t, mru); 6962a82b8beSDavid Chinner ASSERT(ip == file->ip); 6972a82b8beSDavid Chinner 6982a82b8beSDavid Chinner /* Save the file's parent inode and old AG number for later. */ 6992a82b8beSDavid Chinner pip = file->pip; 7002a82b8beSDavid Chinner ag = file->ag; 7012a82b8beSDavid Chinner 7022a82b8beSDavid Chinner /* Look for the file's directory in the cache. */ 703*22328d71SChristoph Hellwig mru2 = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino); 704*22328d71SChristoph Hellwig if (mru2) { 705*22328d71SChristoph Hellwig dir = container_of(mru2, fstrm_item_t, mru); 7062a82b8beSDavid Chinner ASSERT(pip == dir->ip); 7072a82b8beSDavid Chinner 7082a82b8beSDavid Chinner /* 7092a82b8beSDavid Chinner * If the directory has already moved on to a new AG, 7102a82b8beSDavid Chinner * use that AG as the new AG for the file. Don't 7112a82b8beSDavid Chinner * forget to twiddle the AG refcounts to match the 7122a82b8beSDavid Chinner * movement. 7132a82b8beSDavid Chinner */ 7142a82b8beSDavid Chinner if (dir->ag != file->ag) { 7152a82b8beSDavid Chinner xfs_filestream_put_ag(mp, file->ag); 7162a82b8beSDavid Chinner xfs_filestream_get_ag(mp, dir->ag); 7172a82b8beSDavid Chinner *agp = file->ag = dir->ag; 7182a82b8beSDavid Chinner } 7192a82b8beSDavid Chinner 720*22328d71SChristoph Hellwig xfs_mru_cache_done(mp->m_filestream); 7212a82b8beSDavid Chinner } 7222a82b8beSDavid Chinner 7232a82b8beSDavid Chinner /* 7242a82b8beSDavid Chinner * Put the file back in the cache. If this fails, the free 7252a82b8beSDavid Chinner * function needs to be called to tidy up in the same way as if 7262a82b8beSDavid Chinner * the item had simply expired from the cache. 7272a82b8beSDavid Chinner */ 728*22328d71SChristoph Hellwig err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, mru); 7292a82b8beSDavid Chinner if (err) { 730*22328d71SChristoph Hellwig xfs_fstrm_free_func(mru); 7312a82b8beSDavid Chinner return err; 7322a82b8beSDavid Chinner } 7332a82b8beSDavid Chinner 7342a82b8beSDavid Chinner /* 7352a82b8beSDavid Chinner * If the file's AG was moved to the directory's new AG, there's 7362a82b8beSDavid Chinner * nothing more to be done. 7372a82b8beSDavid Chinner */ 7382a82b8beSDavid Chinner if (*agp != NULLAGNUMBER) { 7392a82b8beSDavid Chinner TRACE_MOVEAG(mp, ip, pip, 7402a82b8beSDavid Chinner ag, xfs_filestream_peek_ag(mp, ag), 7412a82b8beSDavid Chinner *agp, xfs_filestream_peek_ag(mp, *agp)); 7422a82b8beSDavid Chinner return 0; 7432a82b8beSDavid Chinner } 7442a82b8beSDavid Chinner } 7452a82b8beSDavid Chinner 7462a82b8beSDavid Chinner /* 7472a82b8beSDavid Chinner * If the file's parent directory is known, take its iolock in exclusive 7482a82b8beSDavid Chinner * mode to prevent two sibling files from racing each other to migrate 7492a82b8beSDavid Chinner * themselves and their parent to different AGs. 750785ce418SChristoph Hellwig * 751785ce418SChristoph Hellwig * Note that we lock the parent directory iolock inside the child 752785ce418SChristoph Hellwig * iolock here. That's fine as we never hold both parent and child 753785ce418SChristoph Hellwig * iolock in any other place. This is different from the ilock, 754785ce418SChristoph Hellwig * which requires locking of the child after the parent for namespace 755785ce418SChristoph Hellwig * operations. 7562a82b8beSDavid Chinner */ 7572a82b8beSDavid Chinner if (pip) 758785ce418SChristoph Hellwig xfs_ilock(pip, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); 7592a82b8beSDavid Chinner 7602a82b8beSDavid Chinner /* 7612a82b8beSDavid Chinner * A new AG needs to be found for the file. If the file's parent 7622a82b8beSDavid Chinner * directory is also known, it will be moved to the new AG as well to 7632a82b8beSDavid Chinner * ensure that files created inside it in future use the new AG. 7642a82b8beSDavid Chinner */ 7652a82b8beSDavid Chinner ag = (ag == NULLAGNUMBER) ? 0 : (ag + 1) % mp->m_sb.sb_agcount; 7662a82b8beSDavid Chinner flags = (ap->userdata ? XFS_PICK_USERDATA : 0) | 7670937e0fdSDave Chinner (ap->flist->xbf_low ? XFS_PICK_LOWSPACE : 0); 7682a82b8beSDavid Chinner 7692a82b8beSDavid Chinner err = _xfs_filestream_pick_ag(mp, ag, agp, flags, minlen); 7702a82b8beSDavid Chinner if (err || *agp == NULLAGNUMBER) 7712a82b8beSDavid Chinner goto exit; 7722a82b8beSDavid Chinner 7732a82b8beSDavid Chinner /* 7742a82b8beSDavid Chinner * If the file wasn't found in the file cache, then its parent directory 7752a82b8beSDavid Chinner * inode isn't known. For this to have happened, the file must either 7762a82b8beSDavid Chinner * be pre-existing, or it was created long enough ago that its cache 7772a82b8beSDavid Chinner * entry has expired. This isn't the sort of usage that the filestreams 7782a82b8beSDavid Chinner * allocator is trying to optimise, so there's no point trying to track 7792a82b8beSDavid Chinner * its new AG somehow in the filestream data structures. 7802a82b8beSDavid Chinner */ 7812a82b8beSDavid Chinner if (!pip) { 7822a82b8beSDavid Chinner TRACE_ORPHAN(mp, ip, *agp); 7832a82b8beSDavid Chinner goto exit; 7842a82b8beSDavid Chinner } 7852a82b8beSDavid Chinner 7862a82b8beSDavid Chinner /* Associate the parent inode with the AG. */ 7872a82b8beSDavid Chinner err = _xfs_filestream_update_ag(pip, NULL, *agp); 7882a82b8beSDavid Chinner if (err) 7892a82b8beSDavid Chinner goto exit; 7902a82b8beSDavid Chinner 7912a82b8beSDavid Chinner /* Associate the file inode with the AG. */ 7922a82b8beSDavid Chinner err = _xfs_filestream_update_ag(ip, pip, *agp); 7932a82b8beSDavid Chinner if (err) 7942a82b8beSDavid Chinner goto exit; 7952a82b8beSDavid Chinner 7962a82b8beSDavid Chinner TRACE_MOVEAG(mp, ip, pip, NULLAGNUMBER, 0, 7972a82b8beSDavid Chinner *agp, xfs_filestream_peek_ag(mp, *agp)); 7982a82b8beSDavid Chinner 7992a82b8beSDavid Chinner exit: 8002a82b8beSDavid Chinner /* 8012a82b8beSDavid Chinner * If _xfs_filestream_pick_ag() returned a valid AG, remove the 8022a82b8beSDavid Chinner * reference it took on it, since the file and directory will have taken 8032a82b8beSDavid Chinner * their own now if they were successfully cached. 8042a82b8beSDavid Chinner */ 8052a82b8beSDavid Chinner if (*agp != NULLAGNUMBER) 8062a82b8beSDavid Chinner xfs_filestream_put_ag(mp, *agp); 8072a82b8beSDavid Chinner else 8082a82b8beSDavid Chinner *agp = 0; 8092a82b8beSDavid Chinner 8102a82b8beSDavid Chinner if (pip) 8112a82b8beSDavid Chinner xfs_iunlock(pip, XFS_IOLOCK_EXCL); 8122a82b8beSDavid Chinner 8132a82b8beSDavid Chinner return err; 8142a82b8beSDavid Chinner } 8152a82b8beSDavid Chinner 8162a82b8beSDavid Chinner /* 8172a82b8beSDavid Chinner * Remove an association between an inode and a filestream object. 8182a82b8beSDavid Chinner * Typically this is done on last close of an unlinked file. 8192a82b8beSDavid Chinner */ 8202a82b8beSDavid Chinner void 8212a82b8beSDavid Chinner xfs_filestream_deassociate( 8222a82b8beSDavid Chinner xfs_inode_t *ip) 8232a82b8beSDavid Chinner { 824*22328d71SChristoph Hellwig xfs_mru_cache_delete(ip->i_mount->m_filestream, ip->i_ino); 8252a82b8beSDavid Chinner } 826