10b61f8a4SDave Chinner // SPDX-License-Identifier: GPL-2.0 22a82b8beSDavid Chinner /* 32a82b8beSDavid Chinner * Copyright (c) 2006-2007 Silicon Graphics, Inc. 42cd2ef6aSChristoph Hellwig * Copyright (c) 2014 Christoph Hellwig. 52a82b8beSDavid Chinner * All Rights Reserved. 62a82b8beSDavid Chinner */ 72a82b8beSDavid Chinner #include "xfs.h" 85467b34bSDarrick J. Wong #include "xfs_shared.h" 9a4fbe6abSDave Chinner #include "xfs_format.h" 10239880efSDave Chinner #include "xfs_log_format.h" 11239880efSDave Chinner #include "xfs_trans_resv.h" 12239880efSDave Chinner #include "xfs_mount.h" 132a82b8beSDavid Chinner #include "xfs_inode.h" 142a82b8beSDavid Chinner #include "xfs_bmap.h" 158f7747adSDave Chinner #include "xfs_bmap_util.h" 162a82b8beSDavid Chinner #include "xfs_alloc.h" 172a82b8beSDavid Chinner #include "xfs_mru_cache.h" 180b1b213fSChristoph Hellwig #include "xfs_trace.h" 199bbafc71SDave Chinner #include "xfs_ag.h" 203fd129b6SDarrick J. Wong #include "xfs_ag_resv.h" 213e3673e3SBrian Foster #include "xfs_trans.h" 22f368b29bSDarrick J. Wong #include "xfs_filestream.h" 232a82b8beSDavid Chinner 242cd2ef6aSChristoph Hellwig struct xfs_fstrm_item { 2522328d71SChristoph Hellwig struct xfs_mru_cache_elem mru; 263054faceSDave Chinner struct xfs_perag *pag; /* AG in use for this directory */ 272cd2ef6aSChristoph Hellwig }; 282cd2ef6aSChristoph Hellwig 292cd2ef6aSChristoph Hellwig enum xfs_fstrm_alloc { 302cd2ef6aSChristoph Hellwig XFS_PICK_USERDATA = 1, 312cd2ef6aSChristoph Hellwig XFS_PICK_LOWSPACE = 2, 322cd2ef6aSChristoph Hellwig }; 332a82b8beSDavid Chinner 342cd2ef6aSChristoph Hellwig static void 352cd2ef6aSChristoph Hellwig xfs_fstrm_free_func( 367fcd3efaSChristoph Hellwig void *data, 372cd2ef6aSChristoph Hellwig struct xfs_mru_cache_elem *mru) 382cd2ef6aSChristoph Hellwig { 392cd2ef6aSChristoph Hellwig struct xfs_fstrm_item *item = 402cd2ef6aSChristoph Hellwig container_of(mru, struct xfs_fstrm_item, mru); 413054faceSDave Chinner struct xfs_perag *pag = item->pag; 422cd2ef6aSChristoph Hellwig 43*571e2592SDave Chinner trace_xfs_filestream_free(pag, mru->key); 443054faceSDave Chinner atomic_dec(&pag->pagf_fstrms); 453054faceSDave Chinner xfs_perag_rele(pag); 462cd2ef6aSChristoph Hellwig 471919addaSChristoph Hellwig kmem_free(item); 482cd2ef6aSChristoph Hellwig } 492cd2ef6aSChristoph Hellwig 502a82b8beSDavid Chinner /* 512a82b8beSDavid Chinner * Scan the AGs starting at startag looking for an AG that isn't in use and has 522a82b8beSDavid Chinner * at least minlen blocks free. 532a82b8beSDavid Chinner */ 542a82b8beSDavid Chinner static int 552cd2ef6aSChristoph Hellwig xfs_filestream_pick_ag( 562cd2ef6aSChristoph Hellwig struct xfs_inode *ip, 572a82b8beSDavid Chinner xfs_agnumber_t *agp, 582a82b8beSDavid Chinner int flags, 59ba34de8dSDave Chinner xfs_extlen_t *longest) 602a82b8beSDavid Chinner { 612cd2ef6aSChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 622cd2ef6aSChristoph Hellwig struct xfs_fstrm_item *item; 632cd2ef6aSChristoph Hellwig struct xfs_perag *pag; 643054faceSDave Chinner struct xfs_perag *max_pag = NULL; 65ba34de8dSDave Chinner xfs_extlen_t minlen = *longest; 66ba34de8dSDave Chinner xfs_extlen_t free = 0, minfree, maxfree = 0; 67eb70aa2dSDave Chinner xfs_agnumber_t start_agno = *agp; 68eb70aa2dSDave Chinner xfs_agnumber_t agno; 69eb70aa2dSDave Chinner int err, trylock; 702cd2ef6aSChristoph Hellwig 71c19b3b05SDave Chinner ASSERT(S_ISDIR(VFS_I(ip)->i_mode)); 722a82b8beSDavid Chinner 732a82b8beSDavid Chinner /* 2% of an AG's blocks must be free for it to be chosen. */ 742a82b8beSDavid Chinner minfree = mp->m_sb.sb_agblocks / 50; 752a82b8beSDavid Chinner 762a82b8beSDavid Chinner *agp = NULLAGNUMBER; 772a82b8beSDavid Chinner 782a82b8beSDavid Chinner /* For the first pass, don't sleep trying to init the per-AG. */ 792a82b8beSDavid Chinner trylock = XFS_ALLOC_FLAG_TRYLOCK; 802a82b8beSDavid Chinner 81eb70aa2dSDave Chinner restart: 82eb70aa2dSDave Chinner for_each_perag_wrap(mp, start_agno, agno, pag) { 83*571e2592SDave Chinner trace_xfs_filestream_scan(pag, ip->i_ino); 84ba34de8dSDave Chinner *longest = 0; 85ba34de8dSDave Chinner err = xfs_bmap_longest_free_extent(pag, NULL, longest); 86f48e2df8SDarrick J. Wong if (err) { 873054faceSDave Chinner xfs_perag_rele(pag); 8805cf492aSDave Chinner if (err != -EAGAIN) 893054faceSDave Chinner break; 90f48e2df8SDarrick J. Wong /* Couldn't lock the AGF, skip this AG. */ 91eb70aa2dSDave Chinner err = 0; 92f650df71SBrian Foster goto next_ag; 932a82b8beSDavid Chinner } 942a82b8beSDavid Chinner 952a82b8beSDavid Chinner /* Keep track of the AG with the most free blocks. */ 962a82b8beSDavid Chinner if (pag->pagf_freeblks > maxfree) { 972a82b8beSDavid Chinner maxfree = pag->pagf_freeblks; 983054faceSDave Chinner if (max_pag) 993054faceSDave Chinner xfs_perag_rele(max_pag); 1003054faceSDave Chinner atomic_inc(&pag->pag_active_ref); 1013054faceSDave Chinner max_pag = pag; 1022a82b8beSDavid Chinner } 1032a82b8beSDavid Chinner 1042a82b8beSDavid Chinner /* 1052a82b8beSDavid Chinner * The AG reference count does two things: it enforces mutual 1062a82b8beSDavid Chinner * exclusion when examining the suitability of an AG in this 1072a82b8beSDavid Chinner * loop, and it guards against two filestreams being established 1082a82b8beSDavid Chinner * in the same AG as each other. 1092a82b8beSDavid Chinner */ 110eb70aa2dSDave Chinner if (atomic_inc_return(&pag->pagf_fstrms) <= 1) { 111ba34de8dSDave Chinner if (((minlen && *longest >= minlen) || 1122a82b8beSDavid Chinner (!minlen && pag->pagf_freeblks >= minfree)) && 1137ac2ff8bSDave Chinner (!xfs_perag_prefers_metadata(pag) || 1147ac2ff8bSDave Chinner !(flags & XFS_PICK_USERDATA) || 1152a82b8beSDavid Chinner (flags & XFS_PICK_LOWSPACE))) { 1162a82b8beSDavid Chinner /* Break out, retaining the reference on the AG. */ 1172a82b8beSDavid Chinner free = pag->pagf_freeblks; 1182a82b8beSDavid Chinner break; 1192a82b8beSDavid Chinner } 120eb70aa2dSDave Chinner } 1212a82b8beSDavid Chinner 1222a82b8beSDavid Chinner /* Drop the reference on this AG, it's not usable. */ 1233054faceSDave Chinner atomic_dec(&pag->pagf_fstrms); 124eb70aa2dSDave Chinner } 1252a82b8beSDavid Chinner 126eb70aa2dSDave Chinner if (err) { 127eb70aa2dSDave Chinner xfs_perag_rele(pag); 128eb70aa2dSDave Chinner if (max_pag) 129eb70aa2dSDave Chinner xfs_perag_rele(max_pag); 130eb70aa2dSDave Chinner return err; 131eb70aa2dSDave Chinner } 1322a82b8beSDavid Chinner 133eb70aa2dSDave Chinner if (!pag) { 13476b47e52SDave Chinner /* Allow sleeping in xfs_alloc_read_agf() on the 2nd pass. */ 135eb70aa2dSDave Chinner if (trylock) { 1362a82b8beSDavid Chinner trylock = 0; 137eb70aa2dSDave Chinner goto restart; 1382a82b8beSDavid Chinner } 1392a82b8beSDavid Chinner 1402a82b8beSDavid Chinner /* Finally, if lowspace wasn't set, set it for the 3rd pass. */ 1412a82b8beSDavid Chinner if (!(flags & XFS_PICK_LOWSPACE)) { 1422a82b8beSDavid Chinner flags |= XFS_PICK_LOWSPACE; 143eb70aa2dSDave Chinner goto restart; 1442a82b8beSDavid Chinner } 1452a82b8beSDavid Chinner 1462a82b8beSDavid Chinner /* 147eb70aa2dSDave Chinner * No unassociated AGs are available, so select the AG with the 148eb70aa2dSDave Chinner * most free space, regardless of whether it's already in use by 149eb70aa2dSDave Chinner * another filestream. It none suit, return NULLAGNUMBER. 1502a82b8beSDavid Chinner */ 151eb70aa2dSDave Chinner if (!max_pag) { 1523054faceSDave Chinner *agp = NULLAGNUMBER; 153*571e2592SDave Chinner trace_xfs_filestream_pick(ip, NULL, free); 1542a82b8beSDavid Chinner return 0; 1553054faceSDave Chinner } 156eb70aa2dSDave Chinner pag = max_pag; 157eb70aa2dSDave Chinner free = maxfree; 158eb70aa2dSDave Chinner atomic_inc(&pag->pagf_fstrms); 159eb70aa2dSDave Chinner } else if (max_pag) { 160eb70aa2dSDave Chinner xfs_perag_rele(max_pag); 161eb70aa2dSDave Chinner } 162eb70aa2dSDave Chinner 163*571e2592SDave Chinner trace_xfs_filestream_pick(ip, pag, free); 1642a82b8beSDavid Chinner 1652451337dSDave Chinner err = -ENOMEM; 1661919addaSChristoph Hellwig item = kmem_alloc(sizeof(*item), KM_MAYFAIL); 1672a82b8beSDavid Chinner if (!item) 1682cd2ef6aSChristoph Hellwig goto out_put_ag; 1692a82b8beSDavid Chinner 1703054faceSDave Chinner item->pag = pag; 1712a82b8beSDavid Chinner 17222328d71SChristoph Hellwig err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru); 1732a82b8beSDavid Chinner if (err) { 1742451337dSDave Chinner if (err == -EEXIST) 1752cd2ef6aSChristoph Hellwig err = 0; 1762cd2ef6aSChristoph Hellwig goto out_free_item; 1772cd2ef6aSChristoph Hellwig } 1782cd2ef6aSChristoph Hellwig 1793054faceSDave Chinner *agp = pag->pag_agno; 1802cd2ef6aSChristoph Hellwig return 0; 1812cd2ef6aSChristoph Hellwig 1822cd2ef6aSChristoph Hellwig out_free_item: 1831919addaSChristoph Hellwig kmem_free(item); 1842cd2ef6aSChristoph Hellwig out_put_ag: 1853054faceSDave Chinner atomic_dec(&pag->pagf_fstrms); 1863054faceSDave Chinner xfs_perag_rele(pag); 1872a82b8beSDavid Chinner return err; 1882a82b8beSDavid Chinner } 1892a82b8beSDavid Chinner 1902cd2ef6aSChristoph Hellwig static struct xfs_inode * 1912cd2ef6aSChristoph Hellwig xfs_filestream_get_parent( 1922cd2ef6aSChristoph Hellwig struct xfs_inode *ip) 1932a82b8beSDavid Chinner { 1942cd2ef6aSChristoph Hellwig struct inode *inode = VFS_I(ip), *dir = NULL; 1952cd2ef6aSChristoph Hellwig struct dentry *dentry, *parent; 1962a82b8beSDavid Chinner 1972cd2ef6aSChristoph Hellwig dentry = d_find_alias(inode); 1982cd2ef6aSChristoph Hellwig if (!dentry) 1992cd2ef6aSChristoph Hellwig goto out; 2002a82b8beSDavid Chinner 2012cd2ef6aSChristoph Hellwig parent = dget_parent(dentry); 2022cd2ef6aSChristoph Hellwig if (!parent) 2032cd2ef6aSChristoph Hellwig goto out_dput; 2042a82b8beSDavid Chinner 2052b0143b5SDavid Howells dir = igrab(d_inode(parent)); 2062cd2ef6aSChristoph Hellwig dput(parent); 2072a82b8beSDavid Chinner 2082cd2ef6aSChristoph Hellwig out_dput: 2092cd2ef6aSChristoph Hellwig dput(dentry); 2102cd2ef6aSChristoph Hellwig out: 2112cd2ef6aSChristoph Hellwig return dir ? XFS_I(dir) : NULL; 2122a82b8beSDavid Chinner } 2132a82b8beSDavid Chinner 2142a82b8beSDavid Chinner /* 215f38b46bbSDave Chinner * Lookup the mru cache for an existing association. If one exists and we can 216f38b46bbSDave Chinner * use it, return with the agno and blen indicating that the allocation will 217f38b46bbSDave Chinner * proceed with that association. 218f38b46bbSDave Chinner * 219f38b46bbSDave Chinner * If we have no association, or we cannot use the current one and have to 220f38b46bbSDave Chinner * destroy it, return with blen = 0 and agno pointing at the next agno to try. 221f38b46bbSDave Chinner */ 222f38b46bbSDave Chinner int 223f38b46bbSDave Chinner xfs_filestream_select_ag_mru( 224f38b46bbSDave Chinner struct xfs_bmalloca *ap, 225f38b46bbSDave Chinner struct xfs_alloc_arg *args, 226f38b46bbSDave Chinner struct xfs_inode *pip, 227f38b46bbSDave Chinner xfs_agnumber_t *agno, 228f38b46bbSDave Chinner xfs_extlen_t *blen) 229f38b46bbSDave Chinner { 230f38b46bbSDave Chinner struct xfs_mount *mp = ap->ip->i_mount; 231f38b46bbSDave Chinner struct xfs_perag *pag; 232f38b46bbSDave Chinner struct xfs_mru_cache_elem *mru; 233f38b46bbSDave Chinner int error; 234f38b46bbSDave Chinner 235f38b46bbSDave Chinner mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino); 236f38b46bbSDave Chinner if (!mru) 237f38b46bbSDave Chinner goto out_default_agno; 238f38b46bbSDave Chinner 2393054faceSDave Chinner pag = container_of(mru, struct xfs_fstrm_item, mru)->pag; 240f38b46bbSDave Chinner xfs_mru_cache_done(mp->m_filestream); 241f38b46bbSDave Chinner 242*571e2592SDave Chinner trace_xfs_filestream_lookup(pag, ap->ip->i_ino); 243f38b46bbSDave Chinner 2443054faceSDave Chinner ap->blkno = XFS_AGB_TO_FSB(args->mp, pag->pag_agno, 0); 245f38b46bbSDave Chinner xfs_bmap_adjacent(ap); 246f38b46bbSDave Chinner 247f38b46bbSDave Chinner error = xfs_bmap_longest_free_extent(pag, args->tp, blen); 248f38b46bbSDave Chinner if (error) { 249f38b46bbSDave Chinner if (error != -EAGAIN) 250f38b46bbSDave Chinner return error; 251f38b46bbSDave Chinner *blen = 0; 252f38b46bbSDave Chinner } 253f38b46bbSDave Chinner 254f38b46bbSDave Chinner /* 255f38b46bbSDave Chinner * We are done if there's still enough contiguous free space to succeed. 256f38b46bbSDave Chinner */ 2573054faceSDave Chinner *agno = pag->pag_agno; 258f38b46bbSDave Chinner if (*blen >= args->maxlen) 259f38b46bbSDave Chinner return 0; 260f38b46bbSDave Chinner 261f38b46bbSDave Chinner /* Changing parent AG association now, so remove the existing one. */ 262f38b46bbSDave Chinner mru = xfs_mru_cache_remove(mp->m_filestream, pip->i_ino); 263f38b46bbSDave Chinner if (mru) { 264f38b46bbSDave Chinner struct xfs_fstrm_item *item = 265f38b46bbSDave Chinner container_of(mru, struct xfs_fstrm_item, mru); 2663054faceSDave Chinner *agno = (item->pag->pag_agno + 1) % mp->m_sb.sb_agcount; 267f38b46bbSDave Chinner xfs_fstrm_free_func(mp, mru); 268f38b46bbSDave Chinner return 0; 269f38b46bbSDave Chinner } 270f38b46bbSDave Chinner 271f38b46bbSDave Chinner out_default_agno: 272f38b46bbSDave Chinner if (xfs_is_inode32(mp)) { 273f38b46bbSDave Chinner xfs_agnumber_t rotorstep = xfs_rotorstep; 274f38b46bbSDave Chinner *agno = (mp->m_agfrotor / rotorstep) % 275f38b46bbSDave Chinner mp->m_sb.sb_agcount; 276f38b46bbSDave Chinner mp->m_agfrotor = (mp->m_agfrotor + 1) % 277f38b46bbSDave Chinner (mp->m_sb.sb_agcount * rotorstep); 278f38b46bbSDave Chinner return 0; 279f38b46bbSDave Chinner } 280f38b46bbSDave Chinner *agno = XFS_INO_TO_AGNO(mp, pip->i_ino); 281f38b46bbSDave Chinner return 0; 282f38b46bbSDave Chinner 283f38b46bbSDave Chinner } 284f38b46bbSDave Chinner 285f38b46bbSDave Chinner /* 286a52dc2adSDave Chinner * Search for an allocation group with a single extent large enough for 287f38b46bbSDave Chinner * the request. If one isn't found, then adjust the minimum allocation 288f38b46bbSDave Chinner * size to the largest space found. 289a52dc2adSDave Chinner */ 290a52dc2adSDave Chinner int 291a52dc2adSDave Chinner xfs_filestream_select_ag( 2928f7747adSDave Chinner struct xfs_bmalloca *ap, 2938f7747adSDave Chinner struct xfs_alloc_arg *args, 2948f7747adSDave Chinner xfs_extlen_t *blen) 2958f7747adSDave Chinner { 2968f7747adSDave Chinner struct xfs_mount *mp = ap->ip->i_mount; 297a52dc2adSDave Chinner struct xfs_inode *pip = NULL; 298f38b46bbSDave Chinner xfs_agnumber_t agno; 299ba34de8dSDave Chinner int flags = 0; 300f38b46bbSDave Chinner int error; 3018f7747adSDave Chinner 3028f7747adSDave Chinner args->total = ap->total; 303a52dc2adSDave Chinner *blen = 0; 3048f7747adSDave Chinner 305a52dc2adSDave Chinner pip = xfs_filestream_get_parent(ap->ip); 306a52dc2adSDave Chinner if (!pip) { 307a52dc2adSDave Chinner agno = 0; 308ba34de8dSDave Chinner goto out_select; 309a52dc2adSDave Chinner } 3108f7747adSDave Chinner 311f38b46bbSDave Chinner error = xfs_filestream_select_ag_mru(ap, args, pip, &agno, blen); 312f38b46bbSDave Chinner if (error || *blen >= args->maxlen) 313f38b46bbSDave Chinner goto out_rele; 314a52dc2adSDave Chinner 315a52dc2adSDave Chinner ap->blkno = XFS_AGB_TO_FSB(args->mp, agno, 0); 316a52dc2adSDave Chinner xfs_bmap_adjacent(ap); 317a52dc2adSDave Chinner 318a52dc2adSDave Chinner /* 319a52dc2adSDave Chinner * If there is very little free space before we start a filestreams 320a52dc2adSDave Chinner * allocation, we're almost guaranteed to fail to find a better AG with 321a52dc2adSDave Chinner * larger free space available so we don't even try. 322a52dc2adSDave Chinner */ 323a52dc2adSDave Chinner if (ap->tp->t_flags & XFS_TRANS_LOWMODE) 324ba34de8dSDave Chinner goto out_select; 3258f7747adSDave Chinner 326ba34de8dSDave Chinner if (ap->datatype & XFS_ALLOC_USERDATA) 327ba34de8dSDave Chinner flags |= XFS_PICK_USERDATA; 328ba34de8dSDave Chinner if (ap->tp->t_flags & XFS_TRANS_LOWMODE) 329ba34de8dSDave Chinner flags |= XFS_PICK_LOWSPACE; 330ba34de8dSDave Chinner 331ba34de8dSDave Chinner *blen = ap->length; 332ba34de8dSDave Chinner error = xfs_filestream_pick_ag(pip, &agno, flags, blen); 333a52dc2adSDave Chinner if (agno == NULLAGNUMBER) { 334a52dc2adSDave Chinner agno = 0; 3358f7747adSDave Chinner *blen = 0; 3368f7747adSDave Chinner } 3378f7747adSDave Chinner 3388f7747adSDave Chinner out_select: 339a52dc2adSDave Chinner ap->blkno = XFS_AGB_TO_FSB(mp, agno, 0); 340f38b46bbSDave Chinner out_rele: 341ba34de8dSDave Chinner xfs_irele(pip); 342ba34de8dSDave Chinner return error; 343ba34de8dSDave Chinner 344ba34de8dSDave Chinner } 3458f7747adSDave Chinner 3462a82b8beSDavid Chinner void 3472a82b8beSDavid Chinner xfs_filestream_deassociate( 3482cd2ef6aSChristoph Hellwig struct xfs_inode *ip) 3492a82b8beSDavid Chinner { 35022328d71SChristoph Hellwig xfs_mru_cache_delete(ip->i_mount->m_filestream, ip->i_ino); 3512a82b8beSDavid Chinner } 3522cd2ef6aSChristoph Hellwig 3532cd2ef6aSChristoph Hellwig int 3542cd2ef6aSChristoph Hellwig xfs_filestream_mount( 3552cd2ef6aSChristoph Hellwig xfs_mount_t *mp) 3562cd2ef6aSChristoph Hellwig { 3572cd2ef6aSChristoph Hellwig /* 3582cd2ef6aSChristoph Hellwig * The filestream timer tunable is currently fixed within the range of 3592cd2ef6aSChristoph Hellwig * one second to four minutes, with five seconds being the default. The 3602cd2ef6aSChristoph Hellwig * group count is somewhat arbitrary, but it'd be nice to adhere to the 3612cd2ef6aSChristoph Hellwig * timer tunable to within about 10 percent. This requires at least 10 3622cd2ef6aSChristoph Hellwig * groups. 3632cd2ef6aSChristoph Hellwig */ 3647fcd3efaSChristoph Hellwig return xfs_mru_cache_create(&mp->m_filestream, mp, 3657fcd3efaSChristoph Hellwig xfs_fstrm_centisecs * 10, 10, xfs_fstrm_free_func); 3662cd2ef6aSChristoph Hellwig } 3672cd2ef6aSChristoph Hellwig 3682cd2ef6aSChristoph Hellwig void 3692cd2ef6aSChristoph Hellwig xfs_filestream_unmount( 3702cd2ef6aSChristoph Hellwig xfs_mount_t *mp) 3712cd2ef6aSChristoph Hellwig { 3722cd2ef6aSChristoph Hellwig xfs_mru_cache_destroy(mp->m_filestream); 3732cd2ef6aSChristoph Hellwig } 374