1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2006-2007 Silicon Graphics, Inc. 4 * Copyright (c) 2014 Christoph Hellwig. 5 * All Rights Reserved. 6 */ 7 #include "xfs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_mount.h" 13 #include "xfs_inode.h" 14 #include "xfs_bmap.h" 15 #include "xfs_bmap_util.h" 16 #include "xfs_alloc.h" 17 #include "xfs_mru_cache.h" 18 #include "xfs_trace.h" 19 #include "xfs_ag.h" 20 #include "xfs_ag_resv.h" 21 #include "xfs_trans.h" 22 #include "xfs_filestream.h" 23 24 struct xfs_fstrm_item { 25 struct xfs_mru_cache_elem mru; 26 xfs_agnumber_t ag; /* AG in use for this directory */ 27 }; 28 29 enum xfs_fstrm_alloc { 30 XFS_PICK_USERDATA = 1, 31 XFS_PICK_LOWSPACE = 2, 32 }; 33 34 /* 35 * Allocation group filestream associations are tracked with per-ag atomic 36 * counters. These counters allow xfs_filestream_pick_ag() to tell whether a 37 * particular AG already has active filestreams associated with it. 38 */ 39 int 40 xfs_filestream_peek_ag( 41 xfs_mount_t *mp, 42 xfs_agnumber_t agno) 43 { 44 struct xfs_perag *pag; 45 int ret; 46 47 pag = xfs_perag_get(mp, agno); 48 ret = atomic_read(&pag->pagf_fstrms); 49 xfs_perag_put(pag); 50 return ret; 51 } 52 53 static int 54 xfs_filestream_get_ag( 55 xfs_mount_t *mp, 56 xfs_agnumber_t agno) 57 { 58 struct xfs_perag *pag; 59 int ret; 60 61 pag = xfs_perag_get(mp, agno); 62 ret = atomic_inc_return(&pag->pagf_fstrms); 63 xfs_perag_put(pag); 64 return ret; 65 } 66 67 static void 68 xfs_filestream_put_ag( 69 xfs_mount_t *mp, 70 xfs_agnumber_t agno) 71 { 72 struct xfs_perag *pag; 73 74 pag = xfs_perag_get(mp, agno); 75 atomic_dec(&pag->pagf_fstrms); 76 xfs_perag_put(pag); 77 } 78 79 static void 80 xfs_fstrm_free_func( 81 void *data, 82 struct xfs_mru_cache_elem *mru) 83 { 84 struct xfs_mount *mp = data; 85 struct xfs_fstrm_item *item = 86 container_of(mru, struct xfs_fstrm_item, mru); 87 88 xfs_filestream_put_ag(mp, item->ag); 89 trace_xfs_filestream_free(mp, mru->key, item->ag); 90 91 kmem_free(item); 92 } 93 94 /* 95 * Scan the AGs starting at startag looking for an AG that isn't in use and has 96 * at least minlen blocks free. 97 */ 98 static int 99 xfs_filestream_pick_ag( 100 struct xfs_inode *ip, 101 xfs_agnumber_t startag, 102 xfs_agnumber_t *agp, 103 int flags, 104 xfs_extlen_t minlen) 105 { 106 struct xfs_mount *mp = ip->i_mount; 107 struct xfs_fstrm_item *item; 108 struct xfs_perag *pag; 109 xfs_extlen_t longest, free = 0, minfree, maxfree = 0; 110 xfs_agnumber_t ag, max_ag = NULLAGNUMBER; 111 int err, trylock, nscan; 112 113 ASSERT(S_ISDIR(VFS_I(ip)->i_mode)); 114 115 /* 2% of an AG's blocks must be free for it to be chosen. */ 116 minfree = mp->m_sb.sb_agblocks / 50; 117 118 ag = startag; 119 *agp = NULLAGNUMBER; 120 121 /* For the first pass, don't sleep trying to init the per-AG. */ 122 trylock = XFS_ALLOC_FLAG_TRYLOCK; 123 124 for (nscan = 0; 1; nscan++) { 125 trace_xfs_filestream_scan(mp, ip->i_ino, ag); 126 127 pag = xfs_perag_get(mp, ag); 128 longest = 0; 129 err = xfs_bmap_longest_free_extent(pag, NULL, &longest); 130 if (err) { 131 xfs_perag_put(pag); 132 if (err != -EAGAIN) 133 return err; 134 /* Couldn't lock the AGF, skip this AG. */ 135 goto next_ag; 136 } 137 138 /* Keep track of the AG with the most free blocks. */ 139 if (pag->pagf_freeblks > maxfree) { 140 maxfree = pag->pagf_freeblks; 141 max_ag = ag; 142 } 143 144 /* 145 * The AG reference count does two things: it enforces mutual 146 * exclusion when examining the suitability of an AG in this 147 * loop, and it guards against two filestreams being established 148 * in the same AG as each other. 149 */ 150 if (xfs_filestream_get_ag(mp, ag) > 1) { 151 xfs_filestream_put_ag(mp, ag); 152 goto next_ag; 153 } 154 155 if (((minlen && longest >= minlen) || 156 (!minlen && pag->pagf_freeblks >= minfree)) && 157 (!xfs_perag_prefers_metadata(pag) || 158 !(flags & XFS_PICK_USERDATA) || 159 (flags & XFS_PICK_LOWSPACE))) { 160 161 /* Break out, retaining the reference on the AG. */ 162 free = pag->pagf_freeblks; 163 xfs_perag_put(pag); 164 *agp = ag; 165 break; 166 } 167 168 /* Drop the reference on this AG, it's not usable. */ 169 xfs_filestream_put_ag(mp, ag); 170 next_ag: 171 xfs_perag_put(pag); 172 /* Move to the next AG, wrapping to AG 0 if necessary. */ 173 if (++ag >= mp->m_sb.sb_agcount) 174 ag = 0; 175 176 /* If a full pass of the AGs hasn't been done yet, continue. */ 177 if (ag != startag) 178 continue; 179 180 /* Allow sleeping in xfs_alloc_read_agf() on the 2nd pass. */ 181 if (trylock != 0) { 182 trylock = 0; 183 continue; 184 } 185 186 /* Finally, if lowspace wasn't set, set it for the 3rd pass. */ 187 if (!(flags & XFS_PICK_LOWSPACE)) { 188 flags |= XFS_PICK_LOWSPACE; 189 continue; 190 } 191 192 /* 193 * Take the AG with the most free space, regardless of whether 194 * it's already in use by another filestream. 195 */ 196 if (max_ag != NULLAGNUMBER) { 197 xfs_filestream_get_ag(mp, max_ag); 198 free = maxfree; 199 *agp = max_ag; 200 break; 201 } 202 203 /* take AG 0 if none matched */ 204 trace_xfs_filestream_pick(ip, *agp, free, nscan); 205 *agp = 0; 206 return 0; 207 } 208 209 trace_xfs_filestream_pick(ip, *agp, free, nscan); 210 211 if (*agp == NULLAGNUMBER) 212 return 0; 213 214 err = -ENOMEM; 215 item = kmem_alloc(sizeof(*item), KM_MAYFAIL); 216 if (!item) 217 goto out_put_ag; 218 219 item->ag = *agp; 220 221 err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru); 222 if (err) { 223 if (err == -EEXIST) 224 err = 0; 225 goto out_free_item; 226 } 227 228 return 0; 229 230 out_free_item: 231 kmem_free(item); 232 out_put_ag: 233 xfs_filestream_put_ag(mp, *agp); 234 return err; 235 } 236 237 static struct xfs_inode * 238 xfs_filestream_get_parent( 239 struct xfs_inode *ip) 240 { 241 struct inode *inode = VFS_I(ip), *dir = NULL; 242 struct dentry *dentry, *parent; 243 244 dentry = d_find_alias(inode); 245 if (!dentry) 246 goto out; 247 248 parent = dget_parent(dentry); 249 if (!parent) 250 goto out_dput; 251 252 dir = igrab(d_inode(parent)); 253 dput(parent); 254 255 out_dput: 256 dput(dentry); 257 out: 258 return dir ? XFS_I(dir) : NULL; 259 } 260 261 /* 262 * Pick a new allocation group for the current file and its file stream. 263 * 264 * This is called when the allocator can't find a suitable extent in the 265 * current AG, and we have to move the stream into a new AG with more space. 266 */ 267 static int 268 xfs_filestream_new_ag( 269 struct xfs_bmalloca *ap, 270 xfs_agnumber_t *agp) 271 { 272 struct xfs_inode *ip = ap->ip, *pip; 273 struct xfs_mount *mp = ip->i_mount; 274 xfs_extlen_t minlen = ap->length; 275 xfs_agnumber_t startag = 0; 276 int flags = 0; 277 int err = 0; 278 struct xfs_mru_cache_elem *mru; 279 280 *agp = NULLAGNUMBER; 281 282 pip = xfs_filestream_get_parent(ip); 283 if (!pip) 284 goto exit; 285 286 mru = xfs_mru_cache_remove(mp->m_filestream, pip->i_ino); 287 if (mru) { 288 struct xfs_fstrm_item *item = 289 container_of(mru, struct xfs_fstrm_item, mru); 290 startag = (item->ag + 1) % mp->m_sb.sb_agcount; 291 } 292 293 if (ap->datatype & XFS_ALLOC_USERDATA) 294 flags |= XFS_PICK_USERDATA; 295 if (ap->tp->t_flags & XFS_TRANS_LOWMODE) 296 flags |= XFS_PICK_LOWSPACE; 297 298 err = xfs_filestream_pick_ag(pip, startag, agp, flags, minlen); 299 300 /* 301 * Only free the item here so we skip over the old AG earlier. 302 */ 303 if (mru) 304 xfs_fstrm_free_func(mp, mru); 305 306 xfs_irele(pip); 307 exit: 308 if (*agp == NULLAGNUMBER) 309 *agp = 0; 310 return err; 311 } 312 313 /* 314 * Search for an allocation group with a single extent large enough for 315 * the request. If one isn't found, then the largest available free extent is 316 * returned as the best length possible. 317 */ 318 int 319 xfs_filestream_select_ag( 320 struct xfs_bmalloca *ap, 321 struct xfs_alloc_arg *args, 322 xfs_extlen_t *blen) 323 { 324 struct xfs_mount *mp = ap->ip->i_mount; 325 struct xfs_perag *pag; 326 struct xfs_inode *pip = NULL; 327 xfs_agnumber_t agno = NULLAGNUMBER; 328 struct xfs_mru_cache_elem *mru; 329 int error; 330 331 args->total = ap->total; 332 *blen = 0; 333 334 pip = xfs_filestream_get_parent(ap->ip); 335 if (!pip) { 336 agno = 0; 337 goto new_ag; 338 } 339 340 mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino); 341 if (mru) { 342 agno = container_of(mru, struct xfs_fstrm_item, mru)->ag; 343 xfs_mru_cache_done(mp->m_filestream); 344 345 trace_xfs_filestream_lookup(mp, ap->ip->i_ino, agno); 346 xfs_irele(pip); 347 348 ap->blkno = XFS_AGB_TO_FSB(args->mp, agno, 0); 349 xfs_bmap_adjacent(ap); 350 351 pag = xfs_perag_grab(mp, agno); 352 if (pag) { 353 error = xfs_bmap_longest_free_extent(pag, args->tp, blen); 354 xfs_perag_rele(pag); 355 if (error) { 356 if (error != -EAGAIN) 357 return error; 358 *blen = 0; 359 } 360 } 361 if (*blen >= args->maxlen) 362 goto out_select; 363 } else if (xfs_is_inode32(mp)) { 364 xfs_agnumber_t rotorstep = xfs_rotorstep; 365 agno = (mp->m_agfrotor / rotorstep) % 366 mp->m_sb.sb_agcount; 367 mp->m_agfrotor = (mp->m_agfrotor + 1) % 368 (mp->m_sb.sb_agcount * rotorstep); 369 xfs_irele(pip); 370 } else { 371 agno = XFS_INO_TO_AGNO(mp, pip->i_ino); 372 xfs_irele(pip); 373 } 374 375 new_ag: 376 ap->blkno = XFS_AGB_TO_FSB(args->mp, agno, 0); 377 xfs_bmap_adjacent(ap); 378 379 /* 380 * If there is very little free space before we start a filestreams 381 * allocation, we're almost guaranteed to fail to find a better AG with 382 * larger free space available so we don't even try. 383 */ 384 if (ap->tp->t_flags & XFS_TRANS_LOWMODE) 385 return 0; 386 387 error = xfs_filestream_new_ag(ap, &agno); 388 if (error) 389 return error; 390 if (agno == NULLAGNUMBER) { 391 agno = 0; 392 goto out_select; 393 } 394 395 pag = xfs_perag_grab(mp, agno); 396 if (!pag) 397 goto out_select; 398 399 error = xfs_bmap_longest_free_extent(pag, args->tp, blen); 400 xfs_perag_rele(pag); 401 if (error) { 402 if (error != -EAGAIN) 403 return error; 404 *blen = 0; 405 } 406 407 out_select: 408 ap->blkno = XFS_AGB_TO_FSB(mp, agno, 0); 409 return 0; 410 } 411 412 413 void 414 xfs_filestream_deassociate( 415 struct xfs_inode *ip) 416 { 417 xfs_mru_cache_delete(ip->i_mount->m_filestream, ip->i_ino); 418 } 419 420 int 421 xfs_filestream_mount( 422 xfs_mount_t *mp) 423 { 424 /* 425 * The filestream timer tunable is currently fixed within the range of 426 * one second to four minutes, with five seconds being the default. The 427 * group count is somewhat arbitrary, but it'd be nice to adhere to the 428 * timer tunable to within about 10 percent. This requires at least 10 429 * groups. 430 */ 431 return xfs_mru_cache_create(&mp->m_filestream, mp, 432 xfs_fstrm_centisecs * 10, 10, xfs_fstrm_free_func); 433 } 434 435 void 436 xfs_filestream_unmount( 437 xfs_mount_t *mp) 438 { 439 xfs_mru_cache_destroy(mp->m_filestream); 440 } 441