1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2006-2007 Silicon Graphics, Inc. 4 * Copyright (c) 2014 Christoph Hellwig. 5 * All Rights Reserved. 6 */ 7 #include "xfs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_mount.h" 13 #include "xfs_inode.h" 14 #include "xfs_bmap.h" 15 #include "xfs_bmap_util.h" 16 #include "xfs_alloc.h" 17 #include "xfs_mru_cache.h" 18 #include "xfs_trace.h" 19 #include "xfs_ag.h" 20 #include "xfs_ag_resv.h" 21 #include "xfs_trans.h" 22 #include "xfs_filestream.h" 23 24 struct xfs_fstrm_item { 25 struct xfs_mru_cache_elem mru; 26 struct xfs_perag *pag; /* AG in use for this directory */ 27 }; 28 29 enum xfs_fstrm_alloc { 30 XFS_PICK_USERDATA = 1, 31 XFS_PICK_LOWSPACE = 2, 32 }; 33 34 static void 35 xfs_fstrm_free_func( 36 void *data, 37 struct xfs_mru_cache_elem *mru) 38 { 39 struct xfs_fstrm_item *item = 40 container_of(mru, struct xfs_fstrm_item, mru); 41 struct xfs_perag *pag = item->pag; 42 43 trace_xfs_filestream_free(pag, mru->key); 44 atomic_dec(&pag->pagf_fstrms); 45 xfs_perag_rele(pag); 46 47 kmem_free(item); 48 } 49 50 /* 51 * Scan the AGs starting at start_agno looking for an AG that isn't in use and 52 * has at least minlen blocks free. If no AG is found to match the allocation 53 * requirements, pick the AG with the most free space in it. 54 */ 55 static int 56 xfs_filestream_pick_ag( 57 struct xfs_alloc_arg *args, 58 xfs_ino_t pino, 59 xfs_agnumber_t start_agno, 60 int flags, 61 xfs_extlen_t *longest) 62 { 63 struct xfs_mount *mp = args->mp; 64 struct xfs_perag *pag; 65 struct xfs_perag *max_pag = NULL; 66 xfs_extlen_t minlen = *longest; 67 xfs_extlen_t free = 0, minfree, maxfree = 0; 68 xfs_agnumber_t agno; 69 bool first_pass = true; 70 int err; 71 72 /* 2% of an AG's blocks must be free for it to be chosen. */ 73 minfree = mp->m_sb.sb_agblocks / 50; 74 75 restart: 76 for_each_perag_wrap(mp, start_agno, agno, pag) { 77 trace_xfs_filestream_scan(pag, pino); 78 *longest = 0; 79 err = xfs_bmap_longest_free_extent(pag, NULL, longest); 80 if (err) { 81 xfs_perag_rele(pag); 82 if (err != -EAGAIN) 83 break; 84 /* Couldn't lock the AGF, skip this AG. */ 85 err = 0; 86 continue; 87 } 88 89 /* Keep track of the AG with the most free blocks. */ 90 if (pag->pagf_freeblks > maxfree) { 91 maxfree = pag->pagf_freeblks; 92 if (max_pag) 93 xfs_perag_rele(max_pag); 94 atomic_inc(&pag->pag_active_ref); 95 max_pag = pag; 96 } 97 98 /* 99 * The AG reference count does two things: it enforces mutual 100 * exclusion when examining the suitability of an AG in this 101 * loop, and it guards against two filestreams being established 102 * in the same AG as each other. 103 */ 104 if (atomic_inc_return(&pag->pagf_fstrms) <= 1) { 105 if (((minlen && *longest >= minlen) || 106 (!minlen && pag->pagf_freeblks >= minfree)) && 107 (!xfs_perag_prefers_metadata(pag) || 108 !(flags & XFS_PICK_USERDATA) || 109 (flags & XFS_PICK_LOWSPACE))) { 110 /* Break out, retaining the reference on the AG. */ 111 free = pag->pagf_freeblks; 112 break; 113 } 114 } 115 116 /* Drop the reference on this AG, it's not usable. */ 117 atomic_dec(&pag->pagf_fstrms); 118 } 119 120 if (err) { 121 xfs_perag_rele(pag); 122 if (max_pag) 123 xfs_perag_rele(max_pag); 124 return err; 125 } 126 127 if (!pag) { 128 /* 129 * Allow a second pass to give xfs_bmap_longest_free_extent() 130 * another attempt at locking AGFs that it might have skipped 131 * over before we fail. 132 */ 133 if (first_pass) { 134 first_pass = false; 135 goto restart; 136 } 137 138 /* 139 * We must be low on data space, so run a final lowspace 140 * optimised selection pass if we haven't already. 141 */ 142 if (!(flags & XFS_PICK_LOWSPACE)) { 143 flags |= XFS_PICK_LOWSPACE; 144 goto restart; 145 } 146 147 /* 148 * No unassociated AGs are available, so select the AG with the 149 * most free space, regardless of whether it's already in use by 150 * another filestream. It none suit, just use whatever AG we can 151 * grab. 152 */ 153 if (!max_pag) { 154 for_each_perag_wrap(args->mp, 0, start_agno, args->pag) 155 break; 156 atomic_inc(&args->pag->pagf_fstrms); 157 *longest = 0; 158 } else { 159 pag = max_pag; 160 free = maxfree; 161 atomic_inc(&pag->pagf_fstrms); 162 } 163 } else if (max_pag) { 164 xfs_perag_rele(max_pag); 165 } 166 167 trace_xfs_filestream_pick(pag, pino, free); 168 args->pag = pag; 169 return 0; 170 171 } 172 173 static struct xfs_inode * 174 xfs_filestream_get_parent( 175 struct xfs_inode *ip) 176 { 177 struct inode *inode = VFS_I(ip), *dir = NULL; 178 struct dentry *dentry, *parent; 179 180 dentry = d_find_alias(inode); 181 if (!dentry) 182 goto out; 183 184 parent = dget_parent(dentry); 185 if (!parent) 186 goto out_dput; 187 188 dir = igrab(d_inode(parent)); 189 dput(parent); 190 191 out_dput: 192 dput(dentry); 193 out: 194 return dir ? XFS_I(dir) : NULL; 195 } 196 197 /* 198 * Lookup the mru cache for an existing association. If one exists and we can 199 * use it, return with an active perag reference indicating that the allocation 200 * will proceed with that association. 201 * 202 * If we have no association, or we cannot use the current one and have to 203 * destroy it, return with longest = 0 to tell the caller to create a new 204 * association. 205 */ 206 static int 207 xfs_filestream_lookup_association( 208 struct xfs_bmalloca *ap, 209 struct xfs_alloc_arg *args, 210 xfs_ino_t pino, 211 xfs_extlen_t *longest) 212 { 213 struct xfs_mount *mp = args->mp; 214 struct xfs_perag *pag; 215 struct xfs_mru_cache_elem *mru; 216 int error = 0; 217 218 *longest = 0; 219 mru = xfs_mru_cache_lookup(mp->m_filestream, pino); 220 if (!mru) 221 return 0; 222 /* 223 * Grab the pag and take an extra active reference for the caller whilst 224 * the mru item cannot go away. This means we'll pin the perag with 225 * the reference we get here even if the filestreams association is torn 226 * down immediately after we mark the lookup as done. 227 */ 228 pag = container_of(mru, struct xfs_fstrm_item, mru)->pag; 229 atomic_inc(&pag->pag_active_ref); 230 xfs_mru_cache_done(mp->m_filestream); 231 232 trace_xfs_filestream_lookup(pag, ap->ip->i_ino); 233 234 ap->blkno = XFS_AGB_TO_FSB(args->mp, pag->pag_agno, 0); 235 xfs_bmap_adjacent(ap); 236 237 /* 238 * If there is very little free space before we start a filestreams 239 * allocation, we're almost guaranteed to fail to find a large enough 240 * free space available so just use the cached AG. 241 */ 242 if (ap->tp->t_flags & XFS_TRANS_LOWMODE) { 243 *longest = 1; 244 goto out_done; 245 } 246 247 error = xfs_bmap_longest_free_extent(pag, args->tp, longest); 248 if (error == -EAGAIN) 249 error = 0; 250 if (error || *longest < args->maxlen) { 251 /* We aren't going to use this perag */ 252 *longest = 0; 253 xfs_perag_rele(pag); 254 return error; 255 } 256 257 out_done: 258 args->pag = pag; 259 return 0; 260 } 261 262 static int 263 xfs_filestream_create_association( 264 struct xfs_bmalloca *ap, 265 struct xfs_alloc_arg *args, 266 xfs_ino_t pino, 267 xfs_extlen_t *longest) 268 { 269 struct xfs_mount *mp = args->mp; 270 struct xfs_mru_cache_elem *mru; 271 struct xfs_fstrm_item *item; 272 xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, pino); 273 int flags = 0; 274 int error; 275 276 /* Changing parent AG association now, so remove the existing one. */ 277 mru = xfs_mru_cache_remove(mp->m_filestream, pino); 278 if (mru) { 279 struct xfs_fstrm_item *item = 280 container_of(mru, struct xfs_fstrm_item, mru); 281 282 agno = (item->pag->pag_agno + 1) % mp->m_sb.sb_agcount; 283 xfs_fstrm_free_func(mp, mru); 284 } else if (xfs_is_inode32(mp)) { 285 xfs_agnumber_t rotorstep = xfs_rotorstep; 286 287 agno = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount; 288 mp->m_agfrotor = (mp->m_agfrotor + 1) % 289 (mp->m_sb.sb_agcount * rotorstep); 290 } 291 292 ap->blkno = XFS_AGB_TO_FSB(args->mp, agno, 0); 293 xfs_bmap_adjacent(ap); 294 295 if (ap->datatype & XFS_ALLOC_USERDATA) 296 flags |= XFS_PICK_USERDATA; 297 if (ap->tp->t_flags & XFS_TRANS_LOWMODE) 298 flags |= XFS_PICK_LOWSPACE; 299 300 *longest = ap->length; 301 error = xfs_filestream_pick_ag(args, pino, agno, flags, longest); 302 if (error) 303 return error; 304 305 /* 306 * We are going to use this perag now, so create an assoication for it. 307 * xfs_filestream_pick_ag() has already bumped the perag fstrms counter 308 * for us, so all we need to do here is take another active reference to 309 * the perag for the cached association. 310 * 311 * If we fail to store the association, we need to drop the fstrms 312 * counter as well as drop the perag reference we take here for the 313 * item. We do not need to return an error for this failure - as long as 314 * we return a referenced AG, the allocation can still go ahead just 315 * fine. 316 */ 317 item = kmem_alloc(sizeof(*item), KM_MAYFAIL); 318 if (!item) 319 goto out_put_fstrms; 320 321 atomic_inc(&args->pag->pag_active_ref); 322 item->pag = args->pag; 323 error = xfs_mru_cache_insert(mp->m_filestream, pino, &item->mru); 324 if (error) 325 goto out_free_item; 326 return 0; 327 328 out_free_item: 329 xfs_perag_rele(item->pag); 330 kmem_free(item); 331 out_put_fstrms: 332 atomic_dec(&args->pag->pagf_fstrms); 333 return 0; 334 } 335 336 /* 337 * Search for an allocation group with a single extent large enough for 338 * the request. First we look for an existing association and use that if it 339 * is found. Otherwise, we create a new association by selecting an AG that fits 340 * the allocation criteria. 341 * 342 * We return with a referenced perag in args->pag to indicate which AG we are 343 * allocating into or an error with no references held. 344 */ 345 int 346 xfs_filestream_select_ag( 347 struct xfs_bmalloca *ap, 348 struct xfs_alloc_arg *args, 349 xfs_extlen_t *longest) 350 { 351 struct xfs_mount *mp = args->mp; 352 struct xfs_inode *pip; 353 xfs_ino_t ino = 0; 354 int error = 0; 355 356 *longest = 0; 357 args->total = ap->total; 358 pip = xfs_filestream_get_parent(ap->ip); 359 if (pip) { 360 ino = pip->i_ino; 361 error = xfs_filestream_lookup_association(ap, args, ino, 362 longest); 363 xfs_irele(pip); 364 if (error) 365 return error; 366 if (*longest >= args->maxlen) 367 goto out_select; 368 if (ap->tp->t_flags & XFS_TRANS_LOWMODE) 369 goto out_select; 370 } 371 372 error = xfs_filestream_create_association(ap, args, ino, longest); 373 if (error) 374 return error; 375 376 out_select: 377 ap->blkno = XFS_AGB_TO_FSB(mp, args->pag->pag_agno, 0); 378 return 0; 379 } 380 381 void 382 xfs_filestream_deassociate( 383 struct xfs_inode *ip) 384 { 385 xfs_mru_cache_delete(ip->i_mount->m_filestream, ip->i_ino); 386 } 387 388 int 389 xfs_filestream_mount( 390 xfs_mount_t *mp) 391 { 392 /* 393 * The filestream timer tunable is currently fixed within the range of 394 * one second to four minutes, with five seconds being the default. The 395 * group count is somewhat arbitrary, but it'd be nice to adhere to the 396 * timer tunable to within about 10 percent. This requires at least 10 397 * groups. 398 */ 399 return xfs_mru_cache_create(&mp->m_filestream, mp, 400 xfs_fstrm_centisecs * 10, 10, xfs_fstrm_free_func); 401 } 402 403 void 404 xfs_filestream_unmount( 405 xfs_mount_t *mp) 406 { 407 xfs_mru_cache_destroy(mp->m_filestream); 408 } 409