xref: /openbmc/linux/fs/xfs/xfs_filestream.c (revision a52dc2ad363088d0e0ab05a71f0496e2377e5cc9)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2006-2007 Silicon Graphics, Inc.
4  * Copyright (c) 2014 Christoph Hellwig.
5  * All Rights Reserved.
6  */
7 #include "xfs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_mount.h"
13 #include "xfs_inode.h"
14 #include "xfs_bmap.h"
15 #include "xfs_bmap_util.h"
16 #include "xfs_alloc.h"
17 #include "xfs_mru_cache.h"
18 #include "xfs_trace.h"
19 #include "xfs_ag.h"
20 #include "xfs_ag_resv.h"
21 #include "xfs_trans.h"
22 #include "xfs_filestream.h"
23 
24 struct xfs_fstrm_item {
25 	struct xfs_mru_cache_elem	mru;
26 	xfs_agnumber_t			ag; /* AG in use for this directory */
27 };
28 
/*
 * Flag bits for the @flags argument of xfs_filestream_pick_ag().
 *
 * XFS_PICK_USERDATA is set by xfs_filestream_new_ag() when the allocation
 * datatype has XFS_ALLOC_USERDATA set.  XFS_PICK_LOWSPACE is set when the
 * transaction is in XFS_TRANS_LOWMODE, and is also turned on by
 * xfs_filestream_pick_ag() itself for its final scan pass.
 */
enum xfs_fstrm_alloc {
	XFS_PICK_USERDATA	= (1 << 0),
	XFS_PICK_LOWSPACE	= (1 << 1),
};
33 
34 /*
35  * Allocation group filestream associations are tracked with per-ag atomic
36  * counters.  These counters allow xfs_filestream_pick_ag() to tell whether a
37  * particular AG already has active filestreams associated with it.
38  */
39 int
40 xfs_filestream_peek_ag(
41 	xfs_mount_t	*mp,
42 	xfs_agnumber_t	agno)
43 {
44 	struct xfs_perag *pag;
45 	int		ret;
46 
47 	pag = xfs_perag_get(mp, agno);
48 	ret = atomic_read(&pag->pagf_fstrms);
49 	xfs_perag_put(pag);
50 	return ret;
51 }
52 
53 static int
54 xfs_filestream_get_ag(
55 	xfs_mount_t	*mp,
56 	xfs_agnumber_t	agno)
57 {
58 	struct xfs_perag *pag;
59 	int		ret;
60 
61 	pag = xfs_perag_get(mp, agno);
62 	ret = atomic_inc_return(&pag->pagf_fstrms);
63 	xfs_perag_put(pag);
64 	return ret;
65 }
66 
67 static void
68 xfs_filestream_put_ag(
69 	xfs_mount_t	*mp,
70 	xfs_agnumber_t	agno)
71 {
72 	struct xfs_perag *pag;
73 
74 	pag = xfs_perag_get(mp, agno);
75 	atomic_dec(&pag->pagf_fstrms);
76 	xfs_perag_put(pag);
77 }
78 
79 static void
80 xfs_fstrm_free_func(
81 	void			*data,
82 	struct xfs_mru_cache_elem *mru)
83 {
84 	struct xfs_mount	*mp = data;
85 	struct xfs_fstrm_item	*item =
86 		container_of(mru, struct xfs_fstrm_item, mru);
87 
88 	xfs_filestream_put_ag(mp, item->ag);
89 	trace_xfs_filestream_free(mp, mru->key, item->ag);
90 
91 	kmem_free(item);
92 }
93 
94 /*
95  * Scan the AGs starting at startag looking for an AG that isn't in use and has
96  * at least minlen blocks free.
97  */
98 static int
99 xfs_filestream_pick_ag(
100 	struct xfs_inode	*ip,
101 	xfs_agnumber_t		startag,
102 	xfs_agnumber_t		*agp,
103 	int			flags,
104 	xfs_extlen_t		minlen)
105 {
106 	struct xfs_mount	*mp = ip->i_mount;
107 	struct xfs_fstrm_item	*item;
108 	struct xfs_perag	*pag;
109 	xfs_extlen_t		longest, free = 0, minfree, maxfree = 0;
110 	xfs_agnumber_t		ag, max_ag = NULLAGNUMBER;
111 	int			err, trylock, nscan;
112 
113 	ASSERT(S_ISDIR(VFS_I(ip)->i_mode));
114 
115 	/* 2% of an AG's blocks must be free for it to be chosen. */
116 	minfree = mp->m_sb.sb_agblocks / 50;
117 
118 	ag = startag;
119 	*agp = NULLAGNUMBER;
120 
121 	/* For the first pass, don't sleep trying to init the per-AG. */
122 	trylock = XFS_ALLOC_FLAG_TRYLOCK;
123 
124 	for (nscan = 0; 1; nscan++) {
125 		trace_xfs_filestream_scan(mp, ip->i_ino, ag);
126 
127 		pag = xfs_perag_get(mp, ag);
128 		longest = 0;
129 		err = xfs_bmap_longest_free_extent(pag, NULL, &longest);
130 		if (err) {
131 			xfs_perag_put(pag);
132 			if (err != -EAGAIN)
133 				return err;
134 			/* Couldn't lock the AGF, skip this AG. */
135 			goto next_ag;
136 		}
137 
138 		/* Keep track of the AG with the most free blocks. */
139 		if (pag->pagf_freeblks > maxfree) {
140 			maxfree = pag->pagf_freeblks;
141 			max_ag = ag;
142 		}
143 
144 		/*
145 		 * The AG reference count does two things: it enforces mutual
146 		 * exclusion when examining the suitability of an AG in this
147 		 * loop, and it guards against two filestreams being established
148 		 * in the same AG as each other.
149 		 */
150 		if (xfs_filestream_get_ag(mp, ag) > 1) {
151 			xfs_filestream_put_ag(mp, ag);
152 			goto next_ag;
153 		}
154 
155 		if (((minlen && longest >= minlen) ||
156 		     (!minlen && pag->pagf_freeblks >= minfree)) &&
157 		    (!xfs_perag_prefers_metadata(pag) ||
158 		     !(flags & XFS_PICK_USERDATA) ||
159 		     (flags & XFS_PICK_LOWSPACE))) {
160 
161 			/* Break out, retaining the reference on the AG. */
162 			free = pag->pagf_freeblks;
163 			xfs_perag_put(pag);
164 			*agp = ag;
165 			break;
166 		}
167 
168 		/* Drop the reference on this AG, it's not usable. */
169 		xfs_filestream_put_ag(mp, ag);
170 next_ag:
171 		xfs_perag_put(pag);
172 		/* Move to the next AG, wrapping to AG 0 if necessary. */
173 		if (++ag >= mp->m_sb.sb_agcount)
174 			ag = 0;
175 
176 		/* If a full pass of the AGs hasn't been done yet, continue. */
177 		if (ag != startag)
178 			continue;
179 
180 		/* Allow sleeping in xfs_alloc_read_agf() on the 2nd pass. */
181 		if (trylock != 0) {
182 			trylock = 0;
183 			continue;
184 		}
185 
186 		/* Finally, if lowspace wasn't set, set it for the 3rd pass. */
187 		if (!(flags & XFS_PICK_LOWSPACE)) {
188 			flags |= XFS_PICK_LOWSPACE;
189 			continue;
190 		}
191 
192 		/*
193 		 * Take the AG with the most free space, regardless of whether
194 		 * it's already in use by another filestream.
195 		 */
196 		if (max_ag != NULLAGNUMBER) {
197 			xfs_filestream_get_ag(mp, max_ag);
198 			free = maxfree;
199 			*agp = max_ag;
200 			break;
201 		}
202 
203 		/* take AG 0 if none matched */
204 		trace_xfs_filestream_pick(ip, *agp, free, nscan);
205 		*agp = 0;
206 		return 0;
207 	}
208 
209 	trace_xfs_filestream_pick(ip, *agp, free, nscan);
210 
211 	if (*agp == NULLAGNUMBER)
212 		return 0;
213 
214 	err = -ENOMEM;
215 	item = kmem_alloc(sizeof(*item), KM_MAYFAIL);
216 	if (!item)
217 		goto out_put_ag;
218 
219 	item->ag = *agp;
220 
221 	err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru);
222 	if (err) {
223 		if (err == -EEXIST)
224 			err = 0;
225 		goto out_free_item;
226 	}
227 
228 	return 0;
229 
230 out_free_item:
231 	kmem_free(item);
232 out_put_ag:
233 	xfs_filestream_put_ag(mp, *agp);
234 	return err;
235 }
236 
237 static struct xfs_inode *
238 xfs_filestream_get_parent(
239 	struct xfs_inode	*ip)
240 {
241 	struct inode		*inode = VFS_I(ip), *dir = NULL;
242 	struct dentry		*dentry, *parent;
243 
244 	dentry = d_find_alias(inode);
245 	if (!dentry)
246 		goto out;
247 
248 	parent = dget_parent(dentry);
249 	if (!parent)
250 		goto out_dput;
251 
252 	dir = igrab(d_inode(parent));
253 	dput(parent);
254 
255 out_dput:
256 	dput(dentry);
257 out:
258 	return dir ? XFS_I(dir) : NULL;
259 }
260 
261 /*
262  * Pick a new allocation group for the current file and its file stream.
263  *
264  * This is called when the allocator can't find a suitable extent in the
265  * current AG, and we have to move the stream into a new AG with more space.
266  */
267 static int
268 xfs_filestream_new_ag(
269 	struct xfs_bmalloca	*ap,
270 	xfs_agnumber_t		*agp)
271 {
272 	struct xfs_inode	*ip = ap->ip, *pip;
273 	struct xfs_mount	*mp = ip->i_mount;
274 	xfs_extlen_t		minlen = ap->length;
275 	xfs_agnumber_t		startag = 0;
276 	int			flags = 0;
277 	int			err = 0;
278 	struct xfs_mru_cache_elem *mru;
279 
280 	*agp = NULLAGNUMBER;
281 
282 	pip = xfs_filestream_get_parent(ip);
283 	if (!pip)
284 		goto exit;
285 
286 	mru = xfs_mru_cache_remove(mp->m_filestream, pip->i_ino);
287 	if (mru) {
288 		struct xfs_fstrm_item *item =
289 			container_of(mru, struct xfs_fstrm_item, mru);
290 		startag = (item->ag + 1) % mp->m_sb.sb_agcount;
291 	}
292 
293 	if (ap->datatype & XFS_ALLOC_USERDATA)
294 		flags |= XFS_PICK_USERDATA;
295 	if (ap->tp->t_flags & XFS_TRANS_LOWMODE)
296 		flags |= XFS_PICK_LOWSPACE;
297 
298 	err = xfs_filestream_pick_ag(pip, startag, agp, flags, minlen);
299 
300 	/*
301 	 * Only free the item here so we skip over the old AG earlier.
302 	 */
303 	if (mru)
304 		xfs_fstrm_free_func(mp, mru);
305 
306 	xfs_irele(pip);
307 exit:
308 	if (*agp == NULLAGNUMBER)
309 		*agp = 0;
310 	return err;
311 }
312 
313 /*
314  * Search for an allocation group with a single extent large enough for
315  * the request.  If one isn't found, then the largest available free extent is
316  * returned as the best length possible.
317  */
318 int
319 xfs_filestream_select_ag(
320 	struct xfs_bmalloca	*ap,
321 	struct xfs_alloc_arg	*args,
322 	xfs_extlen_t		*blen)
323 {
324 	struct xfs_mount	*mp = ap->ip->i_mount;
325 	struct xfs_perag	*pag;
326 	struct xfs_inode	*pip = NULL;
327 	xfs_agnumber_t		agno = NULLAGNUMBER;
328 	struct xfs_mru_cache_elem *mru;
329 	int			error;
330 
331 	args->total = ap->total;
332 	*blen = 0;
333 
334 	pip = xfs_filestream_get_parent(ap->ip);
335 	if (!pip) {
336 		agno = 0;
337 		goto new_ag;
338 	}
339 
340 	mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino);
341 	if (mru) {
342 		agno = container_of(mru, struct xfs_fstrm_item, mru)->ag;
343 		xfs_mru_cache_done(mp->m_filestream);
344 
345 		trace_xfs_filestream_lookup(mp, ap->ip->i_ino, agno);
346 		xfs_irele(pip);
347 
348 		ap->blkno = XFS_AGB_TO_FSB(args->mp, agno, 0);
349 		xfs_bmap_adjacent(ap);
350 
351 		pag = xfs_perag_grab(mp, agno);
352 		if (pag) {
353 			error = xfs_bmap_longest_free_extent(pag, args->tp, blen);
354 			xfs_perag_rele(pag);
355 			if (error) {
356 				if (error != -EAGAIN)
357 					return error;
358 				*blen = 0;
359 			}
360 		}
361 		if (*blen >= args->maxlen)
362 			goto out_select;
363 	} else if (xfs_is_inode32(mp)) {
364 		xfs_agnumber_t	 rotorstep = xfs_rotorstep;
365 		agno = (mp->m_agfrotor / rotorstep) %
366 				mp->m_sb.sb_agcount;
367 		mp->m_agfrotor = (mp->m_agfrotor + 1) %
368 				 (mp->m_sb.sb_agcount * rotorstep);
369 		xfs_irele(pip);
370 	} else {
371 		agno = XFS_INO_TO_AGNO(mp, pip->i_ino);
372 		xfs_irele(pip);
373 	}
374 
375 new_ag:
376 	ap->blkno = XFS_AGB_TO_FSB(args->mp, agno, 0);
377 	xfs_bmap_adjacent(ap);
378 
379 	/*
380 	 * If there is very little free space before we start a filestreams
381 	 * allocation, we're almost guaranteed to fail to find a better AG with
382 	 * larger free space available so we don't even try.
383 	 */
384 	if (ap->tp->t_flags & XFS_TRANS_LOWMODE)
385 		return 0;
386 
387 	error = xfs_filestream_new_ag(ap, &agno);
388 	if (error)
389 		return error;
390 	if (agno == NULLAGNUMBER) {
391 		agno = 0;
392 		goto out_select;
393 	}
394 
395 	pag = xfs_perag_grab(mp, agno);
396 	if (!pag)
397 		goto out_select;
398 
399 	error = xfs_bmap_longest_free_extent(pag, args->tp, blen);
400 	xfs_perag_rele(pag);
401 	if (error) {
402 		if (error != -EAGAIN)
403 			return error;
404 		*blen = 0;
405 	}
406 
407 out_select:
408 	ap->blkno = XFS_AGB_TO_FSB(mp, agno, 0);
409 	return 0;
410 }
411 
412 
413 void
414 xfs_filestream_deassociate(
415 	struct xfs_inode	*ip)
416 {
417 	xfs_mru_cache_delete(ip->i_mount->m_filestream, ip->i_ino);
418 }
419 
420 int
421 xfs_filestream_mount(
422 	xfs_mount_t	*mp)
423 {
424 	/*
425 	 * The filestream timer tunable is currently fixed within the range of
426 	 * one second to four minutes, with five seconds being the default.  The
427 	 * group count is somewhat arbitrary, but it'd be nice to adhere to the
428 	 * timer tunable to within about 10 percent.  This requires at least 10
429 	 * groups.
430 	 */
431 	return xfs_mru_cache_create(&mp->m_filestream, mp,
432 			xfs_fstrm_centisecs * 10, 10, xfs_fstrm_free_func);
433 }
434 
435 void
436 xfs_filestream_unmount(
437 	xfs_mount_t	*mp)
438 {
439 	xfs_mru_cache_destroy(mp->m_filestream);
440 }
441