xref: /openbmc/linux/fs/xfs/xfs_filestream.c (revision f38b46bbfa76a854c4c2a27b1617d66fefbb3f80)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2006-2007 Silicon Graphics, Inc.
4  * Copyright (c) 2014 Christoph Hellwig.
5  * All Rights Reserved.
6  */
7 #include "xfs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_mount.h"
13 #include "xfs_inode.h"
14 #include "xfs_bmap.h"
15 #include "xfs_bmap_util.h"
16 #include "xfs_alloc.h"
17 #include "xfs_mru_cache.h"
18 #include "xfs_trace.h"
19 #include "xfs_ag.h"
20 #include "xfs_ag_resv.h"
21 #include "xfs_trans.h"
22 #include "xfs_filestream.h"
23 
24 struct xfs_fstrm_item {
25 	struct xfs_mru_cache_elem	mru;
26 	xfs_agnumber_t			ag; /* AG in use for this directory */
27 };
28 
/*
 * Flag bits passed to xfs_filestream_pick_ag() to steer AG selection.
 */
enum xfs_fstrm_alloc {
	/*
	 * The allocation is for user data: unless XFS_PICK_LOWSPACE is also
	 * set, AGs for which xfs_perag_prefers_metadata() is true are skipped.
	 */
	XFS_PICK_USERDATA = (1 << 0),

	/* Low on space: accept AGs even if they prefer metadata. */
	XFS_PICK_LOWSPACE = (1 << 1),
};
33 
34 /*
35  * Allocation group filestream associations are tracked with per-ag atomic
36  * counters.  These counters allow xfs_filestream_pick_ag() to tell whether a
37  * particular AG already has active filestreams associated with it.
38  */
39 int
40 xfs_filestream_peek_ag(
41 	xfs_mount_t	*mp,
42 	xfs_agnumber_t	agno)
43 {
44 	struct xfs_perag *pag;
45 	int		ret;
46 
47 	pag = xfs_perag_get(mp, agno);
48 	ret = atomic_read(&pag->pagf_fstrms);
49 	xfs_perag_put(pag);
50 	return ret;
51 }
52 
53 static int
54 xfs_filestream_get_ag(
55 	xfs_mount_t	*mp,
56 	xfs_agnumber_t	agno)
57 {
58 	struct xfs_perag *pag;
59 	int		ret;
60 
61 	pag = xfs_perag_get(mp, agno);
62 	ret = atomic_inc_return(&pag->pagf_fstrms);
63 	xfs_perag_put(pag);
64 	return ret;
65 }
66 
67 static void
68 xfs_filestream_put_ag(
69 	xfs_mount_t	*mp,
70 	xfs_agnumber_t	agno)
71 {
72 	struct xfs_perag *pag;
73 
74 	pag = xfs_perag_get(mp, agno);
75 	atomic_dec(&pag->pagf_fstrms);
76 	xfs_perag_put(pag);
77 }
78 
79 static void
80 xfs_fstrm_free_func(
81 	void			*data,
82 	struct xfs_mru_cache_elem *mru)
83 {
84 	struct xfs_mount	*mp = data;
85 	struct xfs_fstrm_item	*item =
86 		container_of(mru, struct xfs_fstrm_item, mru);
87 
88 	xfs_filestream_put_ag(mp, item->ag);
89 	trace_xfs_filestream_free(mp, mru->key, item->ag);
90 
91 	kmem_free(item);
92 }
93 
94 /*
95  * Scan the AGs starting at startag looking for an AG that isn't in use and has
96  * at least minlen blocks free.
97  */
98 static int
99 xfs_filestream_pick_ag(
100 	struct xfs_inode	*ip,
101 	xfs_agnumber_t		*agp,
102 	int			flags,
103 	xfs_extlen_t		*longest)
104 {
105 	struct xfs_mount	*mp = ip->i_mount;
106 	struct xfs_fstrm_item	*item;
107 	struct xfs_perag	*pag;
108 	xfs_extlen_t		minlen = *longest;
109 	xfs_extlen_t		free = 0, minfree, maxfree = 0;
110 	xfs_agnumber_t		startag = *agp;
111 	xfs_agnumber_t		ag = startag;
112 	xfs_agnumber_t		max_ag = NULLAGNUMBER;
113 	int			err, trylock, nscan;
114 
115 	ASSERT(S_ISDIR(VFS_I(ip)->i_mode));
116 
117 	/* 2% of an AG's blocks must be free for it to be chosen. */
118 	minfree = mp->m_sb.sb_agblocks / 50;
119 
120 	*agp = NULLAGNUMBER;
121 
122 	/* For the first pass, don't sleep trying to init the per-AG. */
123 	trylock = XFS_ALLOC_FLAG_TRYLOCK;
124 
125 	for (nscan = 0; 1; nscan++) {
126 		trace_xfs_filestream_scan(mp, ip->i_ino, ag);
127 
128 		pag = xfs_perag_get(mp, ag);
129 		*longest = 0;
130 		err = xfs_bmap_longest_free_extent(pag, NULL, longest);
131 		if (err) {
132 			xfs_perag_put(pag);
133 			if (err != -EAGAIN)
134 				return err;
135 			/* Couldn't lock the AGF, skip this AG. */
136 			goto next_ag;
137 		}
138 
139 		/* Keep track of the AG with the most free blocks. */
140 		if (pag->pagf_freeblks > maxfree) {
141 			maxfree = pag->pagf_freeblks;
142 			max_ag = ag;
143 		}
144 
145 		/*
146 		 * The AG reference count does two things: it enforces mutual
147 		 * exclusion when examining the suitability of an AG in this
148 		 * loop, and it guards against two filestreams being established
149 		 * in the same AG as each other.
150 		 */
151 		if (xfs_filestream_get_ag(mp, ag) > 1) {
152 			xfs_filestream_put_ag(mp, ag);
153 			goto next_ag;
154 		}
155 
156 		if (((minlen && *longest >= minlen) ||
157 		     (!minlen && pag->pagf_freeblks >= minfree)) &&
158 		    (!xfs_perag_prefers_metadata(pag) ||
159 		     !(flags & XFS_PICK_USERDATA) ||
160 		     (flags & XFS_PICK_LOWSPACE))) {
161 
162 			/* Break out, retaining the reference on the AG. */
163 			free = pag->pagf_freeblks;
164 			xfs_perag_put(pag);
165 			*agp = ag;
166 			break;
167 		}
168 
169 		/* Drop the reference on this AG, it's not usable. */
170 		xfs_filestream_put_ag(mp, ag);
171 next_ag:
172 		xfs_perag_put(pag);
173 		/* Move to the next AG, wrapping to AG 0 if necessary. */
174 		if (++ag >= mp->m_sb.sb_agcount)
175 			ag = 0;
176 
177 		/* If a full pass of the AGs hasn't been done yet, continue. */
178 		if (ag != startag)
179 			continue;
180 
181 		/* Allow sleeping in xfs_alloc_read_agf() on the 2nd pass. */
182 		if (trylock != 0) {
183 			trylock = 0;
184 			continue;
185 		}
186 
187 		/* Finally, if lowspace wasn't set, set it for the 3rd pass. */
188 		if (!(flags & XFS_PICK_LOWSPACE)) {
189 			flags |= XFS_PICK_LOWSPACE;
190 			continue;
191 		}
192 
193 		/*
194 		 * Take the AG with the most free space, regardless of whether
195 		 * it's already in use by another filestream.
196 		 */
197 		if (max_ag != NULLAGNUMBER) {
198 			xfs_filestream_get_ag(mp, max_ag);
199 			free = maxfree;
200 			*agp = max_ag;
201 			break;
202 		}
203 
204 		/* take AG 0 if none matched */
205 		trace_xfs_filestream_pick(ip, *agp, free, nscan);
206 		*agp = 0;
207 		return 0;
208 	}
209 
210 	trace_xfs_filestream_pick(ip, *agp, free, nscan);
211 
212 	if (*agp == NULLAGNUMBER)
213 		return 0;
214 
215 	err = -ENOMEM;
216 	item = kmem_alloc(sizeof(*item), KM_MAYFAIL);
217 	if (!item)
218 		goto out_put_ag;
219 
220 	item->ag = *agp;
221 
222 	err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru);
223 	if (err) {
224 		if (err == -EEXIST)
225 			err = 0;
226 		goto out_free_item;
227 	}
228 
229 	return 0;
230 
231 out_free_item:
232 	kmem_free(item);
233 out_put_ag:
234 	xfs_filestream_put_ag(mp, *agp);
235 	return err;
236 }
237 
238 static struct xfs_inode *
239 xfs_filestream_get_parent(
240 	struct xfs_inode	*ip)
241 {
242 	struct inode		*inode = VFS_I(ip), *dir = NULL;
243 	struct dentry		*dentry, *parent;
244 
245 	dentry = d_find_alias(inode);
246 	if (!dentry)
247 		goto out;
248 
249 	parent = dget_parent(dentry);
250 	if (!parent)
251 		goto out_dput;
252 
253 	dir = igrab(d_inode(parent));
254 	dput(parent);
255 
256 out_dput:
257 	dput(dentry);
258 out:
259 	return dir ? XFS_I(dir) : NULL;
260 }
261 
262 /*
263  * Lookup the mru cache for an existing association. If one exists and we can
264  * use it, return with the agno and blen indicating that the allocation will
265  * proceed with that association.
266  *
267  * If we have no association, or we cannot use the current one and have to
268  * destroy it, return with blen = 0 and agno pointing at the next agno to try.
269  */
270 int
271 xfs_filestream_select_ag_mru(
272 	struct xfs_bmalloca	*ap,
273 	struct xfs_alloc_arg	*args,
274 	struct xfs_inode	*pip,
275 	xfs_agnumber_t		*agno,
276 	xfs_extlen_t		*blen)
277 {
278 	struct xfs_mount	*mp = ap->ip->i_mount;
279 	struct xfs_perag	*pag;
280 	struct xfs_mru_cache_elem *mru;
281 	int			error;
282 
283 	mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino);
284 	if (!mru)
285 		goto out_default_agno;
286 
287 	*agno = container_of(mru, struct xfs_fstrm_item, mru)->ag;
288 	xfs_mru_cache_done(mp->m_filestream);
289 
290 	trace_xfs_filestream_lookup(mp, ap->ip->i_ino, *agno);
291 
292 	ap->blkno = XFS_AGB_TO_FSB(args->mp, *agno, 0);
293 	xfs_bmap_adjacent(ap);
294 
295 	pag = xfs_perag_grab(mp, *agno);
296 	if (!pag)
297 		goto out_default_agno;
298 
299 	error = xfs_bmap_longest_free_extent(pag, args->tp, blen);
300 	xfs_perag_rele(pag);
301 	if (error) {
302 		if (error != -EAGAIN)
303 			return error;
304 		*blen = 0;
305 	}
306 
307 	/*
308 	 * We are done if there's still enough contiguous free space to succeed.
309 	 */
310 	if (*blen >= args->maxlen)
311 		return 0;
312 
313 	/* Changing parent AG association now, so remove the existing one. */
314 	mru = xfs_mru_cache_remove(mp->m_filestream, pip->i_ino);
315 	if (mru) {
316 		struct xfs_fstrm_item *item =
317 			container_of(mru, struct xfs_fstrm_item, mru);
318 		*agno = (item->ag + 1) % mp->m_sb.sb_agcount;
319 		xfs_fstrm_free_func(mp, mru);
320 		return 0;
321 	}
322 
323 out_default_agno:
324 	if (xfs_is_inode32(mp)) {
325 		xfs_agnumber_t	 rotorstep = xfs_rotorstep;
326 		*agno = (mp->m_agfrotor / rotorstep) %
327 				mp->m_sb.sb_agcount;
328 		mp->m_agfrotor = (mp->m_agfrotor + 1) %
329 				 (mp->m_sb.sb_agcount * rotorstep);
330 		return 0;
331 	}
332 	*agno = XFS_INO_TO_AGNO(mp, pip->i_ino);
333 	return 0;
334 
335 }
336 
337 /*
338  * Search for an allocation group with a single extent large enough for
339  * the request.  If one isn't found, then adjust the minimum allocation
340  * size to the largest space found.
341  */
342 int
343 xfs_filestream_select_ag(
344 	struct xfs_bmalloca	*ap,
345 	struct xfs_alloc_arg	*args,
346 	xfs_extlen_t		*blen)
347 {
348 	struct xfs_mount	*mp = ap->ip->i_mount;
349 	struct xfs_inode	*pip = NULL;
350 	xfs_agnumber_t		agno;
351 	int			flags = 0;
352 	int			error;
353 
354 	args->total = ap->total;
355 	*blen = 0;
356 
357 	pip = xfs_filestream_get_parent(ap->ip);
358 	if (!pip) {
359 		agno = 0;
360 		goto out_select;
361 	}
362 
363 	error = xfs_filestream_select_ag_mru(ap, args, pip, &agno, blen);
364 	if (error || *blen >= args->maxlen)
365 		goto out_rele;
366 
367 	ap->blkno = XFS_AGB_TO_FSB(args->mp, agno, 0);
368 	xfs_bmap_adjacent(ap);
369 
370 	/*
371 	 * If there is very little free space before we start a filestreams
372 	 * allocation, we're almost guaranteed to fail to find a better AG with
373 	 * larger free space available so we don't even try.
374 	 */
375 	if (ap->tp->t_flags & XFS_TRANS_LOWMODE)
376 		goto out_select;
377 
378 	if (ap->datatype & XFS_ALLOC_USERDATA)
379 		flags |= XFS_PICK_USERDATA;
380 	if (ap->tp->t_flags & XFS_TRANS_LOWMODE)
381 		flags |= XFS_PICK_LOWSPACE;
382 
383 	*blen = ap->length;
384 	error = xfs_filestream_pick_ag(pip, &agno, flags, blen);
385 	if (agno == NULLAGNUMBER) {
386 		agno = 0;
387 		*blen = 0;
388 	}
389 
390 out_select:
391 	ap->blkno = XFS_AGB_TO_FSB(mp, agno, 0);
392 out_rele:
393 	xfs_irele(pip);
394 	return error;
395 
396 }
397 
398 void
399 xfs_filestream_deassociate(
400 	struct xfs_inode	*ip)
401 {
402 	xfs_mru_cache_delete(ip->i_mount->m_filestream, ip->i_ino);
403 }
404 
405 int
406 xfs_filestream_mount(
407 	xfs_mount_t	*mp)
408 {
409 	/*
410 	 * The filestream timer tunable is currently fixed within the range of
411 	 * one second to four minutes, with five seconds being the default.  The
412 	 * group count is somewhat arbitrary, but it'd be nice to adhere to the
413 	 * timer tunable to within about 10 percent.  This requires at least 10
414 	 * groups.
415 	 */
416 	return xfs_mru_cache_create(&mp->m_filestream, mp,
417 			xfs_fstrm_centisecs * 10, 10, xfs_fstrm_free_func);
418 }
419 
420 void
421 xfs_filestream_unmount(
422 	xfs_mount_t	*mp)
423 {
424 	xfs_mru_cache_destroy(mp->m_filestream);
425 }
426