xref: /openbmc/linux/fs/xfs/xfs_filestream.c (revision 3054face139f9c77566a90a0524dd85c2f38c7f2)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2006-2007 Silicon Graphics, Inc.
4  * Copyright (c) 2014 Christoph Hellwig.
5  * All Rights Reserved.
6  */
7 #include "xfs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_mount.h"
13 #include "xfs_inode.h"
14 #include "xfs_bmap.h"
15 #include "xfs_bmap_util.h"
16 #include "xfs_alloc.h"
17 #include "xfs_mru_cache.h"
18 #include "xfs_trace.h"
19 #include "xfs_ag.h"
20 #include "xfs_ag_resv.h"
21 #include "xfs_trans.h"
22 #include "xfs_filestream.h"
23 
24 struct xfs_fstrm_item {
25 	struct xfs_mru_cache_elem	mru;
26 	struct xfs_perag		*pag; /* AG in use for this directory */
27 };
28 
/*
 * Flag bits steering xfs_filestream_pick_ag().  They are OR-ed together into
 * an int mask, so each value is a distinct bit.
 */
enum xfs_fstrm_alloc {
	/*
	 * The allocation is for user data: AGs that prefer metadata are
	 * skipped unless XFS_PICK_LOWSPACE is also set.
	 */
	XFS_PICK_USERDATA	= (1 << 0),

	/* Low on space: lifts the metadata-preferred AG restriction. */
	XFS_PICK_LOWSPACE	= (1 << 1),
};
33 
34 /*
35  * Allocation group filestream associations are tracked with per-ag atomic
36  * counters.  These counters allow xfs_filestream_pick_ag() to tell whether a
37  * particular AG already has active filestreams associated with it.
38  */
39 int
40 xfs_filestream_peek_ag(
41 	xfs_mount_t	*mp,
42 	xfs_agnumber_t	agno)
43 {
44 	struct xfs_perag *pag;
45 	int		ret;
46 
47 	pag = xfs_perag_get(mp, agno);
48 	ret = atomic_read(&pag->pagf_fstrms);
49 	xfs_perag_put(pag);
50 	return ret;
51 }
52 
53 static void
54 xfs_fstrm_free_func(
55 	void			*data,
56 	struct xfs_mru_cache_elem *mru)
57 {
58 	struct xfs_fstrm_item	*item =
59 		container_of(mru, struct xfs_fstrm_item, mru);
60 	struct xfs_perag	*pag = item->pag;
61 
62 	trace_xfs_filestream_free(pag->pag_mount, mru->key, pag->pag_agno);
63 	atomic_dec(&pag->pagf_fstrms);
64 	xfs_perag_rele(pag);
65 
66 	kmem_free(item);
67 }
68 
69 /*
70  * Scan the AGs starting at startag looking for an AG that isn't in use and has
71  * at least minlen blocks free.
72  */
73 static int
74 xfs_filestream_pick_ag(
75 	struct xfs_inode	*ip,
76 	xfs_agnumber_t		*agp,
77 	int			flags,
78 	xfs_extlen_t		*longest)
79 {
80 	struct xfs_mount	*mp = ip->i_mount;
81 	struct xfs_fstrm_item	*item;
82 	struct xfs_perag	*pag;
83 	struct xfs_perag	*max_pag = NULL;
84 	xfs_extlen_t		minlen = *longest;
85 	xfs_extlen_t		free = 0, minfree, maxfree = 0;
86 	xfs_agnumber_t		startag = *agp;
87 	xfs_agnumber_t		ag = startag;
88 	int			err, trylock, nscan;
89 
90 	ASSERT(S_ISDIR(VFS_I(ip)->i_mode));
91 
92 	/* 2% of an AG's blocks must be free for it to be chosen. */
93 	minfree = mp->m_sb.sb_agblocks / 50;
94 
95 	*agp = NULLAGNUMBER;
96 
97 	/* For the first pass, don't sleep trying to init the per-AG. */
98 	trylock = XFS_ALLOC_FLAG_TRYLOCK;
99 
100 	for (nscan = 0; 1; nscan++) {
101 		trace_xfs_filestream_scan(mp, ip->i_ino, ag);
102 
103 		err = 0;
104 		pag = xfs_perag_grab(mp, ag);
105 		if (!pag)
106 			goto next_ag;
107 		*longest = 0;
108 		err = xfs_bmap_longest_free_extent(pag, NULL, longest);
109 		if (err) {
110 			xfs_perag_rele(pag);
111 			if (err != -EAGAIN)
112 				break;
113 			/* Couldn't lock the AGF, skip this AG. */
114 			goto next_ag;
115 		}
116 
117 		/* Keep track of the AG with the most free blocks. */
118 		if (pag->pagf_freeblks > maxfree) {
119 			maxfree = pag->pagf_freeblks;
120 			if (max_pag)
121 				xfs_perag_rele(max_pag);
122 			atomic_inc(&pag->pag_active_ref);
123 			max_pag = pag;
124 		}
125 
126 		/*
127 		 * The AG reference count does two things: it enforces mutual
128 		 * exclusion when examining the suitability of an AG in this
129 		 * loop, and it guards against two filestreams being established
130 		 * in the same AG as each other.
131 		 */
132 		if (atomic_inc_return(&pag->pagf_fstrms) > 1) {
133 			atomic_dec(&pag->pagf_fstrms);
134 			xfs_perag_rele(pag);
135 			goto next_ag;
136 		}
137 
138 		if (((minlen && *longest >= minlen) ||
139 		     (!minlen && pag->pagf_freeblks >= minfree)) &&
140 		    (!xfs_perag_prefers_metadata(pag) ||
141 		     !(flags & XFS_PICK_USERDATA) ||
142 		     (flags & XFS_PICK_LOWSPACE))) {
143 
144 			/* Break out, retaining the reference on the AG. */
145 			free = pag->pagf_freeblks;
146 			break;
147 		}
148 
149 		/* Drop the reference on this AG, it's not usable. */
150 		atomic_dec(&pag->pagf_fstrms);
151 next_ag:
152 		/* Move to the next AG, wrapping to AG 0 if necessary. */
153 		if (++ag >= mp->m_sb.sb_agcount)
154 			ag = 0;
155 
156 		/* If a full pass of the AGs hasn't been done yet, continue. */
157 		if (ag != startag)
158 			continue;
159 
160 		/* Allow sleeping in xfs_alloc_read_agf() on the 2nd pass. */
161 		if (trylock != 0) {
162 			trylock = 0;
163 			continue;
164 		}
165 
166 		/* Finally, if lowspace wasn't set, set it for the 3rd pass. */
167 		if (!(flags & XFS_PICK_LOWSPACE)) {
168 			flags |= XFS_PICK_LOWSPACE;
169 			continue;
170 		}
171 
172 		/*
173 		 * Take the AG with the most free space, regardless of whether
174 		 * it's already in use by another filestream.
175 		 */
176 		if (max_pag) {
177 			pag = max_pag;
178 			atomic_inc(&pag->pagf_fstrms);
179 			free = maxfree;
180 			break;
181 		}
182 
183 		/* take AG 0 if none matched */
184 		trace_xfs_filestream_pick(ip, *agp, free, nscan);
185 		*agp = 0;
186 		return 0;
187 	}
188 
189 	trace_xfs_filestream_pick(ip, pag ? pag->pag_agno : NULLAGNUMBER,
190 			free, nscan);
191 
192 	if (max_pag)
193 		xfs_perag_rele(max_pag);
194 
195 	if (err)
196 		return err;
197 
198 	if (!pag) {
199 		*agp = NULLAGNUMBER;
200 		return 0;
201 	}
202 
203 	err = -ENOMEM;
204 	item = kmem_alloc(sizeof(*item), KM_MAYFAIL);
205 	if (!item)
206 		goto out_put_ag;
207 
208 	item->pag = pag;
209 
210 	err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru);
211 	if (err) {
212 		if (err == -EEXIST)
213 			err = 0;
214 		goto out_free_item;
215 	}
216 
217 	*agp = pag->pag_agno;
218 	return 0;
219 
220 out_free_item:
221 	kmem_free(item);
222 out_put_ag:
223 	atomic_dec(&pag->pagf_fstrms);
224 	xfs_perag_rele(pag);
225 	return err;
226 }
227 
228 static struct xfs_inode *
229 xfs_filestream_get_parent(
230 	struct xfs_inode	*ip)
231 {
232 	struct inode		*inode = VFS_I(ip), *dir = NULL;
233 	struct dentry		*dentry, *parent;
234 
235 	dentry = d_find_alias(inode);
236 	if (!dentry)
237 		goto out;
238 
239 	parent = dget_parent(dentry);
240 	if (!parent)
241 		goto out_dput;
242 
243 	dir = igrab(d_inode(parent));
244 	dput(parent);
245 
246 out_dput:
247 	dput(dentry);
248 out:
249 	return dir ? XFS_I(dir) : NULL;
250 }
251 
252 /*
253  * Lookup the mru cache for an existing association. If one exists and we can
254  * use it, return with the agno and blen indicating that the allocation will
255  * proceed with that association.
256  *
257  * If we have no association, or we cannot use the current one and have to
258  * destroy it, return with blen = 0 and agno pointing at the next agno to try.
259  */
260 int
261 xfs_filestream_select_ag_mru(
262 	struct xfs_bmalloca	*ap,
263 	struct xfs_alloc_arg	*args,
264 	struct xfs_inode	*pip,
265 	xfs_agnumber_t		*agno,
266 	xfs_extlen_t		*blen)
267 {
268 	struct xfs_mount	*mp = ap->ip->i_mount;
269 	struct xfs_perag	*pag;
270 	struct xfs_mru_cache_elem *mru;
271 	int			error;
272 
273 	mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino);
274 	if (!mru)
275 		goto out_default_agno;
276 
277 	pag = container_of(mru, struct xfs_fstrm_item, mru)->pag;
278 	xfs_mru_cache_done(mp->m_filestream);
279 
280 	trace_xfs_filestream_lookup(mp, ap->ip->i_ino, pag->pag_agno);
281 
282 	ap->blkno = XFS_AGB_TO_FSB(args->mp, pag->pag_agno, 0);
283 	xfs_bmap_adjacent(ap);
284 
285 	error = xfs_bmap_longest_free_extent(pag, args->tp, blen);
286 	if (error) {
287 		if (error != -EAGAIN)
288 			return error;
289 		*blen = 0;
290 	}
291 
292 	/*
293 	 * We are done if there's still enough contiguous free space to succeed.
294 	 */
295 	*agno = pag->pag_agno;
296 	if (*blen >= args->maxlen)
297 		return 0;
298 
299 	/* Changing parent AG association now, so remove the existing one. */
300 	mru = xfs_mru_cache_remove(mp->m_filestream, pip->i_ino);
301 	if (mru) {
302 		struct xfs_fstrm_item *item =
303 			container_of(mru, struct xfs_fstrm_item, mru);
304 		*agno = (item->pag->pag_agno + 1) % mp->m_sb.sb_agcount;
305 		xfs_fstrm_free_func(mp, mru);
306 		return 0;
307 	}
308 
309 out_default_agno:
310 	if (xfs_is_inode32(mp)) {
311 		xfs_agnumber_t	 rotorstep = xfs_rotorstep;
312 		*agno = (mp->m_agfrotor / rotorstep) %
313 				mp->m_sb.sb_agcount;
314 		mp->m_agfrotor = (mp->m_agfrotor + 1) %
315 				 (mp->m_sb.sb_agcount * rotorstep);
316 		return 0;
317 	}
318 	*agno = XFS_INO_TO_AGNO(mp, pip->i_ino);
319 	return 0;
320 
321 }
322 
323 /*
324  * Search for an allocation group with a single extent large enough for
325  * the request.  If one isn't found, then adjust the minimum allocation
326  * size to the largest space found.
327  */
328 int
329 xfs_filestream_select_ag(
330 	struct xfs_bmalloca	*ap,
331 	struct xfs_alloc_arg	*args,
332 	xfs_extlen_t		*blen)
333 {
334 	struct xfs_mount	*mp = ap->ip->i_mount;
335 	struct xfs_inode	*pip = NULL;
336 	xfs_agnumber_t		agno;
337 	int			flags = 0;
338 	int			error;
339 
340 	args->total = ap->total;
341 	*blen = 0;
342 
343 	pip = xfs_filestream_get_parent(ap->ip);
344 	if (!pip) {
345 		agno = 0;
346 		goto out_select;
347 	}
348 
349 	error = xfs_filestream_select_ag_mru(ap, args, pip, &agno, blen);
350 	if (error || *blen >= args->maxlen)
351 		goto out_rele;
352 
353 	ap->blkno = XFS_AGB_TO_FSB(args->mp, agno, 0);
354 	xfs_bmap_adjacent(ap);
355 
356 	/*
357 	 * If there is very little free space before we start a filestreams
358 	 * allocation, we're almost guaranteed to fail to find a better AG with
359 	 * larger free space available so we don't even try.
360 	 */
361 	if (ap->tp->t_flags & XFS_TRANS_LOWMODE)
362 		goto out_select;
363 
364 	if (ap->datatype & XFS_ALLOC_USERDATA)
365 		flags |= XFS_PICK_USERDATA;
366 	if (ap->tp->t_flags & XFS_TRANS_LOWMODE)
367 		flags |= XFS_PICK_LOWSPACE;
368 
369 	*blen = ap->length;
370 	error = xfs_filestream_pick_ag(pip, &agno, flags, blen);
371 	if (agno == NULLAGNUMBER) {
372 		agno = 0;
373 		*blen = 0;
374 	}
375 
376 out_select:
377 	ap->blkno = XFS_AGB_TO_FSB(mp, agno, 0);
378 out_rele:
379 	xfs_irele(pip);
380 	return error;
381 
382 }
383 
384 void
385 xfs_filestream_deassociate(
386 	struct xfs_inode	*ip)
387 {
388 	xfs_mru_cache_delete(ip->i_mount->m_filestream, ip->i_ino);
389 }
390 
391 int
392 xfs_filestream_mount(
393 	xfs_mount_t	*mp)
394 {
395 	/*
396 	 * The filestream timer tunable is currently fixed within the range of
397 	 * one second to four minutes, with five seconds being the default.  The
398 	 * group count is somewhat arbitrary, but it'd be nice to adhere to the
399 	 * timer tunable to within about 10 percent.  This requires at least 10
400 	 * groups.
401 	 */
402 	return xfs_mru_cache_create(&mp->m_filestream, mp,
403 			xfs_fstrm_centisecs * 10, 10, xfs_fstrm_free_func);
404 }
405 
406 void
407 xfs_filestream_unmount(
408 	xfs_mount_t	*mp)
409 {
410 	xfs_mru_cache_destroy(mp->m_filestream);
411 }
412