xref: /openbmc/linux/fs/xfs/xfs_filestream.c (revision 3e43877a)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2006-2007 Silicon Graphics, Inc.
4  * Copyright (c) 2014 Christoph Hellwig.
5  * All Rights Reserved.
6  */
7 #include "xfs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_mount.h"
13 #include "xfs_inode.h"
14 #include "xfs_bmap.h"
15 #include "xfs_bmap_util.h"
16 #include "xfs_alloc.h"
17 #include "xfs_mru_cache.h"
18 #include "xfs_trace.h"
19 #include "xfs_ag.h"
20 #include "xfs_ag_resv.h"
21 #include "xfs_trans.h"
22 #include "xfs_filestream.h"
23 
24 struct xfs_fstrm_item {
25 	struct xfs_mru_cache_elem	mru;
26 	xfs_agnumber_t			ag; /* AG in use for this directory */
27 };
28 
/*
 * Flag bits passed to xfs_filestream_pick_ag() in its 'flags' argument.  They
 * are tested and OR-ed together as a bitmask.
 */
enum xfs_fstrm_alloc {
	XFS_PICK_USERDATA = (1 << 0),
	XFS_PICK_LOWSPACE = (1 << 1),
};
33 
34 /*
35  * Allocation group filestream associations are tracked with per-ag atomic
36  * counters.  These counters allow xfs_filestream_pick_ag() to tell whether a
37  * particular AG already has active filestreams associated with it.
38  */
39 int
40 xfs_filestream_peek_ag(
41 	xfs_mount_t	*mp,
42 	xfs_agnumber_t	agno)
43 {
44 	struct xfs_perag *pag;
45 	int		ret;
46 
47 	pag = xfs_perag_get(mp, agno);
48 	ret = atomic_read(&pag->pagf_fstrms);
49 	xfs_perag_put(pag);
50 	return ret;
51 }
52 
53 static int
54 xfs_filestream_get_ag(
55 	xfs_mount_t	*mp,
56 	xfs_agnumber_t	agno)
57 {
58 	struct xfs_perag *pag;
59 	int		ret;
60 
61 	pag = xfs_perag_get(mp, agno);
62 	ret = atomic_inc_return(&pag->pagf_fstrms);
63 	xfs_perag_put(pag);
64 	return ret;
65 }
66 
67 static void
68 xfs_filestream_put_ag(
69 	xfs_mount_t	*mp,
70 	xfs_agnumber_t	agno)
71 {
72 	struct xfs_perag *pag;
73 
74 	pag = xfs_perag_get(mp, agno);
75 	atomic_dec(&pag->pagf_fstrms);
76 	xfs_perag_put(pag);
77 }
78 
79 static void
80 xfs_fstrm_free_func(
81 	void			*data,
82 	struct xfs_mru_cache_elem *mru)
83 {
84 	struct xfs_mount	*mp = data;
85 	struct xfs_fstrm_item	*item =
86 		container_of(mru, struct xfs_fstrm_item, mru);
87 
88 	xfs_filestream_put_ag(mp, item->ag);
89 	trace_xfs_filestream_free(mp, mru->key, item->ag);
90 
91 	kmem_free(item);
92 }
93 
94 /*
95  * Scan the AGs starting at startag looking for an AG that isn't in use and has
96  * at least minlen blocks free.
97  */
98 static int
99 xfs_filestream_pick_ag(
100 	struct xfs_inode	*ip,
101 	xfs_agnumber_t		*agp,
102 	int			flags,
103 	xfs_extlen_t		*longest)
104 {
105 	struct xfs_mount	*mp = ip->i_mount;
106 	struct xfs_fstrm_item	*item;
107 	struct xfs_perag	*pag;
108 	xfs_extlen_t		minlen = *longest;
109 	xfs_extlen_t		free = 0, minfree, maxfree = 0;
110 	xfs_agnumber_t		startag = *agp;
111 	xfs_agnumber_t		ag = startag;
112 	xfs_agnumber_t		max_ag = NULLAGNUMBER;
113 	int			err, trylock, nscan;
114 
115 	ASSERT(S_ISDIR(VFS_I(ip)->i_mode));
116 
117 	/* 2% of an AG's blocks must be free for it to be chosen. */
118 	minfree = mp->m_sb.sb_agblocks / 50;
119 
120 	*agp = NULLAGNUMBER;
121 
122 	/* For the first pass, don't sleep trying to init the per-AG. */
123 	trylock = XFS_ALLOC_FLAG_TRYLOCK;
124 
125 	for (nscan = 0; 1; nscan++) {
126 		trace_xfs_filestream_scan(mp, ip->i_ino, ag);
127 
128 		pag = xfs_perag_get(mp, ag);
129 		*longest = 0;
130 		err = xfs_bmap_longest_free_extent(pag, NULL, longest);
131 		if (err) {
132 			xfs_perag_put(pag);
133 			if (err != -EAGAIN)
134 				return err;
135 			/* Couldn't lock the AGF, skip this AG. */
136 			goto next_ag;
137 		}
138 
139 		/* Keep track of the AG with the most free blocks. */
140 		if (pag->pagf_freeblks > maxfree) {
141 			maxfree = pag->pagf_freeblks;
142 			max_ag = ag;
143 		}
144 
145 		/*
146 		 * The AG reference count does two things: it enforces mutual
147 		 * exclusion when examining the suitability of an AG in this
148 		 * loop, and it guards against two filestreams being established
149 		 * in the same AG as each other.
150 		 */
151 		if (xfs_filestream_get_ag(mp, ag) > 1) {
152 			xfs_filestream_put_ag(mp, ag);
153 			goto next_ag;
154 		}
155 
156 		if (((minlen && *longest >= minlen) ||
157 		     (!minlen && pag->pagf_freeblks >= minfree)) &&
158 		    (!xfs_perag_prefers_metadata(pag) ||
159 		     !(flags & XFS_PICK_USERDATA) ||
160 		     (flags & XFS_PICK_LOWSPACE))) {
161 
162 			/* Break out, retaining the reference on the AG. */
163 			free = pag->pagf_freeblks;
164 			xfs_perag_put(pag);
165 			*agp = ag;
166 			break;
167 		}
168 
169 		/* Drop the reference on this AG, it's not usable. */
170 		xfs_filestream_put_ag(mp, ag);
171 next_ag:
172 		xfs_perag_put(pag);
173 		/* Move to the next AG, wrapping to AG 0 if necessary. */
174 		if (++ag >= mp->m_sb.sb_agcount)
175 			ag = 0;
176 
177 		/* If a full pass of the AGs hasn't been done yet, continue. */
178 		if (ag != startag)
179 			continue;
180 
181 		/* Allow sleeping in xfs_alloc_read_agf() on the 2nd pass. */
182 		if (trylock != 0) {
183 			trylock = 0;
184 			continue;
185 		}
186 
187 		/* Finally, if lowspace wasn't set, set it for the 3rd pass. */
188 		if (!(flags & XFS_PICK_LOWSPACE)) {
189 			flags |= XFS_PICK_LOWSPACE;
190 			continue;
191 		}
192 
193 		/*
194 		 * Take the AG with the most free space, regardless of whether
195 		 * it's already in use by another filestream.
196 		 */
197 		if (max_ag != NULLAGNUMBER) {
198 			xfs_filestream_get_ag(mp, max_ag);
199 			free = maxfree;
200 			*agp = max_ag;
201 			break;
202 		}
203 
204 		/* take AG 0 if none matched */
205 		trace_xfs_filestream_pick(ip, *agp, free, nscan);
206 		*agp = 0;
207 		return 0;
208 	}
209 
210 	trace_xfs_filestream_pick(ip, *agp, free, nscan);
211 
212 	if (*agp == NULLAGNUMBER)
213 		return 0;
214 
215 	err = -ENOMEM;
216 	item = kmem_alloc(sizeof(*item), KM_MAYFAIL);
217 	if (!item)
218 		goto out_put_ag;
219 
220 	item->ag = *agp;
221 
222 	err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru);
223 	if (err) {
224 		if (err == -EEXIST)
225 			err = 0;
226 		goto out_free_item;
227 	}
228 
229 	return 0;
230 
231 out_free_item:
232 	kmem_free(item);
233 out_put_ag:
234 	xfs_filestream_put_ag(mp, *agp);
235 	return err;
236 }
237 
238 static struct xfs_inode *
239 xfs_filestream_get_parent(
240 	struct xfs_inode	*ip)
241 {
242 	struct inode		*inode = VFS_I(ip), *dir = NULL;
243 	struct dentry		*dentry, *parent;
244 
245 	dentry = d_find_alias(inode);
246 	if (!dentry)
247 		goto out;
248 
249 	parent = dget_parent(dentry);
250 	if (!parent)
251 		goto out_dput;
252 
253 	dir = igrab(d_inode(parent));
254 	dput(parent);
255 
256 out_dput:
257 	dput(dentry);
258 out:
259 	return dir ? XFS_I(dir) : NULL;
260 }
261 
262 /*
263  * Search for an allocation group with a single extent large enough for
264  * the request.  If one isn't found, then the largest available free extent is
265  * returned as the best length possible.
266  */
267 int
268 xfs_filestream_select_ag(
269 	struct xfs_bmalloca	*ap,
270 	struct xfs_alloc_arg	*args,
271 	xfs_extlen_t		*blen)
272 {
273 	struct xfs_mount	*mp = ap->ip->i_mount;
274 	struct xfs_perag	*pag;
275 	struct xfs_inode	*pip = NULL;
276 	xfs_agnumber_t		agno = NULLAGNUMBER;
277 	struct xfs_mru_cache_elem *mru;
278 	int			flags = 0;
279 	int			error = 0;
280 
281 	args->total = ap->total;
282 	*blen = 0;
283 
284 	pip = xfs_filestream_get_parent(ap->ip);
285 	if (!pip) {
286 		agno = 0;
287 		goto out_select;
288 	}
289 
290 	mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino);
291 	if (mru) {
292 		agno = container_of(mru, struct xfs_fstrm_item, mru)->ag;
293 		xfs_mru_cache_done(mp->m_filestream);
294 		mru = NULL;
295 
296 		trace_xfs_filestream_lookup(mp, ap->ip->i_ino, agno);
297 		xfs_irele(pip);
298 
299 		ap->blkno = XFS_AGB_TO_FSB(args->mp, agno, 0);
300 		xfs_bmap_adjacent(ap);
301 
302 		pag = xfs_perag_grab(mp, agno);
303 		if (pag) {
304 			error = xfs_bmap_longest_free_extent(pag, args->tp, blen);
305 			xfs_perag_rele(pag);
306 			if (error) {
307 				if (error != -EAGAIN)
308 					goto out_error;
309 				*blen = 0;
310 			}
311 		}
312 		if (*blen >= args->maxlen)
313 			goto out_select;
314 	} else if (xfs_is_inode32(mp)) {
315 		xfs_agnumber_t	 rotorstep = xfs_rotorstep;
316 		agno = (mp->m_agfrotor / rotorstep) %
317 				mp->m_sb.sb_agcount;
318 		mp->m_agfrotor = (mp->m_agfrotor + 1) %
319 				 (mp->m_sb.sb_agcount * rotorstep);
320 	} else {
321 		agno = XFS_INO_TO_AGNO(mp, pip->i_ino);
322 	}
323 
324 	/* Changing parent AG association now, so remove the existing one. */
325 	mru = xfs_mru_cache_remove(mp->m_filestream, pip->i_ino);
326 	if (mru) {
327 		struct xfs_fstrm_item *item =
328 			container_of(mru, struct xfs_fstrm_item, mru);
329 		agno = (item->ag + 1) % mp->m_sb.sb_agcount;
330 		xfs_fstrm_free_func(mp, mru);
331 	}
332 	ap->blkno = XFS_AGB_TO_FSB(args->mp, agno, 0);
333 	xfs_bmap_adjacent(ap);
334 
335 	/*
336 	 * If there is very little free space before we start a filestreams
337 	 * allocation, we're almost guaranteed to fail to find a better AG with
338 	 * larger free space available so we don't even try.
339 	 */
340 	if (ap->tp->t_flags & XFS_TRANS_LOWMODE)
341 		goto out_select;
342 
343 	if (ap->datatype & XFS_ALLOC_USERDATA)
344 		flags |= XFS_PICK_USERDATA;
345 	if (ap->tp->t_flags & XFS_TRANS_LOWMODE)
346 		flags |= XFS_PICK_LOWSPACE;
347 
348 	*blen = ap->length;
349 	error = xfs_filestream_pick_ag(pip, &agno, flags, blen);
350 	if (error)
351 		goto out_error;
352 	if (agno == NULLAGNUMBER) {
353 		agno = 0;
354 		*blen = 0;
355 	}
356 
357 out_select:
358 	ap->blkno = XFS_AGB_TO_FSB(mp, agno, 0);
359 out_error:
360 	xfs_irele(pip);
361 	return error;
362 
363 }
364 
365 void
366 xfs_filestream_deassociate(
367 	struct xfs_inode	*ip)
368 {
369 	xfs_mru_cache_delete(ip->i_mount->m_filestream, ip->i_ino);
370 }
371 
372 int
373 xfs_filestream_mount(
374 	xfs_mount_t	*mp)
375 {
376 	/*
377 	 * The filestream timer tunable is currently fixed within the range of
378 	 * one second to four minutes, with five seconds being the default.  The
379 	 * group count is somewhat arbitrary, but it'd be nice to adhere to the
380 	 * timer tunable to within about 10 percent.  This requires at least 10
381 	 * groups.
382 	 */
383 	return xfs_mru_cache_create(&mp->m_filestream, mp,
384 			xfs_fstrm_centisecs * 10, 10, xfs_fstrm_free_func);
385 }
386 
387 void
388 xfs_filestream_unmount(
389 	xfs_mount_t	*mp)
390 {
391 	xfs_mru_cache_destroy(mp->m_filestream);
392 }
393