xref: /openbmc/linux/fs/xfs/libxfs/xfs_ag_resv.c (revision fca3aa16)
1 /*
2  * Copyright (C) 2016 Oracle.  All Rights Reserved.
3  *
4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it would be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write the Free Software Foundation,
18  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
19  */
20 #include "xfs.h"
21 #include "xfs_fs.h"
22 #include "xfs_shared.h"
23 #include "xfs_format.h"
24 #include "xfs_log_format.h"
25 #include "xfs_trans_resv.h"
26 #include "xfs_sb.h"
27 #include "xfs_mount.h"
28 #include "xfs_defer.h"
29 #include "xfs_alloc.h"
30 #include "xfs_errortag.h"
31 #include "xfs_error.h"
32 #include "xfs_trace.h"
33 #include "xfs_cksum.h"
34 #include "xfs_trans.h"
35 #include "xfs_bit.h"
36 #include "xfs_bmap.h"
37 #include "xfs_bmap_btree.h"
38 #include "xfs_ag_resv.h"
39 #include "xfs_trans_space.h"
40 #include "xfs_rmap_btree.h"
41 #include "xfs_btree.h"
42 #include "xfs_refcount_btree.h"
43 #include "xfs_ialloc_btree.h"
44 
45 /*
46  * Per-AG Block Reservations
47  *
48  * For some kinds of allocation group metadata structures, it is advantageous
49  * to reserve a small number of blocks in each AG so that future expansions of
50  * that data structure do not encounter ENOSPC because errors during a btree
51  * split cause the filesystem to go offline.
52  *
53  * Prior to the introduction of reflink, this wasn't an issue because the free
54  * space btrees maintain a reserve of space (the AGFL) to handle any expansion
55  * that may be necessary; and allocations of other metadata (inodes, BMBT,
56  * dir/attr) aren't restricted to a single AG.  However, with reflink it is
57  * possible to allocate all the space in an AG, have subsequent reflink/CoW
58  * activity expand the refcount btree, and discover that there's no space left
59  * to handle that expansion.  Since we can calculate the maximum size of the
60  * refcount btree, we can reserve space for it and avoid ENOSPC.
61  *
62  * Handling per-AG reservations consists of four changes to the allocator's
63  * behavior:  First, because these reservations are always needed, we decrease
64  * the ag_max_usable counter to reflect the size of the AG after the reserved
65  * blocks are taken.  Second, the reservations must be reflected in the
66  * fdblocks count to maintain proper accounting.  Third, each AG must maintain
67  * its own reserved block counter so that we can calculate the amount of space
68  * that must remain free to maintain the reservations.  Fourth, the "remaining
69  * reserved blocks" count must be used when calculating the length of the
70  * longest free extent in an AG and to clamp maxlen in the per-AG allocation
71  * functions.  In other words, we maintain a virtual allocation via in-core
72  * accounting tricks so that we don't have to clean up after a crash. :)
73  *
74  * Reserved blocks can be managed by passing one of the enum xfs_ag_resv_type
75  * values via struct xfs_alloc_arg or directly to the xfs_free_extent
76  * function.  It might seem a little funny to maintain a reservoir of blocks
77  * to feed another reservoir, but the AGFL only holds enough blocks to get
78  * through the next transaction.  The per-AG reservation is to ensure (we
79  * hope) that each AG never runs out of blocks.  Each data structure wanting
80  * to use the reservation system should update ask/used in xfs_ag_resv_init.
81  */
82 
83 /*
84  * Are we critically low on blocks?  For now we'll define that as the number
85  * of blocks we can get our hands on being less than 10% of what we reserved
86  * or less than some arbitrary number (maximum btree height).
87  */
88 bool
89 xfs_ag_resv_critical(
90 	struct xfs_perag		*pag,
91 	enum xfs_ag_resv_type		type)
92 {
93 	xfs_extlen_t			avail;
94 	xfs_extlen_t			orig;
95 
96 	switch (type) {
97 	case XFS_AG_RESV_METADATA:
98 		avail = pag->pagf_freeblks - pag->pag_rmapbt_resv.ar_reserved;
99 		orig = pag->pag_meta_resv.ar_asked;
100 		break;
101 	case XFS_AG_RESV_RMAPBT:
102 		avail = pag->pagf_freeblks + pag->pagf_flcount -
103 			pag->pag_meta_resv.ar_reserved;
104 		orig = pag->pag_rmapbt_resv.ar_asked;
105 		break;
106 	default:
107 		ASSERT(0);
108 		return false;
109 	}
110 
111 	trace_xfs_ag_resv_critical(pag, type, avail);
112 
113 	/* Critically low if less than 10% or max btree height remains. */
114 	return XFS_TEST_ERROR(avail < orig / 10 || avail < XFS_BTREE_MAXLEVELS,
115 			pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL);
116 }
117 
118 /*
119  * How many blocks are reserved but not used, and therefore must not be
120  * allocated away?
121  */
122 xfs_extlen_t
123 xfs_ag_resv_needed(
124 	struct xfs_perag		*pag,
125 	enum xfs_ag_resv_type		type)
126 {
127 	xfs_extlen_t			len;
128 
129 	len = pag->pag_meta_resv.ar_reserved + pag->pag_rmapbt_resv.ar_reserved;
130 	switch (type) {
131 	case XFS_AG_RESV_METADATA:
132 	case XFS_AG_RESV_RMAPBT:
133 		len -= xfs_perag_resv(pag, type)->ar_reserved;
134 		break;
135 	case XFS_AG_RESV_NONE:
136 		/* empty */
137 		break;
138 	default:
139 		ASSERT(0);
140 	}
141 
142 	trace_xfs_ag_resv_needed(pag, type, len);
143 
144 	return len;
145 }
146 
147 /* Clean out a reservation */
148 static int
149 __xfs_ag_resv_free(
150 	struct xfs_perag		*pag,
151 	enum xfs_ag_resv_type		type)
152 {
153 	struct xfs_ag_resv		*resv;
154 	xfs_extlen_t			oldresv;
155 	int				error;
156 
157 	trace_xfs_ag_resv_free(pag, type, 0);
158 
159 	resv = xfs_perag_resv(pag, type);
160 	if (pag->pag_agno == 0)
161 		pag->pag_mount->m_ag_max_usable += resv->ar_asked;
162 	/*
163 	 * RMAPBT blocks come from the AGFL and AGFL blocks are always
164 	 * considered "free", so whatever was reserved at mount time must be
165 	 * given back at umount.
166 	 */
167 	if (type == XFS_AG_RESV_RMAPBT)
168 		oldresv = resv->ar_orig_reserved;
169 	else
170 		oldresv = resv->ar_reserved;
171 	error = xfs_mod_fdblocks(pag->pag_mount, oldresv, true);
172 	resv->ar_reserved = 0;
173 	resv->ar_asked = 0;
174 
175 	if (error)
176 		trace_xfs_ag_resv_free_error(pag->pag_mount, pag->pag_agno,
177 				error, _RET_IP_);
178 	return error;
179 }
180 
181 /* Free a per-AG reservation. */
182 int
183 xfs_ag_resv_free(
184 	struct xfs_perag		*pag)
185 {
186 	int				error;
187 	int				err2;
188 
189 	error = __xfs_ag_resv_free(pag, XFS_AG_RESV_RMAPBT);
190 	err2 = __xfs_ag_resv_free(pag, XFS_AG_RESV_METADATA);
191 	if (err2 && !error)
192 		error = err2;
193 	return error;
194 }
195 
196 static int
197 __xfs_ag_resv_init(
198 	struct xfs_perag		*pag,
199 	enum xfs_ag_resv_type		type,
200 	xfs_extlen_t			ask,
201 	xfs_extlen_t			used)
202 {
203 	struct xfs_mount		*mp = pag->pag_mount;
204 	struct xfs_ag_resv		*resv;
205 	int				error;
206 	xfs_extlen_t			reserved;
207 
208 	if (used > ask)
209 		ask = used;
210 	reserved = ask - used;
211 
212 	error = xfs_mod_fdblocks(mp, -(int64_t)reserved, true);
213 	if (error) {
214 		trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno,
215 				error, _RET_IP_);
216 		xfs_warn(mp,
217 "Per-AG reservation for AG %u failed.  Filesystem may run out of space.",
218 				pag->pag_agno);
219 		return error;
220 	}
221 
222 	/*
223 	 * Reduce the maximum per-AG allocation length by however much we're
224 	 * trying to reserve for an AG.  Since this is a filesystem-wide
225 	 * counter, we only make the adjustment for AG 0.  This assumes that
226 	 * there aren't any AGs hungrier for per-AG reservation than AG 0.
227 	 */
228 	if (pag->pag_agno == 0)
229 		mp->m_ag_max_usable -= ask;
230 
231 	resv = xfs_perag_resv(pag, type);
232 	resv->ar_asked = ask;
233 	resv->ar_reserved = resv->ar_orig_reserved = reserved;
234 
235 	trace_xfs_ag_resv_init(pag, type, ask);
236 	return 0;
237 }
238 
239 /* Create a per-AG block reservation. */
240 int
241 xfs_ag_resv_init(
242 	struct xfs_perag		*pag)
243 {
244 	struct xfs_mount		*mp = pag->pag_mount;
245 	xfs_agnumber_t			agno = pag->pag_agno;
246 	xfs_extlen_t			ask;
247 	xfs_extlen_t			used;
248 	int				error = 0;
249 
250 	/* Create the metadata reservation. */
251 	if (pag->pag_meta_resv.ar_asked == 0) {
252 		ask = used = 0;
253 
254 		error = xfs_refcountbt_calc_reserves(mp, agno, &ask, &used);
255 		if (error)
256 			goto out;
257 
258 		error = xfs_finobt_calc_reserves(mp, agno, &ask, &used);
259 		if (error)
260 			goto out;
261 
262 		error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
263 				ask, used);
264 		if (error) {
265 			/*
266 			 * Because we didn't have per-AG reservations when the
267 			 * finobt feature was added we might not be able to
268 			 * reserve all needed blocks.  Warn and fall back to the
269 			 * old and potentially buggy code in that case, but
270 			 * ensure we do have the reservation for the refcountbt.
271 			 */
272 			ask = used = 0;
273 
274 			mp->m_inotbt_nores = true;
275 
276 			error = xfs_refcountbt_calc_reserves(mp, agno, &ask,
277 					&used);
278 			if (error)
279 				goto out;
280 
281 			error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
282 					ask, used);
283 			if (error)
284 				goto out;
285 		}
286 	}
287 
288 	/* Create the RMAPBT metadata reservation */
289 	if (pag->pag_rmapbt_resv.ar_asked == 0) {
290 		ask = used = 0;
291 
292 		error = xfs_rmapbt_calc_reserves(mp, agno, &ask, &used);
293 		if (error)
294 			goto out;
295 
296 		error = __xfs_ag_resv_init(pag, XFS_AG_RESV_RMAPBT, ask, used);
297 		if (error)
298 			goto out;
299 	}
300 
301 #ifdef DEBUG
302 	/* need to read in the AGF for the ASSERT below to work */
303 	error = xfs_alloc_pagf_init(pag->pag_mount, NULL, pag->pag_agno, 0);
304 	if (error)
305 		return error;
306 
307 	ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
308 	       xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved <=
309 	       pag->pagf_freeblks + pag->pagf_flcount);
310 #endif
311 out:
312 	return error;
313 }
314 
315 /* Allocate a block from the reservation. */
316 void
317 xfs_ag_resv_alloc_extent(
318 	struct xfs_perag		*pag,
319 	enum xfs_ag_resv_type		type,
320 	struct xfs_alloc_arg		*args)
321 {
322 	struct xfs_ag_resv		*resv;
323 	xfs_extlen_t			len;
324 	uint				field;
325 
326 	trace_xfs_ag_resv_alloc_extent(pag, type, args->len);
327 
328 	switch (type) {
329 	case XFS_AG_RESV_AGFL:
330 		return;
331 	case XFS_AG_RESV_METADATA:
332 	case XFS_AG_RESV_RMAPBT:
333 		resv = xfs_perag_resv(pag, type);
334 		break;
335 	default:
336 		ASSERT(0);
337 		/* fall through */
338 	case XFS_AG_RESV_NONE:
339 		field = args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS :
340 				       XFS_TRANS_SB_FDBLOCKS;
341 		xfs_trans_mod_sb(args->tp, field, -(int64_t)args->len);
342 		return;
343 	}
344 
345 	len = min_t(xfs_extlen_t, args->len, resv->ar_reserved);
346 	resv->ar_reserved -= len;
347 	if (type == XFS_AG_RESV_RMAPBT)
348 		return;
349 	/* Allocations of reserved blocks only need on-disk sb updates... */
350 	xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS, -(int64_t)len);
351 	/* ...but non-reserved blocks need in-core and on-disk updates. */
352 	if (args->len > len)
353 		xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_FDBLOCKS,
354 				-((int64_t)args->len - len));
355 }
356 
357 /* Free a block to the reservation. */
358 void
359 xfs_ag_resv_free_extent(
360 	struct xfs_perag		*pag,
361 	enum xfs_ag_resv_type		type,
362 	struct xfs_trans		*tp,
363 	xfs_extlen_t			len)
364 {
365 	xfs_extlen_t			leftover;
366 	struct xfs_ag_resv		*resv;
367 
368 	trace_xfs_ag_resv_free_extent(pag, type, len);
369 
370 	switch (type) {
371 	case XFS_AG_RESV_AGFL:
372 		return;
373 	case XFS_AG_RESV_METADATA:
374 	case XFS_AG_RESV_RMAPBT:
375 		resv = xfs_perag_resv(pag, type);
376 		break;
377 	default:
378 		ASSERT(0);
379 		/* fall through */
380 	case XFS_AG_RESV_NONE:
381 		xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (int64_t)len);
382 		return;
383 	}
384 
385 	leftover = min_t(xfs_extlen_t, len, resv->ar_asked - resv->ar_reserved);
386 	resv->ar_reserved += leftover;
387 	if (type == XFS_AG_RESV_RMAPBT)
388 		return;
389 	/* Freeing into the reserved pool only requires on-disk update... */
390 	xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, len);
391 	/* ...but freeing beyond that requires in-core and on-disk update. */
392 	if (len > leftover)
393 		xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len - leftover);
394 }
395