xref: /openbmc/linux/fs/xfs/libxfs/xfs_alloc_btree.c (revision 56370ea6)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
4  * All Rights Reserved.
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_mount.h"
13 #include "xfs_btree.h"
14 #include "xfs_btree_staging.h"
15 #include "xfs_alloc_btree.h"
16 #include "xfs_alloc.h"
17 #include "xfs_extent_busy.h"
18 #include "xfs_error.h"
19 #include "xfs_trace.h"
20 #include "xfs_trans.h"
21 #include "xfs_ag.h"
22 
23 
24 STATIC struct xfs_btree_cur *
25 xfs_allocbt_dup_cursor(
26 	struct xfs_btree_cur	*cur)
27 {
28 	return xfs_allocbt_init_cursor(cur->bc_mp, cur->bc_tp,
29 			cur->bc_ag.agbp, cur->bc_ag.pag, cur->bc_btnum);
30 }
31 
32 STATIC void
33 xfs_allocbt_set_root(
34 	struct xfs_btree_cur		*cur,
35 	const union xfs_btree_ptr	*ptr,
36 	int				inc)
37 {
38 	struct xfs_buf		*agbp = cur->bc_ag.agbp;
39 	struct xfs_agf		*agf = agbp->b_addr;
40 	int			btnum = cur->bc_btnum;
41 
42 	ASSERT(ptr->s != 0);
43 
44 	agf->agf_roots[btnum] = ptr->s;
45 	be32_add_cpu(&agf->agf_levels[btnum], inc);
46 	cur->bc_ag.pag->pagf_levels[btnum] += inc;
47 
48 	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
49 }
50 
51 STATIC int
52 xfs_allocbt_alloc_block(
53 	struct xfs_btree_cur		*cur,
54 	const union xfs_btree_ptr	*start,
55 	union xfs_btree_ptr		*new,
56 	int				*stat)
57 {
58 	int			error;
59 	xfs_agblock_t		bno;
60 
61 	/* Allocate the new block from the freelist. If we can't, give up.  */
62 	error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_ag.agbp,
63 				       &bno, 1);
64 	if (error)
65 		return error;
66 
67 	if (bno == NULLAGBLOCK) {
68 		*stat = 0;
69 		return 0;
70 	}
71 
72 	atomic64_inc(&cur->bc_mp->m_allocbt_blks);
73 	xfs_extent_busy_reuse(cur->bc_mp, cur->bc_ag.agbp->b_pag, bno, 1, false);
74 
75 	new->s = cpu_to_be32(bno);
76 
77 	*stat = 1;
78 	return 0;
79 }
80 
81 STATIC int
82 xfs_allocbt_free_block(
83 	struct xfs_btree_cur	*cur,
84 	struct xfs_buf		*bp)
85 {
86 	struct xfs_buf		*agbp = cur->bc_ag.agbp;
87 	xfs_agblock_t		bno;
88 	int			error;
89 
90 	bno = xfs_daddr_to_agbno(cur->bc_mp, xfs_buf_daddr(bp));
91 	error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
92 	if (error)
93 		return error;
94 
95 	atomic64_dec(&cur->bc_mp->m_allocbt_blks);
96 	xfs_extent_busy_insert(cur->bc_tp, agbp->b_pag, bno, 1,
97 			      XFS_EXTENT_BUSY_SKIP_DISCARD);
98 	return 0;
99 }
100 
101 /*
102  * Update the longest extent in the AGF
103  */
104 STATIC void
105 xfs_allocbt_update_lastrec(
106 	struct xfs_btree_cur		*cur,
107 	const struct xfs_btree_block	*block,
108 	const union xfs_btree_rec	*rec,
109 	int				ptr,
110 	int				reason)
111 {
112 	struct xfs_agf		*agf = cur->bc_ag.agbp->b_addr;
113 	struct xfs_perag	*pag;
114 	__be32			len;
115 	int			numrecs;
116 
117 	ASSERT(cur->bc_btnum == XFS_BTNUM_CNT);
118 
119 	switch (reason) {
120 	case LASTREC_UPDATE:
121 		/*
122 		 * If this is the last leaf block and it's the last record,
123 		 * then update the size of the longest extent in the AG.
124 		 */
125 		if (ptr != xfs_btree_get_numrecs(block))
126 			return;
127 		len = rec->alloc.ar_blockcount;
128 		break;
129 	case LASTREC_INSREC:
130 		if (be32_to_cpu(rec->alloc.ar_blockcount) <=
131 		    be32_to_cpu(agf->agf_longest))
132 			return;
133 		len = rec->alloc.ar_blockcount;
134 		break;
135 	case LASTREC_DELREC:
136 		numrecs = xfs_btree_get_numrecs(block);
137 		if (ptr <= numrecs)
138 			return;
139 		ASSERT(ptr == numrecs + 1);
140 
141 		if (numrecs) {
142 			xfs_alloc_rec_t *rrp;
143 
144 			rrp = XFS_ALLOC_REC_ADDR(cur->bc_mp, block, numrecs);
145 			len = rrp->ar_blockcount;
146 		} else {
147 			len = 0;
148 		}
149 
150 		break;
151 	default:
152 		ASSERT(0);
153 		return;
154 	}
155 
156 	agf->agf_longest = len;
157 	pag = cur->bc_ag.agbp->b_pag;
158 	pag->pagf_longest = be32_to_cpu(len);
159 	xfs_alloc_log_agf(cur->bc_tp, cur->bc_ag.agbp, XFS_AGF_LONGEST);
160 }
161 
162 STATIC int
163 xfs_allocbt_get_minrecs(
164 	struct xfs_btree_cur	*cur,
165 	int			level)
166 {
167 	return cur->bc_mp->m_alloc_mnr[level != 0];
168 }
169 
170 STATIC int
171 xfs_allocbt_get_maxrecs(
172 	struct xfs_btree_cur	*cur,
173 	int			level)
174 {
175 	return cur->bc_mp->m_alloc_mxr[level != 0];
176 }
177 
178 STATIC void
179 xfs_allocbt_init_key_from_rec(
180 	union xfs_btree_key		*key,
181 	const union xfs_btree_rec	*rec)
182 {
183 	key->alloc.ar_startblock = rec->alloc.ar_startblock;
184 	key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
185 }
186 
187 STATIC void
188 xfs_bnobt_init_high_key_from_rec(
189 	union xfs_btree_key		*key,
190 	const union xfs_btree_rec	*rec)
191 {
192 	__u32				x;
193 
194 	x = be32_to_cpu(rec->alloc.ar_startblock);
195 	x += be32_to_cpu(rec->alloc.ar_blockcount) - 1;
196 	key->alloc.ar_startblock = cpu_to_be32(x);
197 	key->alloc.ar_blockcount = 0;
198 }
199 
200 STATIC void
201 xfs_cntbt_init_high_key_from_rec(
202 	union xfs_btree_key		*key,
203 	const union xfs_btree_rec	*rec)
204 {
205 	key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
206 	key->alloc.ar_startblock = 0;
207 }
208 
209 STATIC void
210 xfs_allocbt_init_rec_from_cur(
211 	struct xfs_btree_cur	*cur,
212 	union xfs_btree_rec	*rec)
213 {
214 	rec->alloc.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock);
215 	rec->alloc.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount);
216 }
217 
218 STATIC void
219 xfs_allocbt_init_ptr_from_cur(
220 	struct xfs_btree_cur	*cur,
221 	union xfs_btree_ptr	*ptr)
222 {
223 	struct xfs_agf		*agf = cur->bc_ag.agbp->b_addr;
224 
225 	ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno));
226 
227 	ptr->s = agf->agf_roots[cur->bc_btnum];
228 }
229 
230 STATIC int64_t
231 xfs_bnobt_key_diff(
232 	struct xfs_btree_cur		*cur,
233 	const union xfs_btree_key	*key)
234 {
235 	struct xfs_alloc_rec_incore	*rec = &cur->bc_rec.a;
236 	const struct xfs_alloc_rec	*kp = &key->alloc;
237 
238 	return (int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
239 }
240 
241 STATIC int64_t
242 xfs_cntbt_key_diff(
243 	struct xfs_btree_cur		*cur,
244 	const union xfs_btree_key	*key)
245 {
246 	struct xfs_alloc_rec_incore	*rec = &cur->bc_rec.a;
247 	const struct xfs_alloc_rec	*kp = &key->alloc;
248 	int64_t				diff;
249 
250 	diff = (int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount;
251 	if (diff)
252 		return diff;
253 
254 	return (int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
255 }
256 
257 STATIC int64_t
258 xfs_bnobt_diff_two_keys(
259 	struct xfs_btree_cur		*cur,
260 	const union xfs_btree_key	*k1,
261 	const union xfs_btree_key	*k2)
262 {
263 	return (int64_t)be32_to_cpu(k1->alloc.ar_startblock) -
264 			  be32_to_cpu(k2->alloc.ar_startblock);
265 }
266 
267 STATIC int64_t
268 xfs_cntbt_diff_two_keys(
269 	struct xfs_btree_cur		*cur,
270 	const union xfs_btree_key	*k1,
271 	const union xfs_btree_key	*k2)
272 {
273 	int64_t				diff;
274 
275 	diff =  be32_to_cpu(k1->alloc.ar_blockcount) -
276 		be32_to_cpu(k2->alloc.ar_blockcount);
277 	if (diff)
278 		return diff;
279 
280 	return  be32_to_cpu(k1->alloc.ar_startblock) -
281 		be32_to_cpu(k2->alloc.ar_startblock);
282 }
283 
284 static xfs_failaddr_t
285 xfs_allocbt_verify(
286 	struct xfs_buf		*bp)
287 {
288 	struct xfs_mount	*mp = bp->b_mount;
289 	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
290 	struct xfs_perag	*pag = bp->b_pag;
291 	xfs_failaddr_t		fa;
292 	unsigned int		level;
293 	xfs_btnum_t		btnum = XFS_BTNUM_BNOi;
294 
295 	if (!xfs_verify_magic(bp, block->bb_magic))
296 		return __this_address;
297 
298 	if (xfs_has_crc(mp)) {
299 		fa = xfs_btree_sblock_v5hdr_verify(bp);
300 		if (fa)
301 			return fa;
302 	}
303 
304 	/*
305 	 * The perag may not be attached during grow operations or fully
306 	 * initialized from the AGF during log recovery. Therefore we can only
307 	 * check against maximum tree depth from those contexts.
308 	 *
309 	 * Otherwise check against the per-tree limit. Peek at one of the
310 	 * verifier magic values to determine the type of tree we're verifying
311 	 * against.
312 	 */
313 	level = be16_to_cpu(block->bb_level);
314 	if (bp->b_ops->magic[0] == cpu_to_be32(XFS_ABTC_MAGIC))
315 		btnum = XFS_BTNUM_CNTi;
316 	if (pag && pag->pagf_init) {
317 		if (level >= pag->pagf_levels[btnum])
318 			return __this_address;
319 	} else if (level >= mp->m_ag_maxlevels)
320 		return __this_address;
321 
322 	return xfs_btree_sblock_verify(bp, mp->m_alloc_mxr[level != 0]);
323 }
324 
325 static void
326 xfs_allocbt_read_verify(
327 	struct xfs_buf	*bp)
328 {
329 	xfs_failaddr_t	fa;
330 
331 	if (!xfs_btree_sblock_verify_crc(bp))
332 		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
333 	else {
334 		fa = xfs_allocbt_verify(bp);
335 		if (fa)
336 			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
337 	}
338 
339 	if (bp->b_error)
340 		trace_xfs_btree_corrupt(bp, _RET_IP_);
341 }
342 
343 static void
344 xfs_allocbt_write_verify(
345 	struct xfs_buf	*bp)
346 {
347 	xfs_failaddr_t	fa;
348 
349 	fa = xfs_allocbt_verify(bp);
350 	if (fa) {
351 		trace_xfs_btree_corrupt(bp, _RET_IP_);
352 		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
353 		return;
354 	}
355 	xfs_btree_sblock_calc_crc(bp);
356 
357 }
358 
359 const struct xfs_buf_ops xfs_bnobt_buf_ops = {
360 	.name = "xfs_bnobt",
361 	.magic = { cpu_to_be32(XFS_ABTB_MAGIC),
362 		   cpu_to_be32(XFS_ABTB_CRC_MAGIC) },
363 	.verify_read = xfs_allocbt_read_verify,
364 	.verify_write = xfs_allocbt_write_verify,
365 	.verify_struct = xfs_allocbt_verify,
366 };
367 
368 const struct xfs_buf_ops xfs_cntbt_buf_ops = {
369 	.name = "xfs_cntbt",
370 	.magic = { cpu_to_be32(XFS_ABTC_MAGIC),
371 		   cpu_to_be32(XFS_ABTC_CRC_MAGIC) },
372 	.verify_read = xfs_allocbt_read_verify,
373 	.verify_write = xfs_allocbt_write_verify,
374 	.verify_struct = xfs_allocbt_verify,
375 };
376 
377 STATIC int
378 xfs_bnobt_keys_inorder(
379 	struct xfs_btree_cur		*cur,
380 	const union xfs_btree_key	*k1,
381 	const union xfs_btree_key	*k2)
382 {
383 	return be32_to_cpu(k1->alloc.ar_startblock) <
384 	       be32_to_cpu(k2->alloc.ar_startblock);
385 }
386 
387 STATIC int
388 xfs_bnobt_recs_inorder(
389 	struct xfs_btree_cur		*cur,
390 	const union xfs_btree_rec	*r1,
391 	const union xfs_btree_rec	*r2)
392 {
393 	return be32_to_cpu(r1->alloc.ar_startblock) +
394 		be32_to_cpu(r1->alloc.ar_blockcount) <=
395 		be32_to_cpu(r2->alloc.ar_startblock);
396 }
397 
398 STATIC int
399 xfs_cntbt_keys_inorder(
400 	struct xfs_btree_cur		*cur,
401 	const union xfs_btree_key	*k1,
402 	const union xfs_btree_key	*k2)
403 {
404 	return be32_to_cpu(k1->alloc.ar_blockcount) <
405 		be32_to_cpu(k2->alloc.ar_blockcount) ||
406 		(k1->alloc.ar_blockcount == k2->alloc.ar_blockcount &&
407 		 be32_to_cpu(k1->alloc.ar_startblock) <
408 		 be32_to_cpu(k2->alloc.ar_startblock));
409 }
410 
411 STATIC int
412 xfs_cntbt_recs_inorder(
413 	struct xfs_btree_cur		*cur,
414 	const union xfs_btree_rec	*r1,
415 	const union xfs_btree_rec	*r2)
416 {
417 	return be32_to_cpu(r1->alloc.ar_blockcount) <
418 		be32_to_cpu(r2->alloc.ar_blockcount) ||
419 		(r1->alloc.ar_blockcount == r2->alloc.ar_blockcount &&
420 		 be32_to_cpu(r1->alloc.ar_startblock) <
421 		 be32_to_cpu(r2->alloc.ar_startblock));
422 }
423 
424 static const struct xfs_btree_ops xfs_bnobt_ops = {
425 	.rec_len		= sizeof(xfs_alloc_rec_t),
426 	.key_len		= sizeof(xfs_alloc_key_t),
427 
428 	.dup_cursor		= xfs_allocbt_dup_cursor,
429 	.set_root		= xfs_allocbt_set_root,
430 	.alloc_block		= xfs_allocbt_alloc_block,
431 	.free_block		= xfs_allocbt_free_block,
432 	.update_lastrec		= xfs_allocbt_update_lastrec,
433 	.get_minrecs		= xfs_allocbt_get_minrecs,
434 	.get_maxrecs		= xfs_allocbt_get_maxrecs,
435 	.init_key_from_rec	= xfs_allocbt_init_key_from_rec,
436 	.init_high_key_from_rec	= xfs_bnobt_init_high_key_from_rec,
437 	.init_rec_from_cur	= xfs_allocbt_init_rec_from_cur,
438 	.init_ptr_from_cur	= xfs_allocbt_init_ptr_from_cur,
439 	.key_diff		= xfs_bnobt_key_diff,
440 	.buf_ops		= &xfs_bnobt_buf_ops,
441 	.diff_two_keys		= xfs_bnobt_diff_two_keys,
442 	.keys_inorder		= xfs_bnobt_keys_inorder,
443 	.recs_inorder		= xfs_bnobt_recs_inorder,
444 };
445 
446 static const struct xfs_btree_ops xfs_cntbt_ops = {
447 	.rec_len		= sizeof(xfs_alloc_rec_t),
448 	.key_len		= sizeof(xfs_alloc_key_t),
449 
450 	.dup_cursor		= xfs_allocbt_dup_cursor,
451 	.set_root		= xfs_allocbt_set_root,
452 	.alloc_block		= xfs_allocbt_alloc_block,
453 	.free_block		= xfs_allocbt_free_block,
454 	.update_lastrec		= xfs_allocbt_update_lastrec,
455 	.get_minrecs		= xfs_allocbt_get_minrecs,
456 	.get_maxrecs		= xfs_allocbt_get_maxrecs,
457 	.init_key_from_rec	= xfs_allocbt_init_key_from_rec,
458 	.init_high_key_from_rec	= xfs_cntbt_init_high_key_from_rec,
459 	.init_rec_from_cur	= xfs_allocbt_init_rec_from_cur,
460 	.init_ptr_from_cur	= xfs_allocbt_init_ptr_from_cur,
461 	.key_diff		= xfs_cntbt_key_diff,
462 	.buf_ops		= &xfs_cntbt_buf_ops,
463 	.diff_two_keys		= xfs_cntbt_diff_two_keys,
464 	.keys_inorder		= xfs_cntbt_keys_inorder,
465 	.recs_inorder		= xfs_cntbt_recs_inorder,
466 };
467 
468 /* Allocate most of a new allocation btree cursor. */
469 STATIC struct xfs_btree_cur *
470 xfs_allocbt_init_common(
471 	struct xfs_mount	*mp,
472 	struct xfs_trans	*tp,
473 	struct xfs_perag	*pag,
474 	xfs_btnum_t		btnum)
475 {
476 	struct xfs_btree_cur	*cur;
477 
478 	ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT);
479 
480 	cur = xfs_btree_alloc_cursor(mp, tp, btnum);
481 	cur->bc_ag.abt.active = false;
482 
483 	if (btnum == XFS_BTNUM_CNT) {
484 		cur->bc_ops = &xfs_cntbt_ops;
485 		cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtc_2);
486 		cur->bc_flags = XFS_BTREE_LASTREC_UPDATE;
487 	} else {
488 		cur->bc_ops = &xfs_bnobt_ops;
489 		cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2);
490 	}
491 
492 	/* take a reference for the cursor */
493 	atomic_inc(&pag->pag_ref);
494 	cur->bc_ag.pag = pag;
495 
496 	if (xfs_has_crc(mp))
497 		cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
498 
499 	return cur;
500 }
501 
502 /*
503  * Allocate a new allocation btree cursor.
504  */
505 struct xfs_btree_cur *			/* new alloc btree cursor */
506 xfs_allocbt_init_cursor(
507 	struct xfs_mount	*mp,		/* file system mount point */
508 	struct xfs_trans	*tp,		/* transaction pointer */
509 	struct xfs_buf		*agbp,		/* buffer for agf structure */
510 	struct xfs_perag	*pag,
511 	xfs_btnum_t		btnum)		/* btree identifier */
512 {
513 	struct xfs_agf		*agf = agbp->b_addr;
514 	struct xfs_btree_cur	*cur;
515 
516 	cur = xfs_allocbt_init_common(mp, tp, pag, btnum);
517 	if (btnum == XFS_BTNUM_CNT)
518 		cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
519 	else
520 		cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
521 
522 	cur->bc_ag.agbp = agbp;
523 
524 	return cur;
525 }
526 
527 /* Create a free space btree cursor with a fake root for staging. */
528 struct xfs_btree_cur *
529 xfs_allocbt_stage_cursor(
530 	struct xfs_mount	*mp,
531 	struct xbtree_afakeroot	*afake,
532 	struct xfs_perag	*pag,
533 	xfs_btnum_t		btnum)
534 {
535 	struct xfs_btree_cur	*cur;
536 
537 	cur = xfs_allocbt_init_common(mp, NULL, pag, btnum);
538 	xfs_btree_stage_afakeroot(cur, afake);
539 	return cur;
540 }
541 
542 /*
543  * Install a new free space btree root.  Caller is responsible for invalidating
544  * and freeing the old btree blocks.
545  */
546 void
547 xfs_allocbt_commit_staged_btree(
548 	struct xfs_btree_cur	*cur,
549 	struct xfs_trans	*tp,
550 	struct xfs_buf		*agbp)
551 {
552 	struct xfs_agf		*agf = agbp->b_addr;
553 	struct xbtree_afakeroot	*afake = cur->bc_ag.afake;
554 
555 	ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
556 
557 	agf->agf_roots[cur->bc_btnum] = cpu_to_be32(afake->af_root);
558 	agf->agf_levels[cur->bc_btnum] = cpu_to_be32(afake->af_levels);
559 	xfs_alloc_log_agf(tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
560 
561 	if (cur->bc_btnum == XFS_BTNUM_BNO) {
562 		xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_bnobt_ops);
563 	} else {
564 		cur->bc_flags |= XFS_BTREE_LASTREC_UPDATE;
565 		xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_cntbt_ops);
566 	}
567 }
568 
569 /*
570  * Calculate number of records in an alloc btree block.
571  */
572 int
573 xfs_allocbt_maxrecs(
574 	struct xfs_mount	*mp,
575 	int			blocklen,
576 	int			leaf)
577 {
578 	blocklen -= XFS_ALLOC_BLOCK_LEN(mp);
579 
580 	if (leaf)
581 		return blocklen / sizeof(xfs_alloc_rec_t);
582 	return blocklen / (sizeof(xfs_alloc_key_t) + sizeof(xfs_alloc_ptr_t));
583 }
584 
585 /* Calculate the freespace btree size for some records. */
586 xfs_extlen_t
587 xfs_allocbt_calc_size(
588 	struct xfs_mount	*mp,
589 	unsigned long long	len)
590 {
591 	return xfs_btree_calc_size(mp->m_alloc_mnr, len);
592 }
593