xref: /openbmc/linux/fs/xfs/xfs_rtalloc.c (revision adb19164)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
4  * All Rights Reserved.
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_bit.h"
13 #include "xfs_mount.h"
14 #include "xfs_inode.h"
15 #include "xfs_bmap.h"
16 #include "xfs_bmap_btree.h"
17 #include "xfs_trans.h"
18 #include "xfs_trans_space.h"
19 #include "xfs_icache.h"
20 #include "xfs_rtalloc.h"
21 #include "xfs_sb.h"
22 
23 /*
24  * Read and return the summary information for a given extent size,
25  * bitmap block combination.
26  * Keeps track of a current summary block, so we don't keep reading
27  * it from the buffer cache.
28  */
29 static int
30 xfs_rtget_summary(
31 	xfs_mount_t	*mp,		/* file system mount structure */
32 	xfs_trans_t	*tp,		/* transaction pointer */
33 	int		log,		/* log2 of extent size */
34 	xfs_rtblock_t	bbno,		/* bitmap block number */
35 	struct xfs_buf	**rbpp,		/* in/out: summary block buffer */
36 	xfs_fsblock_t	*rsb,		/* in/out: summary block number */
37 	xfs_suminfo_t	*sum)		/* out: summary info for this block */
38 {
39 	return xfs_rtmodify_summary_int(mp, tp, log, bbno, 0, rbpp, rsb, sum);
40 }
41 
42 /*
43  * Return whether there are any free extents in the size range given
44  * by low and high, for the bitmap block bbno.
45  */
46 STATIC int				/* error */
47 xfs_rtany_summary(
48 	xfs_mount_t	*mp,		/* file system mount structure */
49 	xfs_trans_t	*tp,		/* transaction pointer */
50 	int		low,		/* low log2 extent size */
51 	int		high,		/* high log2 extent size */
52 	xfs_rtblock_t	bbno,		/* bitmap block number */
53 	struct xfs_buf	**rbpp,		/* in/out: summary block buffer */
54 	xfs_fsblock_t	*rsb,		/* in/out: summary block number */
55 	int		*stat)		/* out: any good extents here? */
56 {
57 	int		error;		/* error value */
58 	int		log;		/* loop counter, log2 of ext. size */
59 	xfs_suminfo_t	sum;		/* summary data */
60 
61 	/* There are no extents at levels < m_rsum_cache[bbno]. */
62 	if (mp->m_rsum_cache && low < mp->m_rsum_cache[bbno])
63 		low = mp->m_rsum_cache[bbno];
64 
65 	/*
66 	 * Loop over logs of extent sizes.
67 	 */
68 	for (log = low; log <= high; log++) {
69 		/*
70 		 * Get one summary datum.
71 		 */
72 		error = xfs_rtget_summary(mp, tp, log, bbno, rbpp, rsb, &sum);
73 		if (error) {
74 			return error;
75 		}
76 		/*
77 		 * If there are any, return success.
78 		 */
79 		if (sum) {
80 			*stat = 1;
81 			goto out;
82 		}
83 	}
84 	/*
85 	 * Found nothing, return failure.
86 	 */
87 	*stat = 0;
88 out:
89 	/* There were no extents at levels < log. */
90 	if (mp->m_rsum_cache && log > mp->m_rsum_cache[bbno])
91 		mp->m_rsum_cache[bbno] = log;
92 	return 0;
93 }
94 
95 
96 /*
97  * Copy and transform the summary file, given the old and new
98  * parameters in the mount structures.
99  */
100 STATIC int				/* error */
101 xfs_rtcopy_summary(
102 	xfs_mount_t	*omp,		/* old file system mount point */
103 	xfs_mount_t	*nmp,		/* new file system mount point */
104 	xfs_trans_t	*tp)		/* transaction pointer */
105 {
106 	xfs_rtblock_t	bbno;		/* bitmap block number */
107 	struct xfs_buf	*bp;		/* summary buffer */
108 	int		error;		/* error return value */
109 	int		log;		/* summary level number (log length) */
110 	xfs_suminfo_t	sum;		/* summary data */
111 	xfs_fsblock_t	sumbno;		/* summary block number */
112 
113 	bp = NULL;
114 	for (log = omp->m_rsumlevels - 1; log >= 0; log--) {
115 		for (bbno = omp->m_sb.sb_rbmblocks - 1;
116 		     (xfs_srtblock_t)bbno >= 0;
117 		     bbno--) {
118 			error = xfs_rtget_summary(omp, tp, log, bbno, &bp,
119 				&sumbno, &sum);
120 			if (error)
121 				return error;
122 			if (sum == 0)
123 				continue;
124 			error = xfs_rtmodify_summary(omp, tp, log, bbno, -sum,
125 				&bp, &sumbno);
126 			if (error)
127 				return error;
128 			error = xfs_rtmodify_summary(nmp, tp, log, bbno, sum,
129 				&bp, &sumbno);
130 			if (error)
131 				return error;
132 			ASSERT(sum > 0);
133 		}
134 	}
135 	return 0;
136 }
137 /*
138  * Mark an extent specified by start and len allocated.
139  * Updates all the summary information as well as the bitmap.
140  */
141 STATIC int				/* error */
142 xfs_rtallocate_range(
143 	xfs_mount_t	*mp,		/* file system mount point */
144 	xfs_trans_t	*tp,		/* transaction pointer */
145 	xfs_rtblock_t	start,		/* start block to allocate */
146 	xfs_extlen_t	len,		/* length to allocate */
147 	struct xfs_buf	**rbpp,		/* in/out: summary block buffer */
148 	xfs_fsblock_t	*rsb)		/* in/out: summary block number */
149 {
150 	xfs_rtblock_t	end;		/* end of the allocated extent */
151 	int		error;		/* error value */
152 	xfs_rtblock_t	postblock = 0;	/* first block allocated > end */
153 	xfs_rtblock_t	preblock = 0;	/* first block allocated < start */
154 
155 	end = start + len - 1;
156 	/*
157 	 * Assume we're allocating out of the middle of a free extent.
158 	 * We need to find the beginning and end of the extent so we can
159 	 * properly update the summary.
160 	 */
161 	error = xfs_rtfind_back(mp, tp, start, 0, &preblock);
162 	if (error) {
163 		return error;
164 	}
165 	/*
166 	 * Find the next allocated block (end of free extent).
167 	 */
168 	error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1,
169 		&postblock);
170 	if (error) {
171 		return error;
172 	}
173 	/*
174 	 * Decrement the summary information corresponding to the entire
175 	 * (old) free extent.
176 	 */
177 	error = xfs_rtmodify_summary(mp, tp,
178 		XFS_RTBLOCKLOG(postblock + 1 - preblock),
179 		XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb);
180 	if (error) {
181 		return error;
182 	}
183 	/*
184 	 * If there are blocks not being allocated at the front of the
185 	 * old extent, add summary data for them to be free.
186 	 */
187 	if (preblock < start) {
188 		error = xfs_rtmodify_summary(mp, tp,
189 			XFS_RTBLOCKLOG(start - preblock),
190 			XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb);
191 		if (error) {
192 			return error;
193 		}
194 	}
195 	/*
196 	 * If there are blocks not being allocated at the end of the
197 	 * old extent, add summary data for them to be free.
198 	 */
199 	if (postblock > end) {
200 		error = xfs_rtmodify_summary(mp, tp,
201 			XFS_RTBLOCKLOG(postblock - end),
202 			XFS_BITTOBLOCK(mp, end + 1), 1, rbpp, rsb);
203 		if (error) {
204 			return error;
205 		}
206 	}
207 	/*
208 	 * Modify the bitmap to mark this extent allocated.
209 	 */
210 	error = xfs_rtmodify_range(mp, tp, start, len, 0);
211 	return error;
212 }
213 
214 /*
215  * Make sure we don't run off the end of the rt volume.  Be careful that
216  * adjusting maxlen downwards doesn't cause us to fail the alignment checks.
217  */
218 static inline xfs_extlen_t
219 xfs_rtallocate_clamp_len(
220 	struct xfs_mount	*mp,
221 	xfs_rtblock_t		startrtx,
222 	xfs_extlen_t		rtxlen,
223 	xfs_extlen_t		prod)
224 {
225 	xfs_extlen_t		ret;
226 
227 	ret = min(mp->m_sb.sb_rextents, startrtx + rtxlen) - startrtx;
228 	return rounddown(ret, prod);
229 }
230 
231 /*
232  * Attempt to allocate an extent minlen<=len<=maxlen starting from
233  * bitmap block bbno.  If we don't get maxlen then use prod to trim
234  * the length, if given.  Returns error; returns starting block in *rtblock.
235  * The lengths are all in rtextents.
236  */
237 STATIC int				/* error */
238 xfs_rtallocate_extent_block(
239 	xfs_mount_t	*mp,		/* file system mount point */
240 	xfs_trans_t	*tp,		/* transaction pointer */
241 	xfs_rtblock_t	bbno,		/* bitmap block number */
242 	xfs_extlen_t	minlen,		/* minimum length to allocate */
243 	xfs_extlen_t	maxlen,		/* maximum length to allocate */
244 	xfs_extlen_t	*len,		/* out: actual length allocated */
245 	xfs_rtblock_t	*nextp,		/* out: next block to try */
246 	struct xfs_buf	**rbpp,		/* in/out: summary block buffer */
247 	xfs_fsblock_t	*rsb,		/* in/out: summary block number */
248 	xfs_extlen_t	prod,		/* extent product factor */
249 	xfs_rtblock_t	*rtblock)	/* out: start block allocated */
250 {
251 	xfs_rtblock_t	besti;		/* best rtblock found so far */
252 	xfs_rtblock_t	bestlen;	/* best length found so far */
253 	xfs_rtblock_t	end;		/* last rtblock in chunk */
254 	int		error;		/* error value */
255 	xfs_rtblock_t	i;		/* current rtblock trying */
256 	xfs_rtblock_t	next;		/* next rtblock to try */
257 	int		stat;		/* status from internal calls */
258 
259 	/*
260 	 * Loop over all the extents starting in this bitmap block,
261 	 * looking for one that's long enough.
262 	 */
263 	for (i = XFS_BLOCKTOBIT(mp, bbno), besti = -1, bestlen = 0,
264 		end = XFS_BLOCKTOBIT(mp, bbno + 1) - 1;
265 	     i <= end;
266 	     i++) {
267 		/* Make sure we don't scan off the end of the rt volume. */
268 		maxlen = xfs_rtallocate_clamp_len(mp, i, maxlen, prod);
269 
270 		/*
271 		 * See if there's a free extent of maxlen starting at i.
272 		 * If it's not so then next will contain the first non-free.
273 		 */
274 		error = xfs_rtcheck_range(mp, tp, i, maxlen, 1, &next, &stat);
275 		if (error) {
276 			return error;
277 		}
278 		if (stat) {
279 			/*
280 			 * i for maxlen is all free, allocate and return that.
281 			 */
282 			error = xfs_rtallocate_range(mp, tp, i, maxlen, rbpp,
283 				rsb);
284 			if (error) {
285 				return error;
286 			}
287 			*len = maxlen;
288 			*rtblock = i;
289 			return 0;
290 		}
291 		/*
292 		 * In the case where we have a variable-sized allocation
293 		 * request, figure out how big this free piece is,
294 		 * and if it's big enough for the minimum, and the best
295 		 * so far, remember it.
296 		 */
297 		if (minlen < maxlen) {
298 			xfs_rtblock_t	thislen;	/* this extent size */
299 
300 			thislen = next - i;
301 			if (thislen >= minlen && thislen > bestlen) {
302 				besti = i;
303 				bestlen = thislen;
304 			}
305 		}
306 		/*
307 		 * If not done yet, find the start of the next free space.
308 		 */
309 		if (next < end) {
310 			error = xfs_rtfind_forw(mp, tp, next, end, &i);
311 			if (error) {
312 				return error;
313 			}
314 		} else
315 			break;
316 	}
317 	/*
318 	 * Searched the whole thing & didn't find a maxlen free extent.
319 	 */
320 	if (minlen < maxlen && besti != -1) {
321 		xfs_extlen_t	p;	/* amount to trim length by */
322 
323 		/*
324 		 * If size should be a multiple of prod, make that so.
325 		 */
326 		if (prod > 1) {
327 			div_u64_rem(bestlen, prod, &p);
328 			if (p)
329 				bestlen -= p;
330 		}
331 
332 		/*
333 		 * Allocate besti for bestlen & return that.
334 		 */
335 		error = xfs_rtallocate_range(mp, tp, besti, bestlen, rbpp, rsb);
336 		if (error) {
337 			return error;
338 		}
339 		*len = bestlen;
340 		*rtblock = besti;
341 		return 0;
342 	}
343 	/*
344 	 * Allocation failed.  Set *nextp to the next block to try.
345 	 */
346 	*nextp = next;
347 	*rtblock = NULLRTBLOCK;
348 	return 0;
349 }
350 
351 /*
352  * Allocate an extent of length minlen<=len<=maxlen, starting at block
353  * bno.  If we don't get maxlen then use prod to trim the length, if given.
354  * Returns error; returns starting block in *rtblock.
355  * The lengths are all in rtextents.
356  */
357 STATIC int				/* error */
358 xfs_rtallocate_extent_exact(
359 	xfs_mount_t	*mp,		/* file system mount point */
360 	xfs_trans_t	*tp,		/* transaction pointer */
361 	xfs_rtblock_t	bno,		/* starting block number to allocate */
362 	xfs_extlen_t	minlen,		/* minimum length to allocate */
363 	xfs_extlen_t	maxlen,		/* maximum length to allocate */
364 	xfs_extlen_t	*len,		/* out: actual length allocated */
365 	struct xfs_buf	**rbpp,		/* in/out: summary block buffer */
366 	xfs_fsblock_t	*rsb,		/* in/out: summary block number */
367 	xfs_extlen_t	prod,		/* extent product factor */
368 	xfs_rtblock_t	*rtblock)	/* out: start block allocated */
369 {
370 	int		error;		/* error value */
371 	xfs_extlen_t	i;		/* extent length trimmed due to prod */
372 	int		isfree;		/* extent is free */
373 	xfs_rtblock_t	next;		/* next block to try (dummy) */
374 
375 	ASSERT(minlen % prod == 0);
376 	ASSERT(maxlen % prod == 0);
377 	/*
378 	 * Check if the range in question (for maxlen) is free.
379 	 */
380 	error = xfs_rtcheck_range(mp, tp, bno, maxlen, 1, &next, &isfree);
381 	if (error) {
382 		return error;
383 	}
384 	if (isfree) {
385 		/*
386 		 * If it is, allocate it and return success.
387 		 */
388 		error = xfs_rtallocate_range(mp, tp, bno, maxlen, rbpp, rsb);
389 		if (error) {
390 			return error;
391 		}
392 		*len = maxlen;
393 		*rtblock = bno;
394 		return 0;
395 	}
396 	/*
397 	 * If not, allocate what there is, if it's at least minlen.
398 	 */
399 	maxlen = next - bno;
400 	if (maxlen < minlen) {
401 		/*
402 		 * Failed, return failure status.
403 		 */
404 		*rtblock = NULLRTBLOCK;
405 		return 0;
406 	}
407 	/*
408 	 * Trim off tail of extent, if prod is specified.
409 	 */
410 	if (prod > 1 && (i = maxlen % prod)) {
411 		maxlen -= i;
412 		if (maxlen < minlen) {
413 			/*
414 			 * Now we can't do it, return failure status.
415 			 */
416 			*rtblock = NULLRTBLOCK;
417 			return 0;
418 		}
419 	}
420 	/*
421 	 * Allocate what we can and return it.
422 	 */
423 	error = xfs_rtallocate_range(mp, tp, bno, maxlen, rbpp, rsb);
424 	if (error) {
425 		return error;
426 	}
427 	*len = maxlen;
428 	*rtblock = bno;
429 	return 0;
430 }
431 
432 /*
433  * Allocate an extent of length minlen<=len<=maxlen, starting as near
434  * to bno as possible.  If we don't get maxlen then use prod to trim
435  * the length, if given.  The lengths are all in rtextents.
436  */
437 STATIC int				/* error */
438 xfs_rtallocate_extent_near(
439 	xfs_mount_t	*mp,		/* file system mount point */
440 	xfs_trans_t	*tp,		/* transaction pointer */
441 	xfs_rtblock_t	bno,		/* starting block number to allocate */
442 	xfs_extlen_t	minlen,		/* minimum length to allocate */
443 	xfs_extlen_t	maxlen,		/* maximum length to allocate */
444 	xfs_extlen_t	*len,		/* out: actual length allocated */
445 	struct xfs_buf	**rbpp,		/* in/out: summary block buffer */
446 	xfs_fsblock_t	*rsb,		/* in/out: summary block number */
447 	xfs_extlen_t	prod,		/* extent product factor */
448 	xfs_rtblock_t	*rtblock)	/* out: start block allocated */
449 {
450 	int		any;		/* any useful extents from summary */
451 	xfs_rtblock_t	bbno;		/* bitmap block number */
452 	int		error;		/* error value */
453 	int		i;		/* bitmap block offset (loop control) */
454 	int		j;		/* secondary loop control */
455 	int		log2len;	/* log2 of minlen */
456 	xfs_rtblock_t	n;		/* next block to try */
457 	xfs_rtblock_t	r;		/* result block */
458 
459 	ASSERT(minlen % prod == 0);
460 	ASSERT(maxlen % prod == 0);
461 
462 	/*
463 	 * If the block number given is off the end, silently set it to
464 	 * the last block.
465 	 */
466 	if (bno >= mp->m_sb.sb_rextents)
467 		bno = mp->m_sb.sb_rextents - 1;
468 
469 	/* Make sure we don't run off the end of the rt volume. */
470 	maxlen = xfs_rtallocate_clamp_len(mp, bno, maxlen, prod);
471 	if (maxlen < minlen) {
472 		*rtblock = NULLRTBLOCK;
473 		return 0;
474 	}
475 
476 	/*
477 	 * Try the exact allocation first.
478 	 */
479 	error = xfs_rtallocate_extent_exact(mp, tp, bno, minlen, maxlen, len,
480 		rbpp, rsb, prod, &r);
481 	if (error) {
482 		return error;
483 	}
484 	/*
485 	 * If the exact allocation worked, return that.
486 	 */
487 	if (r != NULLRTBLOCK) {
488 		*rtblock = r;
489 		return 0;
490 	}
491 	bbno = XFS_BITTOBLOCK(mp, bno);
492 	i = 0;
493 	ASSERT(minlen != 0);
494 	log2len = xfs_highbit32(minlen);
495 	/*
496 	 * Loop over all bitmap blocks (bbno + i is current block).
497 	 */
498 	for (;;) {
499 		/*
500 		 * Get summary information of extents of all useful levels
501 		 * starting in this bitmap block.
502 		 */
503 		error = xfs_rtany_summary(mp, tp, log2len, mp->m_rsumlevels - 1,
504 			bbno + i, rbpp, rsb, &any);
505 		if (error) {
506 			return error;
507 		}
508 		/*
509 		 * If there are any useful extents starting here, try
510 		 * allocating one.
511 		 */
512 		if (any) {
513 			/*
514 			 * On the positive side of the starting location.
515 			 */
516 			if (i >= 0) {
517 				/*
518 				 * Try to allocate an extent starting in
519 				 * this block.
520 				 */
521 				error = xfs_rtallocate_extent_block(mp, tp,
522 					bbno + i, minlen, maxlen, len, &n, rbpp,
523 					rsb, prod, &r);
524 				if (error) {
525 					return error;
526 				}
527 				/*
528 				 * If it worked, return it.
529 				 */
530 				if (r != NULLRTBLOCK) {
531 					*rtblock = r;
532 					return 0;
533 				}
534 			}
535 			/*
536 			 * On the negative side of the starting location.
537 			 */
538 			else {		/* i < 0 */
539 				/*
540 				 * Loop backwards through the bitmap blocks from
541 				 * the starting point-1 up to where we are now.
542 				 * There should be an extent which ends in this
543 				 * bitmap block and is long enough.
544 				 */
545 				for (j = -1; j > i; j--) {
546 					/*
547 					 * Grab the summary information for
548 					 * this bitmap block.
549 					 */
550 					error = xfs_rtany_summary(mp, tp,
551 						log2len, mp->m_rsumlevels - 1,
552 						bbno + j, rbpp, rsb, &any);
553 					if (error) {
554 						return error;
555 					}
556 					/*
557 					 * If there's no extent given in the
558 					 * summary that means the extent we
559 					 * found must carry over from an
560 					 * earlier block.  If there is an
561 					 * extent given, we've already tried
562 					 * that allocation, don't do it again.
563 					 */
564 					if (any)
565 						continue;
566 					error = xfs_rtallocate_extent_block(mp,
567 						tp, bbno + j, minlen, maxlen,
568 						len, &n, rbpp, rsb, prod, &r);
569 					if (error) {
570 						return error;
571 					}
572 					/*
573 					 * If it works, return the extent.
574 					 */
575 					if (r != NULLRTBLOCK) {
576 						*rtblock = r;
577 						return 0;
578 					}
579 				}
580 				/*
581 				 * There weren't intervening bitmap blocks
582 				 * with a long enough extent, or the
583 				 * allocation didn't work for some reason
584 				 * (i.e. it's a little * too short).
585 				 * Try to allocate from the summary block
586 				 * that we found.
587 				 */
588 				error = xfs_rtallocate_extent_block(mp, tp,
589 					bbno + i, minlen, maxlen, len, &n, rbpp,
590 					rsb, prod, &r);
591 				if (error) {
592 					return error;
593 				}
594 				/*
595 				 * If it works, return the extent.
596 				 */
597 				if (r != NULLRTBLOCK) {
598 					*rtblock = r;
599 					return 0;
600 				}
601 			}
602 		}
603 		/*
604 		 * Loop control.  If we were on the positive side, and there's
605 		 * still more blocks on the negative side, go there.
606 		 */
607 		if (i > 0 && (int)bbno - i >= 0)
608 			i = -i;
609 		/*
610 		 * If positive, and no more negative, but there are more
611 		 * positive, go there.
612 		 */
613 		else if (i > 0 && (int)bbno + i < mp->m_sb.sb_rbmblocks - 1)
614 			i++;
615 		/*
616 		 * If negative or 0 (just started), and there are positive
617 		 * blocks to go, go there.  The 0 case moves to block 1.
618 		 */
619 		else if (i <= 0 && (int)bbno - i < mp->m_sb.sb_rbmblocks - 1)
620 			i = 1 - i;
621 		/*
622 		 * If negative or 0 and there are more negative blocks,
623 		 * go there.
624 		 */
625 		else if (i <= 0 && (int)bbno + i > 0)
626 			i--;
627 		/*
628 		 * Must be done.  Return failure.
629 		 */
630 		else
631 			break;
632 	}
633 	*rtblock = NULLRTBLOCK;
634 	return 0;
635 }
636 
637 /*
638  * Allocate an extent of length minlen<=len<=maxlen, with no position
639  * specified.  If we don't get maxlen then use prod to trim
640  * the length, if given.  The lengths are all in rtextents.
641  */
642 STATIC int				/* error */
643 xfs_rtallocate_extent_size(
644 	xfs_mount_t	*mp,		/* file system mount point */
645 	xfs_trans_t	*tp,		/* transaction pointer */
646 	xfs_extlen_t	minlen,		/* minimum length to allocate */
647 	xfs_extlen_t	maxlen,		/* maximum length to allocate */
648 	xfs_extlen_t	*len,		/* out: actual length allocated */
649 	struct xfs_buf	**rbpp,		/* in/out: summary block buffer */
650 	xfs_fsblock_t	*rsb,		/* in/out: summary block number */
651 	xfs_extlen_t	prod,		/* extent product factor */
652 	xfs_rtblock_t	*rtblock)	/* out: start block allocated */
653 {
654 	int		error;		/* error value */
655 	int		i;		/* bitmap block number */
656 	int		l;		/* level number (loop control) */
657 	xfs_rtblock_t	n;		/* next block to be tried */
658 	xfs_rtblock_t	r;		/* result block number */
659 	xfs_suminfo_t	sum;		/* summary information for extents */
660 
661 	ASSERT(minlen % prod == 0);
662 	ASSERT(maxlen % prod == 0);
663 	ASSERT(maxlen != 0);
664 
665 	/*
666 	 * Loop over all the levels starting with maxlen.
667 	 * At each level, look at all the bitmap blocks, to see if there
668 	 * are extents starting there that are long enough (>= maxlen).
669 	 * Note, only on the initial level can the allocation fail if
670 	 * the summary says there's an extent.
671 	 */
672 	for (l = xfs_highbit32(maxlen); l < mp->m_rsumlevels; l++) {
673 		/*
674 		 * Loop over all the bitmap blocks.
675 		 */
676 		for (i = 0; i < mp->m_sb.sb_rbmblocks; i++) {
677 			/*
678 			 * Get the summary for this level/block.
679 			 */
680 			error = xfs_rtget_summary(mp, tp, l, i, rbpp, rsb,
681 				&sum);
682 			if (error) {
683 				return error;
684 			}
685 			/*
686 			 * Nothing there, on to the next block.
687 			 */
688 			if (!sum)
689 				continue;
690 			/*
691 			 * Try allocating the extent.
692 			 */
693 			error = xfs_rtallocate_extent_block(mp, tp, i, maxlen,
694 				maxlen, len, &n, rbpp, rsb, prod, &r);
695 			if (error) {
696 				return error;
697 			}
698 			/*
699 			 * If it worked, return that.
700 			 */
701 			if (r != NULLRTBLOCK) {
702 				*rtblock = r;
703 				return 0;
704 			}
705 			/*
706 			 * If the "next block to try" returned from the
707 			 * allocator is beyond the next bitmap block,
708 			 * skip to that bitmap block.
709 			 */
710 			if (XFS_BITTOBLOCK(mp, n) > i + 1)
711 				i = XFS_BITTOBLOCK(mp, n) - 1;
712 		}
713 	}
714 	/*
715 	 * Didn't find any maxlen blocks.  Try smaller ones, unless
716 	 * we're asking for a fixed size extent.
717 	 */
718 	if (minlen > --maxlen) {
719 		*rtblock = NULLRTBLOCK;
720 		return 0;
721 	}
722 	ASSERT(minlen != 0);
723 	ASSERT(maxlen != 0);
724 
725 	/*
726 	 * Loop over sizes, from maxlen down to minlen.
727 	 * This time, when we do the allocations, allow smaller ones
728 	 * to succeed.
729 	 */
730 	for (l = xfs_highbit32(maxlen); l >= xfs_highbit32(minlen); l--) {
731 		/*
732 		 * Loop over all the bitmap blocks, try an allocation
733 		 * starting in that block.
734 		 */
735 		for (i = 0; i < mp->m_sb.sb_rbmblocks; i++) {
736 			/*
737 			 * Get the summary information for this level/block.
738 			 */
739 			error =	xfs_rtget_summary(mp, tp, l, i, rbpp, rsb,
740 						  &sum);
741 			if (error) {
742 				return error;
743 			}
744 			/*
745 			 * If nothing there, go on to next.
746 			 */
747 			if (!sum)
748 				continue;
749 			/*
750 			 * Try the allocation.  Make sure the specified
751 			 * minlen/maxlen are in the possible range for
752 			 * this summary level.
753 			 */
754 			error = xfs_rtallocate_extent_block(mp, tp, i,
755 					XFS_RTMAX(minlen, 1 << l),
756 					XFS_RTMIN(maxlen, (1 << (l + 1)) - 1),
757 					len, &n, rbpp, rsb, prod, &r);
758 			if (error) {
759 				return error;
760 			}
761 			/*
762 			 * If it worked, return that extent.
763 			 */
764 			if (r != NULLRTBLOCK) {
765 				*rtblock = r;
766 				return 0;
767 			}
768 			/*
769 			 * If the "next block to try" returned from the
770 			 * allocator is beyond the next bitmap block,
771 			 * skip to that bitmap block.
772 			 */
773 			if (XFS_BITTOBLOCK(mp, n) > i + 1)
774 				i = XFS_BITTOBLOCK(mp, n) - 1;
775 		}
776 	}
777 	/*
778 	 * Got nothing, return failure.
779 	 */
780 	*rtblock = NULLRTBLOCK;
781 	return 0;
782 }
783 
784 /*
785  * Allocate space to the bitmap or summary file, and zero it, for growfs.
786  */
787 STATIC int
788 xfs_growfs_rt_alloc(
789 	struct xfs_mount	*mp,		/* file system mount point */
790 	xfs_extlen_t		oblocks,	/* old count of blocks */
791 	xfs_extlen_t		nblocks,	/* new count of blocks */
792 	struct xfs_inode	*ip)		/* inode (bitmap/summary) */
793 {
794 	xfs_fileoff_t		bno;		/* block number in file */
795 	struct xfs_buf		*bp;	/* temporary buffer for zeroing */
796 	xfs_daddr_t		d;		/* disk block address */
797 	int			error;		/* error return value */
798 	xfs_fsblock_t		fsbno;		/* filesystem block for bno */
799 	struct xfs_bmbt_irec	map;		/* block map output */
800 	int			nmap;		/* number of block maps */
801 	int			resblks;	/* space reservation */
802 	enum xfs_blft		buf_type;
803 	struct xfs_trans	*tp;
804 
805 	if (ip == mp->m_rsumip)
806 		buf_type = XFS_BLFT_RTSUMMARY_BUF;
807 	else
808 		buf_type = XFS_BLFT_RTBITMAP_BUF;
809 
810 	/*
811 	 * Allocate space to the file, as necessary.
812 	 */
813 	while (oblocks < nblocks) {
814 		resblks = XFS_GROWFSRT_SPACE_RES(mp, nblocks - oblocks);
815 		/*
816 		 * Reserve space & log for one extent added to the file.
817 		 */
818 		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtalloc, resblks,
819 				0, 0, &tp);
820 		if (error)
821 			return error;
822 		/*
823 		 * Lock the inode.
824 		 */
825 		xfs_ilock(ip, XFS_ILOCK_EXCL);
826 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
827 
828 		error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK,
829 				XFS_IEXT_ADD_NOSPLIT_CNT);
830 		if (error == -EFBIG)
831 			error = xfs_iext_count_upgrade(tp, ip,
832 					XFS_IEXT_ADD_NOSPLIT_CNT);
833 		if (error)
834 			goto out_trans_cancel;
835 
836 		/*
837 		 * Allocate blocks to the bitmap file.
838 		 */
839 		nmap = 1;
840 		error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks,
841 					XFS_BMAPI_METADATA, 0, &map, &nmap);
842 		if (!error && nmap < 1)
843 			error = -ENOSPC;
844 		if (error)
845 			goto out_trans_cancel;
846 		/*
847 		 * Free any blocks freed up in the transaction, then commit.
848 		 */
849 		error = xfs_trans_commit(tp);
850 		if (error)
851 			return error;
852 		/*
853 		 * Now we need to clear the allocated blocks.
854 		 * Do this one block per transaction, to keep it simple.
855 		 */
856 		for (bno = map.br_startoff, fsbno = map.br_startblock;
857 		     bno < map.br_startoff + map.br_blockcount;
858 		     bno++, fsbno++) {
859 			/*
860 			 * Reserve log for one block zeroing.
861 			 */
862 			error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtzero,
863 					0, 0, 0, &tp);
864 			if (error)
865 				return error;
866 			/*
867 			 * Lock the bitmap inode.
868 			 */
869 			xfs_ilock(ip, XFS_ILOCK_EXCL);
870 			xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
871 			/*
872 			 * Get a buffer for the block.
873 			 */
874 			d = XFS_FSB_TO_DADDR(mp, fsbno);
875 			error = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
876 					mp->m_bsize, 0, &bp);
877 			if (error)
878 				goto out_trans_cancel;
879 
880 			xfs_trans_buf_set_type(tp, bp, buf_type);
881 			bp->b_ops = &xfs_rtbuf_ops;
882 			memset(bp->b_addr, 0, mp->m_sb.sb_blocksize);
883 			xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
884 			/*
885 			 * Commit the transaction.
886 			 */
887 			error = xfs_trans_commit(tp);
888 			if (error)
889 				return error;
890 		}
891 		/*
892 		 * Go on to the next extent, if any.
893 		 */
894 		oblocks = map.br_startoff + map.br_blockcount;
895 	}
896 
897 	return 0;
898 
899 out_trans_cancel:
900 	xfs_trans_cancel(tp);
901 	return error;
902 }
903 
904 static void
905 xfs_alloc_rsum_cache(
906 	xfs_mount_t	*mp,		/* file system mount structure */
907 	xfs_extlen_t	rbmblocks)	/* number of rt bitmap blocks */
908 {
909 	/*
910 	 * The rsum cache is initialized to all zeroes, which is trivially a
911 	 * lower bound on the minimum level with any free extents. We can
912 	 * continue without the cache if it couldn't be allocated.
913 	 */
914 	mp->m_rsum_cache = kvzalloc(rbmblocks, GFP_KERNEL);
915 	if (!mp->m_rsum_cache)
916 		xfs_warn(mp, "could not allocate realtime summary cache");
917 }
918 
919 /*
920  * Visible (exported) functions.
921  */
922 
923 /*
924  * Grow the realtime area of the filesystem.
925  */
926 int
927 xfs_growfs_rt(
928 	xfs_mount_t	*mp,		/* mount point for filesystem */
929 	xfs_growfs_rt_t	*in)		/* growfs rt input struct */
930 {
931 	xfs_rtblock_t	bmbno;		/* bitmap block number */
932 	struct xfs_buf	*bp;		/* temporary buffer */
933 	int		error;		/* error return value */
934 	xfs_mount_t	*nmp;		/* new (fake) mount structure */
935 	xfs_rfsblock_t	nrblocks;	/* new number of realtime blocks */
936 	xfs_extlen_t	nrbmblocks;	/* new number of rt bitmap blocks */
937 	xfs_rtblock_t	nrextents;	/* new number of realtime extents */
938 	uint8_t		nrextslog;	/* new log2 of sb_rextents */
939 	xfs_extlen_t	nrsumblocks;	/* new number of summary blocks */
940 	uint		nrsumlevels;	/* new rt summary levels */
941 	uint		nrsumsize;	/* new size of rt summary, bytes */
942 	xfs_sb_t	*nsbp;		/* new superblock */
943 	xfs_extlen_t	rbmblocks;	/* current number of rt bitmap blocks */
944 	xfs_extlen_t	rsumblocks;	/* current number of rt summary blks */
945 	xfs_sb_t	*sbp;		/* old superblock */
946 	xfs_fsblock_t	sumbno;		/* summary block number */
947 	uint8_t		*rsum_cache;	/* old summary cache */
948 
949 	sbp = &mp->m_sb;
950 
951 	if (!capable(CAP_SYS_ADMIN))
952 		return -EPERM;
953 
954 	/* Needs to have been mounted with an rt device. */
955 	if (!XFS_IS_REALTIME_MOUNT(mp))
956 		return -EINVAL;
957 	/*
958 	 * Mount should fail if the rt bitmap/summary files don't load, but
959 	 * we'll check anyway.
960 	 */
961 	if (!mp->m_rbmip || !mp->m_rsumip)
962 		return -EINVAL;
963 
964 	/* Shrink not supported. */
965 	if (in->newblocks <= sbp->sb_rblocks)
966 		return -EINVAL;
967 
968 	/* Can only change rt extent size when adding rt volume. */
969 	if (sbp->sb_rblocks > 0 && in->extsize != sbp->sb_rextsize)
970 		return -EINVAL;
971 
972 	/* Range check the extent size. */
973 	if (XFS_FSB_TO_B(mp, in->extsize) > XFS_MAX_RTEXTSIZE ||
974 	    XFS_FSB_TO_B(mp, in->extsize) < XFS_MIN_RTEXTSIZE)
975 		return -EINVAL;
976 
977 	/* Unsupported realtime features. */
978 	if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp) || xfs_has_quota(mp))
979 		return -EOPNOTSUPP;
980 
981 	nrblocks = in->newblocks;
982 	error = xfs_sb_validate_fsb_count(sbp, nrblocks);
983 	if (error)
984 		return error;
985 	/*
986 	 * Read in the last block of the device, make sure it exists.
987 	 */
988 	error = xfs_buf_read_uncached(mp->m_rtdev_targp,
989 				XFS_FSB_TO_BB(mp, nrblocks - 1),
990 				XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
991 	if (error)
992 		return error;
993 	xfs_buf_relse(bp);
994 
995 	/*
996 	 * Calculate new parameters.  These are the final values to be reached.
997 	 */
998 	nrextents = nrblocks;
999 	do_div(nrextents, in->extsize);
1000 	nrbmblocks = howmany_64(nrextents, NBBY * sbp->sb_blocksize);
1001 	nrextslog = xfs_highbit32(nrextents);
1002 	nrsumlevels = nrextslog + 1;
1003 	nrsumsize = (uint)sizeof(xfs_suminfo_t) * nrsumlevels * nrbmblocks;
1004 	nrsumblocks = XFS_B_TO_FSB(mp, nrsumsize);
1005 	nrsumsize = XFS_FSB_TO_B(mp, nrsumblocks);
1006 	/*
1007 	 * New summary size can't be more than half the size of
1008 	 * the log.  This prevents us from getting a log overflow,
1009 	 * since we'll log basically the whole summary file at once.
1010 	 */
1011 	if (nrsumblocks > (mp->m_sb.sb_logblocks >> 1))
1012 		return -EINVAL;
1013 	/*
1014 	 * Get the old block counts for bitmap and summary inodes.
1015 	 * These can't change since other growfs callers are locked out.
1016 	 */
1017 	rbmblocks = XFS_B_TO_FSB(mp, mp->m_rbmip->i_disk_size);
1018 	rsumblocks = XFS_B_TO_FSB(mp, mp->m_rsumip->i_disk_size);
1019 	/*
1020 	 * Allocate space to the bitmap and summary files, as necessary.
1021 	 */
1022 	error = xfs_growfs_rt_alloc(mp, rbmblocks, nrbmblocks, mp->m_rbmip);
1023 	if (error)
1024 		return error;
1025 	error = xfs_growfs_rt_alloc(mp, rsumblocks, nrsumblocks, mp->m_rsumip);
1026 	if (error)
1027 		return error;
1028 
1029 	rsum_cache = mp->m_rsum_cache;
1030 	if (nrbmblocks != sbp->sb_rbmblocks)
1031 		xfs_alloc_rsum_cache(mp, nrbmblocks);
1032 
1033 	/*
1034 	 * Allocate a new (fake) mount/sb.
1035 	 */
1036 	nmp = kmem_alloc(sizeof(*nmp), 0);
1037 	/*
1038 	 * Loop over the bitmap blocks.
1039 	 * We will do everything one bitmap block at a time.
1040 	 * Skip the current block if it is exactly full.
1041 	 * This also deals with the case where there were no rtextents before.
1042 	 */
1043 	for (bmbno = sbp->sb_rbmblocks -
1044 		     ((sbp->sb_rextents & ((1 << mp->m_blkbit_log) - 1)) != 0);
1045 	     bmbno < nrbmblocks;
1046 	     bmbno++) {
1047 		struct xfs_trans	*tp;
1048 		xfs_rfsblock_t		nrblocks_step;
1049 
1050 		*nmp = *mp;
1051 		nsbp = &nmp->m_sb;
1052 		/*
1053 		 * Calculate new sb and mount fields for this round.
1054 		 */
1055 		nsbp->sb_rextsize = in->extsize;
1056 		nsbp->sb_rbmblocks = bmbno + 1;
1057 		nrblocks_step = (bmbno + 1) * NBBY * nsbp->sb_blocksize *
1058 				nsbp->sb_rextsize;
1059 		nsbp->sb_rblocks = min(nrblocks, nrblocks_step);
1060 		nsbp->sb_rextents = nsbp->sb_rblocks;
1061 		do_div(nsbp->sb_rextents, nsbp->sb_rextsize);
1062 		ASSERT(nsbp->sb_rextents != 0);
1063 		nsbp->sb_rextslog = xfs_highbit32(nsbp->sb_rextents);
1064 		nrsumlevels = nmp->m_rsumlevels = nsbp->sb_rextslog + 1;
1065 		nrsumsize =
1066 			(uint)sizeof(xfs_suminfo_t) * nrsumlevels *
1067 			nsbp->sb_rbmblocks;
1068 		nrsumblocks = XFS_B_TO_FSB(mp, nrsumsize);
1069 		nmp->m_rsumsize = nrsumsize = XFS_FSB_TO_B(mp, nrsumblocks);
1070 		/*
1071 		 * Start a transaction, get the log reservation.
1072 		 */
1073 		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtfree, 0, 0, 0,
1074 				&tp);
1075 		if (error)
1076 			break;
1077 		/*
1078 		 * Lock out other callers by grabbing the bitmap inode lock.
1079 		 */
1080 		xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP);
1081 		xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
1082 		/*
1083 		 * Update the bitmap inode's size ondisk and incore.  We need
1084 		 * to update the incore size so that inode inactivation won't
1085 		 * punch what it thinks are "posteof" blocks.
1086 		 */
1087 		mp->m_rbmip->i_disk_size =
1088 			nsbp->sb_rbmblocks * nsbp->sb_blocksize;
1089 		i_size_write(VFS_I(mp->m_rbmip), mp->m_rbmip->i_disk_size);
1090 		xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
1091 		/*
1092 		 * Get the summary inode into the transaction.
1093 		 */
1094 		xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL | XFS_ILOCK_RTSUM);
1095 		xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
1096 		/*
1097 		 * Update the summary inode's size.  We need to update the
1098 		 * incore size so that inode inactivation won't punch what it
1099 		 * thinks are "posteof" blocks.
1100 		 */
1101 		mp->m_rsumip->i_disk_size = nmp->m_rsumsize;
1102 		i_size_write(VFS_I(mp->m_rsumip), mp->m_rsumip->i_disk_size);
1103 		xfs_trans_log_inode(tp, mp->m_rsumip, XFS_ILOG_CORE);
1104 		/*
1105 		 * Copy summary data from old to new sizes.
1106 		 * Do this when the real size (not block-aligned) changes.
1107 		 */
1108 		if (sbp->sb_rbmblocks != nsbp->sb_rbmblocks ||
1109 		    mp->m_rsumlevels != nmp->m_rsumlevels) {
1110 			error = xfs_rtcopy_summary(mp, nmp, tp);
1111 			if (error)
1112 				goto error_cancel;
1113 		}
1114 		/*
1115 		 * Update superblock fields.
1116 		 */
1117 		if (nsbp->sb_rextsize != sbp->sb_rextsize)
1118 			xfs_trans_mod_sb(tp, XFS_TRANS_SB_REXTSIZE,
1119 				nsbp->sb_rextsize - sbp->sb_rextsize);
1120 		if (nsbp->sb_rbmblocks != sbp->sb_rbmblocks)
1121 			xfs_trans_mod_sb(tp, XFS_TRANS_SB_RBMBLOCKS,
1122 				nsbp->sb_rbmblocks - sbp->sb_rbmblocks);
1123 		if (nsbp->sb_rblocks != sbp->sb_rblocks)
1124 			xfs_trans_mod_sb(tp, XFS_TRANS_SB_RBLOCKS,
1125 				nsbp->sb_rblocks - sbp->sb_rblocks);
1126 		if (nsbp->sb_rextents != sbp->sb_rextents)
1127 			xfs_trans_mod_sb(tp, XFS_TRANS_SB_REXTENTS,
1128 				nsbp->sb_rextents - sbp->sb_rextents);
1129 		if (nsbp->sb_rextslog != sbp->sb_rextslog)
1130 			xfs_trans_mod_sb(tp, XFS_TRANS_SB_REXTSLOG,
1131 				nsbp->sb_rextslog - sbp->sb_rextslog);
1132 		/*
1133 		 * Free new extent.
1134 		 */
1135 		bp = NULL;
1136 		error = xfs_rtfree_range(nmp, tp, sbp->sb_rextents,
1137 			nsbp->sb_rextents - sbp->sb_rextents, &bp, &sumbno);
1138 		if (error) {
1139 error_cancel:
1140 			xfs_trans_cancel(tp);
1141 			break;
1142 		}
1143 		/*
1144 		 * Mark more blocks free in the superblock.
1145 		 */
1146 		xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS,
1147 			nsbp->sb_rextents - sbp->sb_rextents);
1148 		/*
1149 		 * Update mp values into the real mp structure.
1150 		 */
1151 		mp->m_rsumlevels = nrsumlevels;
1152 		mp->m_rsumsize = nrsumsize;
1153 
1154 		error = xfs_trans_commit(tp);
1155 		if (error)
1156 			break;
1157 
1158 		/* Ensure the mount RT feature flag is now set. */
1159 		mp->m_features |= XFS_FEAT_REALTIME;
1160 	}
1161 	if (error)
1162 		goto out_free;
1163 
1164 	/* Update secondary superblocks now the physical grow has completed */
1165 	error = xfs_update_secondary_sbs(mp);
1166 
1167 out_free:
1168 	/*
1169 	 * Free the fake mp structure.
1170 	 */
1171 	kmem_free(nmp);
1172 
1173 	/*
1174 	 * If we had to allocate a new rsum_cache, we either need to free the
1175 	 * old one (if we succeeded) or free the new one and restore the old one
1176 	 * (if there was an error).
1177 	 */
1178 	if (rsum_cache != mp->m_rsum_cache) {
1179 		if (error) {
1180 			kmem_free(mp->m_rsum_cache);
1181 			mp->m_rsum_cache = rsum_cache;
1182 		} else {
1183 			kmem_free(rsum_cache);
1184 		}
1185 	}
1186 
1187 	return error;
1188 }
1189 
1190 /*
1191  * Allocate an extent in the realtime subvolume, with the usual allocation
1192  * parameters.  The length units are all in realtime extents, as is the
1193  * result block number.
1194  */
1195 int					/* error */
1196 xfs_rtallocate_extent(
1197 	xfs_trans_t	*tp,		/* transaction pointer */
1198 	xfs_rtblock_t	bno,		/* starting block number to allocate */
1199 	xfs_extlen_t	minlen,		/* minimum length to allocate */
1200 	xfs_extlen_t	maxlen,		/* maximum length to allocate */
1201 	xfs_extlen_t	*len,		/* out: actual length allocated */
1202 	int		wasdel,		/* was a delayed allocation extent */
1203 	xfs_extlen_t	prod,		/* extent product factor */
1204 	xfs_rtblock_t	*rtblock)	/* out: start block allocated */
1205 {
1206 	xfs_mount_t	*mp = tp->t_mountp;
1207 	int		error;		/* error value */
1208 	xfs_rtblock_t	r;		/* result allocated block */
1209 	xfs_fsblock_t	sb;		/* summary file block number */
1210 	struct xfs_buf	*sumbp;		/* summary file block buffer */
1211 
1212 	ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
1213 	ASSERT(minlen > 0 && minlen <= maxlen);
1214 
1215 	/*
1216 	 * If prod is set then figure out what to do to minlen and maxlen.
1217 	 */
1218 	if (prod > 1) {
1219 		xfs_extlen_t	i;
1220 
1221 		if ((i = maxlen % prod))
1222 			maxlen -= i;
1223 		if ((i = minlen % prod))
1224 			minlen += prod - i;
1225 		if (maxlen < minlen) {
1226 			*rtblock = NULLRTBLOCK;
1227 			return 0;
1228 		}
1229 	}
1230 
1231 retry:
1232 	sumbp = NULL;
1233 	if (bno == 0) {
1234 		error = xfs_rtallocate_extent_size(mp, tp, minlen, maxlen, len,
1235 				&sumbp,	&sb, prod, &r);
1236 	} else {
1237 		error = xfs_rtallocate_extent_near(mp, tp, bno, minlen, maxlen,
1238 				len, &sumbp, &sb, prod, &r);
1239 	}
1240 
1241 	if (error)
1242 		return error;
1243 
1244 	/*
1245 	 * If it worked, update the superblock.
1246 	 */
1247 	if (r != NULLRTBLOCK) {
1248 		long	slen = (long)*len;
1249 
1250 		ASSERT(*len >= minlen && *len <= maxlen);
1251 		if (wasdel)
1252 			xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FREXTENTS, -slen);
1253 		else
1254 			xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, -slen);
1255 	} else if (prod > 1) {
1256 		prod = 1;
1257 		goto retry;
1258 	}
1259 
1260 	*rtblock = r;
1261 	return 0;
1262 }
1263 
1264 /*
1265  * Initialize realtime fields in the mount structure.
1266  */
1267 int				/* error */
1268 xfs_rtmount_init(
1269 	struct xfs_mount	*mp)	/* file system mount structure */
1270 {
1271 	struct xfs_buf		*bp;	/* buffer for last block of subvolume */
1272 	struct xfs_sb		*sbp;	/* filesystem superblock copy in mount */
1273 	xfs_daddr_t		d;	/* address of last block of subvolume */
1274 	int			error;
1275 
1276 	sbp = &mp->m_sb;
1277 	if (sbp->sb_rblocks == 0)
1278 		return 0;
1279 	if (mp->m_rtdev_targp == NULL) {
1280 		xfs_warn(mp,
1281 	"Filesystem has a realtime volume, use rtdev=device option");
1282 		return -ENODEV;
1283 	}
1284 	mp->m_rsumlevels = sbp->sb_rextslog + 1;
1285 	mp->m_rsumsize =
1286 		(uint)sizeof(xfs_suminfo_t) * mp->m_rsumlevels *
1287 		sbp->sb_rbmblocks;
1288 	mp->m_rsumsize = roundup(mp->m_rsumsize, sbp->sb_blocksize);
1289 	mp->m_rbmip = mp->m_rsumip = NULL;
1290 	/*
1291 	 * Check that the realtime section is an ok size.
1292 	 */
1293 	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
1294 	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_rblocks) {
1295 		xfs_warn(mp, "realtime mount -- %llu != %llu",
1296 			(unsigned long long) XFS_BB_TO_FSB(mp, d),
1297 			(unsigned long long) mp->m_sb.sb_rblocks);
1298 		return -EFBIG;
1299 	}
1300 	error = xfs_buf_read_uncached(mp->m_rtdev_targp,
1301 					d - XFS_FSB_TO_BB(mp, 1),
1302 					XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
1303 	if (error) {
1304 		xfs_warn(mp, "realtime device size check failed");
1305 		return error;
1306 	}
1307 	xfs_buf_relse(bp);
1308 	return 0;
1309 }
1310 
1311 static int
1312 xfs_rtalloc_count_frextent(
1313 	struct xfs_mount		*mp,
1314 	struct xfs_trans		*tp,
1315 	const struct xfs_rtalloc_rec	*rec,
1316 	void				*priv)
1317 {
1318 	uint64_t			*valp = priv;
1319 
1320 	*valp += rec->ar_extcount;
1321 	return 0;
1322 }
1323 
1324 /*
1325  * Reinitialize the number of free realtime extents from the realtime bitmap.
1326  * Callers must ensure that there is no other activity in the filesystem.
1327  */
1328 int
1329 xfs_rtalloc_reinit_frextents(
1330 	struct xfs_mount	*mp)
1331 {
1332 	uint64_t		val = 0;
1333 	int			error;
1334 
1335 	xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
1336 	error = xfs_rtalloc_query_all(mp, NULL, xfs_rtalloc_count_frextent,
1337 			&val);
1338 	xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
1339 	if (error)
1340 		return error;
1341 
1342 	spin_lock(&mp->m_sb_lock);
1343 	mp->m_sb.sb_frextents = val;
1344 	spin_unlock(&mp->m_sb_lock);
1345 	percpu_counter_set(&mp->m_frextents, mp->m_sb.sb_frextents);
1346 	return 0;
1347 }
1348 
1349 /*
1350  * Read in the bmbt of an rt metadata inode so that we never have to load them
1351  * at runtime.  This enables the use of shared ILOCKs for rtbitmap scans.  Use
1352  * an empty transaction to avoid deadlocking on loops in the bmbt.
1353  */
1354 static inline int
1355 xfs_rtmount_iread_extents(
1356 	struct xfs_inode	*ip,
1357 	unsigned int		lock_class)
1358 {
1359 	struct xfs_trans	*tp;
1360 	int			error;
1361 
1362 	error = xfs_trans_alloc_empty(ip->i_mount, &tp);
1363 	if (error)
1364 		return error;
1365 
1366 	xfs_ilock(ip, XFS_ILOCK_EXCL | lock_class);
1367 
1368 	error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
1369 	if (error)
1370 		goto out_unlock;
1371 
1372 	if (xfs_inode_has_attr_fork(ip)) {
1373 		error = xfs_iread_extents(tp, ip, XFS_ATTR_FORK);
1374 		if (error)
1375 			goto out_unlock;
1376 	}
1377 
1378 out_unlock:
1379 	xfs_iunlock(ip, XFS_ILOCK_EXCL | lock_class);
1380 	xfs_trans_cancel(tp);
1381 	return error;
1382 }
1383 
1384 /*
1385  * Get the bitmap and summary inodes and the summary cache into the mount
1386  * structure at mount time.
1387  */
1388 int					/* error */
1389 xfs_rtmount_inodes(
1390 	xfs_mount_t	*mp)		/* file system mount structure */
1391 {
1392 	int		error;		/* error return value */
1393 	xfs_sb_t	*sbp;
1394 
1395 	sbp = &mp->m_sb;
1396 	error = xfs_iget(mp, NULL, sbp->sb_rbmino, 0, 0, &mp->m_rbmip);
1397 	if (error)
1398 		return error;
1399 	ASSERT(mp->m_rbmip != NULL);
1400 
1401 	error = xfs_rtmount_iread_extents(mp->m_rbmip, XFS_ILOCK_RTBITMAP);
1402 	if (error)
1403 		goto out_rele_bitmap;
1404 
1405 	error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip);
1406 	if (error)
1407 		goto out_rele_bitmap;
1408 	ASSERT(mp->m_rsumip != NULL);
1409 
1410 	error = xfs_rtmount_iread_extents(mp->m_rsumip, XFS_ILOCK_RTSUM);
1411 	if (error)
1412 		goto out_rele_summary;
1413 
1414 	xfs_alloc_rsum_cache(mp, sbp->sb_rbmblocks);
1415 	return 0;
1416 
1417 out_rele_summary:
1418 	xfs_irele(mp->m_rsumip);
1419 out_rele_bitmap:
1420 	xfs_irele(mp->m_rbmip);
1421 	return error;
1422 }
1423 
1424 void
1425 xfs_rtunmount_inodes(
1426 	struct xfs_mount	*mp)
1427 {
1428 	kmem_free(mp->m_rsum_cache);
1429 	if (mp->m_rbmip)
1430 		xfs_irele(mp->m_rbmip);
1431 	if (mp->m_rsumip)
1432 		xfs_irele(mp->m_rsumip);
1433 }
1434 
1435 /*
1436  * Pick an extent for allocation at the start of a new realtime file.
1437  * Use the sequence number stored in the atime field of the bitmap inode.
1438  * Translate this to a fraction of the rtextents, and return the product
1439  * of rtextents and the fraction.
1440  * The fraction sequence is 0, 1/2, 1/4, 3/4, 1/8, ..., 7/8, 1/16, ...
1441  */
1442 int					/* error */
1443 xfs_rtpick_extent(
1444 	xfs_mount_t	*mp,		/* file system mount point */
1445 	xfs_trans_t	*tp,		/* transaction pointer */
1446 	xfs_extlen_t	len,		/* allocation length (rtextents) */
1447 	xfs_rtblock_t	*pick)		/* result rt extent */
1448 {
1449 	xfs_rtblock_t	b;		/* result block */
1450 	int		log2;		/* log of sequence number */
1451 	uint64_t	resid;		/* residual after log removed */
1452 	uint64_t	seq;		/* sequence number of file creation */
1453 	uint64_t	*seqp;		/* pointer to seqno in inode */
1454 
1455 	ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
1456 
1457 	seqp = (uint64_t *)&VFS_I(mp->m_rbmip)->i_atime;
1458 	if (!(mp->m_rbmip->i_diflags & XFS_DIFLAG_NEWRTBM)) {
1459 		mp->m_rbmip->i_diflags |= XFS_DIFLAG_NEWRTBM;
1460 		*seqp = 0;
1461 	}
1462 	seq = *seqp;
1463 	if ((log2 = xfs_highbit64(seq)) == -1)
1464 		b = 0;
1465 	else {
1466 		resid = seq - (1ULL << log2);
1467 		b = (mp->m_sb.sb_rextents * ((resid << 1) + 1ULL)) >>
1468 		    (log2 + 1);
1469 		if (b >= mp->m_sb.sb_rextents)
1470 			div64_u64_rem(b, mp->m_sb.sb_rextents, &b);
1471 		if (b + len > mp->m_sb.sb_rextents)
1472 			b = mp->m_sb.sb_rextents - len;
1473 	}
1474 	*seqp = seq + 1;
1475 	xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
1476 	*pick = b;
1477 	return 0;
1478 }
1479